作者 lixiang

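导入方法修改:将 metadata 的组装从 QuestionEmbeddingMapper 的插入逻辑移至 QuestionEmbeddingServiceImpl(新增 storedFileName 字段),并在文档导入时将全文及其向量同步写入 embeddings 表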

@@ -192,25 +192,12 @@ public class QuestionEmbeddingMapper { @@ -192,25 +192,12 @@ public class QuestionEmbeddingMapper {
192 192
193 try (Connection conn = getConnection(); 193 try (Connection conn = getConnection();
194 PreparedStatement stmt = conn.prepareStatement(sql)) { 194 PreparedStatement stmt = conn.prepareStatement(sql)) {
195 - Map<String,Object> matadata = new LinkedHashMap<String,Object>();  
196 - SnowflakeGenerator snowflakeGenerator = new SnowflakeGenerator();  
197 -  
198 - String docId = String.valueOf(snowflakeGenerator.next());  
199 - matadata.put("docId",docId);  
200 - matadata.put("docName","");  
201 - matadata.put("knowledgeId",record.getKnowledgeId());  
202 -  
203 - record.setMetadata(toJson(matadata));  
204 stmt.setString(1, UUID.randomUUID().toString()); 195 stmt.setString(1, UUID.randomUUID().toString());
205 stmt.setString(2, record.getText()); 196 stmt.setString(2, record.getText());
206 stmt.setString(3, record.getQuestion()); 197 stmt.setString(3, record.getQuestion());
207 stmt.setString(4, record.getAnswer()); 198 stmt.setString(4, record.getAnswer());
208 PGobject jsonObject = new PGobject(); 199 PGobject jsonObject = new PGobject();
209 jsonObject.setType("json"); 200 jsonObject.setType("json");
210 -  
211 -// JSONObject mataData = new JSONObject();  
212 -// mataData.put("knowledgeId",record.getKnowledgeId());  
213 -  
214 jsonObject.setValue(record.getMetadata()); 201 jsonObject.setValue(record.getMetadata());
215 stmt.setObject(5, jsonObject); 202 stmt.setObject(5, jsonObject);
216 Response<Embedding> embedding = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion()); 203 Response<Embedding> embedding = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion());
1 package org.jeecg.modules.airag.app.service.impl; 1 package org.jeecg.modules.airag.app.service.impl;
2 2
3 import com.baomidou.mybatisplus.extension.plugins.pagination.Page; 3 import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
  4 +import com.fasterxml.jackson.core.JsonProcessingException;
4 import org.apache.poi.hwpf.usermodel.CharacterRun; 5 import org.apache.poi.hwpf.usermodel.CharacterRun;
5 import org.apache.poi.hwpf.HWPFDocument; 6 import org.apache.poi.hwpf.HWPFDocument;
6 import org.apache.poi.hwpf.usermodel.Paragraph; 7 import org.apache.poi.hwpf.usermodel.Paragraph;
@@ -22,12 +23,19 @@ import org.jeecg.modules.airag.app.mapper.QuestionEmbeddingMapper; @@ -22,12 +23,19 @@ import org.jeecg.modules.airag.app.mapper.QuestionEmbeddingMapper;
22 import org.jeecg.modules.airag.app.service.IQuestionEmbeddingService; 23 import org.jeecg.modules.airag.app.service.IQuestionEmbeddingService;
23 import org.jeecg.modules.airag.app.utils.AiModelUtils; 24 import org.jeecg.modules.airag.app.utils.AiModelUtils;
24 import org.jeecg.modules.airag.common.handler.IAIChatHandler; 25 import org.jeecg.modules.airag.common.handler.IAIChatHandler;
  26 +import org.postgresql.util.PGobject;
25 import org.slf4j.Logger; 27 import org.slf4j.Logger;
26 import org.slf4j.LoggerFactory; 28 import org.slf4j.LoggerFactory;
27 import org.springframework.beans.factory.annotation.Autowired; 29 import org.springframework.beans.factory.annotation.Autowired;
28 import org.springframework.beans.factory.annotation.Value; 30 import org.springframework.beans.factory.annotation.Value;
29 import org.springframework.stereotype.Service; 31 import org.springframework.stereotype.Service;
30 import org.springframework.web.multipart.MultipartFile; 32 import org.springframework.web.multipart.MultipartFile;
  33 +import com.fasterxml.jackson.databind.ObjectMapper;
  34 +import com.pgvector.PGvector;
  35 +import java.sql.Connection;
  36 +import java.sql.DriverManager;
  37 +import java.sql.PreparedStatement;
  38 +import java.sql.SQLException;
31 39
32 import java.io.File; 40 import java.io.File;
33 import java.io.FileInputStream; 41 import java.io.FileInputStream;
@@ -64,6 +72,11 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { @@ -64,6 +72,11 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
64 private static final Pattern SPECIAL_CHARS_PATTERN = Pattern.compile("[^a-zA-Z0-9\\u4e00-\\u9fa5\\s]"); 72 private static final Pattern SPECIAL_CHARS_PATTERN = Pattern.compile("[^a-zA-Z0-9\\u4e00-\\u9fa5\\s]");
65 private static final Pattern UUID_PATTERN = Pattern.compile("_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"); 73 private static final Pattern UUID_PATTERN = Pattern.compile("_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}");
66 74
  75 + // 新增:数据库连接配置
  76 + private static final String DB_URL = "jdbc:postgresql://192.168.100.104:5432/postgres";
  77 + private static final String DB_USER = "postgres";
  78 + private static final String DB_PASSWORD = "postgres";
  79 +
67 @Override 80 @Override
68 public Page<QuestionEmbedding> findAll(QuestionEmbedding questionEmbedding, Integer pageNo, Integer pageSize) { 81 public Page<QuestionEmbedding> findAll(QuestionEmbedding questionEmbedding, Integer pageNo, Integer pageSize) {
69 return questionEmbeddingMapper.findAll(questionEmbedding,pageNo,pageSize); 82 return questionEmbeddingMapper.findAll(questionEmbedding,pageNo,pageSize);
@@ -171,7 +184,62 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { @@ -171,7 +184,62 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
171 segments = splitWordDocument(targetPath.toString()); 184 segments = splitWordDocument(targetPath.toString());
172 } 185 }
173 186
  187 + // 原有逻辑:保存到question_embedding表
174 saveSegmentsToDatabase(segments, originalFileName, storedFileName, knowledgeId); 188 saveSegmentsToDatabase(segments, originalFileName, storedFileName, knowledgeId);
  189 +
  190 + // 新增逻辑:同时保存到embeddings表
  191 + saveToEmbeddingsTable(segments, originalFileName, knowledgeId);
  192 + }
  193 +
  194 + // 新增方法:将内容保存到embeddings表
  195 + private void saveToEmbeddingsTable(List<String> segments, String originalFileName, String knowledgeId) {
  196 + if (segments.isEmpty()) {
  197 + return;
  198 + }
  199 +
  200 + // 获取无UUID和扩展名的文件名用于显示
  201 + String displayFileName = removeUuidSuffix(originalFileName);
  202 + displayFileName = FilenameUtils.removeExtension(displayFileName);
  203 +
  204 + // 为整个文档生成一个唯一的docId
  205 + String docId = UUID.randomUUID().toString();
  206 +
  207 + // 合并所有段落作为完整内容
  208 + String fullContent = String.join("\n\n", segments);
  209 +
  210 + try (Connection conn = getConnection()) {
  211 + // 准备元数据
  212 + Map<String, Object> metadata = new HashMap<>();
  213 + metadata.put("docId", docId);
  214 + metadata.put("docName", originalFileName);
  215 + metadata.put("knowledgeId", knowledgeId);
  216 +
  217 + // 获取文本的向量表示
  218 + Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding("1925730210204721154", displayFileName + ": " + fullContent);
  219 + float[] embeddingVector = embeddingResponse.content().vector();
  220 +
  221 + // 插入到embeddings表
  222 + String sql = "INSERT INTO embeddings (embedding_id, embedding, text, metadata) VALUES (?, ?, ?, ?::jsonb)";
  223 + try (PreparedStatement stmt = conn.prepareStatement(sql)) {
  224 + stmt.setString(1, UUID.randomUUID().toString());
  225 + stmt.setObject(2, new PGvector(embeddingVector));
  226 + stmt.setString(3, fullContent);
  227 +
  228 + PGobject jsonObject = new PGobject();
  229 + jsonObject.setType("json");
  230 + jsonObject.setValue(new ObjectMapper().writeValueAsString(metadata));
  231 + stmt.setObject(4, jsonObject);
  232 +
  233 + stmt.executeUpdate();
  234 + }
  235 + } catch (Exception e) {
  236 + log.error("保存到embeddings表失败", e);
  237 + }
  238 + }
  239 +
  240 + // 新增方法:获取数据库连接
  241 + private Connection getConnection() throws SQLException {
  242 + return DriverManager.getConnection(DB_URL, DB_USER, DB_PASSWORD);
175 } 243 }
176 244
177 private String generateStoredFileName(String originalFileName) { 245 private String generateStoredFileName(String originalFileName) {
@@ -196,7 +264,6 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { @@ -196,7 +264,6 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
196 return text.replaceAll("\\s+", " ").trim(); 264 return text.replaceAll("\\s+", " ").trim();
197 } 265 }
198 266
199 - // 修改isHeading方法中的判断条件,不再排除包含.的文本  
200 private static boolean isHeading(Paragraph para, Range range) { 267 private static boolean isHeading(Paragraph para, Range range) {
201 int styleIndex = para.getStyleIndex(); 268 int styleIndex = para.getStyleIndex();
202 if (styleIndex >= 1 && styleIndex <= 9) { 269 if (styleIndex >= 1 && styleIndex <= 9) {
@@ -215,7 +282,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { @@ -215,7 +282,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
215 String text = para.text().trim(); 282 String text = para.text().trim();
216 return text.toUpperCase().equals(text) && 283 return text.toUpperCase().equals(text) &&
217 text.length() < 100 && 284 text.length() < 100 &&
218 - !text.contains("\t"); // 移除了 !text.contains(".") 的判断 285 + !text.contains("\t");
219 } 286 }
220 287
221 private String removeUuidSuffix(String fileName) { 288 private String removeUuidSuffix(String fileName) {
@@ -350,13 +417,14 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { @@ -350,13 +417,14 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
350 return tableContent.toString(); 417 return tableContent.toString();
351 } 418 }
352 419
  420 +
353 private void saveSegmentsToDatabase(List<String> segments, String originalFileName, String storedFileName, String knowledgeId) { 421 private void saveSegmentsToDatabase(List<String> segments, String originalFileName, String storedFileName, String knowledgeId) {
354 if (segments.isEmpty()) { 422 if (segments.isEmpty()) {
355 return; 423 return;
356 } 424 }
357 425
358 - // 从存储文件名中提取UUID部分  
359 - String uuid = storedFileName.substring( 426 + // 从存储文件名中提取UUID部分作为docId
  427 + String docId = storedFileName.substring(
360 storedFileName.lastIndexOf('_') + 1, 428 storedFileName.lastIndexOf('_') + 1,
361 storedFileName.lastIndexOf('.') 429 storedFileName.lastIndexOf('.')
362 ); 430 );
@@ -386,12 +454,25 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { @@ -386,12 +454,25 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
386 } 454 }
387 455
388 record.setText(""); 456 record.setText("");
  457 +
  458 + // 构建metadata JSON对象
  459 + Map<String, String> metadata = new LinkedHashMap<>(); // 使用LinkedHashMap保持字段顺序
  460 + metadata.put("docId", docId);
  461 + metadata.put("docName", originalFileName); // 上传前的原始文件名
  462 + metadata.put("storedFileName", storedFileName); // 上传后的带UUID的文件名
  463 + metadata.put("knowledgeId", knowledgeId);
  464 +
  465 + // 使用ObjectMapper转换为JSON字符串
  466 + try {
  467 + record.setMetadata(new ObjectMapper().writeValueAsString(metadata));
  468 + } catch (JsonProcessingException e) {
  469 + log.error("生成metadata JSON失败", e);
  470 + // 使用默认值
389 record.setMetadata(String.format( 471 record.setMetadata(String.format(
390 - "{\"docId\":\"%s\",\"docName\":\"%s\",\"knowledgeId\":\"%s\"}",  
391 - uuid,  
392 - originalFileName,  
393 - knowledgeId 472 + "{\"docId\":\"%s\",\"docName\":\"%s\",\"storedFileName\":\"%s\",\"knowledgeId\":\"%s\"}",
  473 + docId, originalFileName, storedFileName, knowledgeId
394 )); 474 ));
  475 + }
395 476
396 Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion()); 477 Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion());
397 record.setEmbedding(embeddingResponse.content().vector()); 478 record.setEmbedding(embeddingResponse.content().vector());
@@ -399,4 +480,5 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { @@ -399,4 +480,5 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
399 questionEmbeddingMapper.insert(record); 480 questionEmbeddingMapper.insert(record);
400 } 481 }
401 } 482 }
  483 +
402 } 484 }