|
|
|
package org.jeecg.modules.airag.app.service.impl;
|
|
|
|
|
|
|
|
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
|
|
|
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
|
|
|
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
|
|
|
import org.apache.poi.hwpf.HWPFDocument;
|
|
|
|
import org.apache.poi.hwpf.usermodel.Paragraph;
|
|
...
|
...
|
@@ -22,12 +23,19 @@ import org.jeecg.modules.airag.app.mapper.QuestionEmbeddingMapper; |
|
|
|
import org.jeecg.modules.airag.app.service.IQuestionEmbeddingService;
|
|
|
|
import org.jeecg.modules.airag.app.utils.AiModelUtils;
|
|
|
|
import org.jeecg.modules.airag.common.handler.IAIChatHandler;
|
|
|
|
import org.postgresql.util.PGobject;
|
|
|
|
import org.slf4j.Logger;
|
|
|
|
import org.slf4j.LoggerFactory;
|
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
|
|
import org.springframework.stereotype.Service;
|
|
|
|
import org.springframework.web.multipart.MultipartFile;
|
|
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
|
import com.pgvector.PGvector;
|
|
|
|
import java.sql.Connection;
|
|
|
|
import java.sql.DriverManager;
|
|
|
|
import java.sql.PreparedStatement;
|
|
|
|
import java.sql.SQLException;
|
|
|
|
|
|
|
|
import java.io.File;
|
|
|
|
import java.io.FileInputStream;
|
|
...
|
...
|
@@ -64,6 +72,11 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
|
|
private static final Pattern SPECIAL_CHARS_PATTERN = Pattern.compile("[^a-zA-Z0-9\\u4e00-\\u9fa5\\s]");
|
|
|
|
private static final Pattern UUID_PATTERN = Pattern.compile("_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}");
|
|
|
|
|
|
|
|
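// Added: JDBC connection settings for the embeddings database.
// NOTE(review): host and credentials are hard-coded here; consider moving them to externalized configuration.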
|
|
|
|
private static final String DB_URL = "jdbc:postgresql://192.168.100.104:5432/postgres";
|
|
|
|
private static final String DB_USER = "postgres";
|
|
|
|
private static final String DB_PASSWORD = "postgres";
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public Page<QuestionEmbedding> findAll(QuestionEmbedding questionEmbedding, Integer pageNo, Integer pageSize) {
|
|
|
|
return questionEmbeddingMapper.findAll(questionEmbedding,pageNo,pageSize);
|
|
...
|
...
|
@@ -171,7 +184,62 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
|
|
segments = splitWordDocument(targetPath.toString());
|
|
|
|
}
|
|
|
|
|
|
|
|
// 原有逻辑:保存到question_embedding表
|
|
|
|
saveSegmentsToDatabase(segments, originalFileName, storedFileName, knowledgeId);
|
|
|
|
|
|
|
|
// 新增逻辑:同时保存到embeddings表
|
|
|
|
saveToEmbeddingsTable(segments, originalFileName, knowledgeId);
|
|
|
|
}
|
|
|
|
|
|
|
|
// 新增方法:将内容保存到embeddings表
|
|
|
|
private void saveToEmbeddingsTable(List<String> segments, String originalFileName, String knowledgeId) {
|
|
|
|
if (segments.isEmpty()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// 获取无UUID和扩展名的文件名用于显示
|
|
|
|
String displayFileName = removeUuidSuffix(originalFileName);
|
|
|
|
displayFileName = FilenameUtils.removeExtension(displayFileName);
|
|
|
|
|
|
|
|
// 为整个文档生成一个唯一的docId
|
|
|
|
String docId = UUID.randomUUID().toString();
|
|
|
|
|
|
|
|
// 合并所有段落作为完整内容
|
|
|
|
String fullContent = String.join("\n\n", segments);
|
|
|
|
|
|
|
|
try (Connection conn = getConnection()) {
|
|
|
|
// 准备元数据
|
|
|
|
Map<String, Object> metadata = new HashMap<>();
|
|
|
|
metadata.put("docId", docId);
|
|
|
|
metadata.put("docName", originalFileName);
|
|
|
|
metadata.put("knowledgeId", knowledgeId);
|
|
|
|
|
|
|
|
// 获取文本的向量表示
|
|
|
|
Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding("1925730210204721154", displayFileName + ": " + fullContent);
|
|
|
|
float[] embeddingVector = embeddingResponse.content().vector();
|
|
|
|
|
|
|
|
// 插入到embeddings表
|
|
|
|
String sql = "INSERT INTO embeddings (embedding_id, embedding, text, metadata) VALUES (?, ?, ?, ?::jsonb)";
|
|
|
|
try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
|
|
|
stmt.setString(1, UUID.randomUUID().toString());
|
|
|
|
stmt.setObject(2, new PGvector(embeddingVector));
|
|
|
|
stmt.setString(3, fullContent);
|
|
|
|
|
|
|
|
PGobject jsonObject = new PGobject();
|
|
|
|
jsonObject.setType("json");
|
|
|
|
jsonObject.setValue(new ObjectMapper().writeValueAsString(metadata));
|
|
|
|
stmt.setObject(4, jsonObject);
|
|
|
|
|
|
|
|
stmt.executeUpdate();
|
|
|
|
}
|
|
|
|
} catch (Exception e) {
|
|
|
|
log.error("保存到embeddings表失败", e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// 新增方法:获取数据库连接
|
|
|
|
private Connection getConnection() throws SQLException {
|
|
|
|
return DriverManager.getConnection(DB_URL, DB_USER, DB_PASSWORD);
|
|
|
|
}
|
|
|
|
|
|
|
|
private String generateStoredFileName(String originalFileName) {
|
|
...
|
...
|
@@ -196,7 +264,6 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
|
|
return text.replaceAll("\\s+", " ").trim();
|
|
|
|
}
|
|
|
|
|
|
|
|
// 修改isHeading方法中的判断条件,不再排除包含.的文本
|
|
|
|
private static boolean isHeading(Paragraph para, Range range) {
|
|
|
|
int styleIndex = para.getStyleIndex();
|
|
|
|
if (styleIndex >= 1 && styleIndex <= 9) {
|
|
...
|
...
|
@@ -215,7 +282,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
|
|
String text = para.text().trim();
|
|
|
|
return text.toUpperCase().equals(text) &&
|
|
|
|
text.length() < 100 &&
|
|
|
|
!text.contains("\t"); // 移除了 !text.contains(".") 的判断
|
|
|
|
!text.contains("\t");
|
|
|
|
}
|
|
|
|
|
|
|
|
private String removeUuidSuffix(String fileName) {
|
|
...
|
...
|
@@ -350,13 +417,14 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
|
|
return tableContent.toString();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
private void saveSegmentsToDatabase(List<String> segments, String originalFileName, String storedFileName, String knowledgeId) {
|
|
|
|
if (segments.isEmpty()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// 从存储文件名中提取UUID部分
|
|
|
|
String uuid = storedFileName.substring(
|
|
|
|
// 从存储文件名中提取UUID部分作为docId
|
|
|
|
String docId = storedFileName.substring(
|
|
|
|
storedFileName.lastIndexOf('_') + 1,
|
|
|
|
storedFileName.lastIndexOf('.')
|
|
|
|
);
|
|
...
|
...
|
@@ -386,12 +454,25 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
|
|
}
|
|
|
|
|
|
|
|
record.setText("");
|
|
|
|
record.setMetadata(String.format(
|
|
|
|
"{\"docId\":\"%s\",\"docName\":\"%s\",\"knowledgeId\":\"%s\"}",
|
|
|
|
uuid,
|
|
|
|
originalFileName,
|
|
|
|
knowledgeId
|
|
|
|
));
|
|
|
|
|
|
|
|
// 构建metadata JSON对象
|
|
|
|
Map<String, String> metadata = new LinkedHashMap<>(); // 使用LinkedHashMap保持字段顺序
|
|
|
|
metadata.put("docId", docId);
|
|
|
|
metadata.put("docName", originalFileName); // 上传前的原始文件名
|
|
|
|
metadata.put("storedFileName", storedFileName); // 上传后的带UUID的文件名
|
|
|
|
metadata.put("knowledgeId", knowledgeId);
|
|
|
|
|
|
|
|
// 使用ObjectMapper转换为JSON字符串
|
|
|
|
try {
|
|
|
|
record.setMetadata(new ObjectMapper().writeValueAsString(metadata));
|
|
|
|
} catch (JsonProcessingException e) {
|
|
|
|
log.error("生成metadata JSON失败", e);
|
|
|
|
// 使用默认值
|
|
|
|
record.setMetadata(String.format(
|
|
|
|
"{\"docId\":\"%s\",\"docName\":\"%s\",\"storedFileName\":\"%s\",\"knowledgeId\":\"%s\"}",
|
|
|
|
docId, originalFileName, storedFileName, knowledgeId
|
|
|
|
));
|
|
|
|
}
|
|
|
|
|
|
|
|
Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion());
|
|
|
|
record.setEmbedding(embeddingResponse.content().vector());
|
|
...
|
...
|
@@ -399,4 +480,5 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
|
|
questionEmbeddingMapper.insert(record);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} |
|
|
\ No newline at end of file |
...
|
...
|
|