作者 lixiang

导入方法修改

... ... @@ -192,25 +192,12 @@ public class QuestionEmbeddingMapper {
try (Connection conn = getConnection();
PreparedStatement stmt = conn.prepareStatement(sql)) {
Map<String,Object> matadata = new LinkedHashMap<String,Object>();
SnowflakeGenerator snowflakeGenerator = new SnowflakeGenerator();
String docId = String.valueOf(snowflakeGenerator.next());
matadata.put("docId",docId);
matadata.put("docName","");
matadata.put("knowledgeId",record.getKnowledgeId());
record.setMetadata(toJson(matadata));
stmt.setString(1, UUID.randomUUID().toString());
stmt.setString(2, record.getText());
stmt.setString(3, record.getQuestion());
stmt.setString(4, record.getAnswer());
PGobject jsonObject = new PGobject();
jsonObject.setType("json");
// JSONObject mataData = new JSONObject();
// mataData.put("knowledgeId",record.getKnowledgeId());
jsonObject.setValue(record.getMetadata());
stmt.setObject(5, jsonObject);
Response<Embedding> embedding = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion());
... ...
package org.jeecg.modules.airag.app.service.impl;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.fasterxml.jackson.core.JsonProcessingException;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Paragraph;
... ... @@ -22,12 +23,19 @@ import org.jeecg.modules.airag.app.mapper.QuestionEmbeddingMapper;
import org.jeecg.modules.airag.app.service.IQuestionEmbeddingService;
import org.jeecg.modules.airag.app.utils.AiModelUtils;
import org.jeecg.modules.airag.common.handler.IAIChatHandler;
import org.postgresql.util.PGobject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.pgvector.PGvector;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.io.File;
import java.io.FileInputStream;
... ... @@ -64,6 +72,11 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
// Matches any single character that is not an ASCII letter or digit, a character in the
// range U+4E00..U+9FA5, or whitespace.
private static final Pattern SPECIAL_CHARS_PATTERN = Pattern.compile("[^a-zA-Z0-9\\u4e00-\\u9fa5\\s]");
// Matches an underscore followed by a lowercase-hex UUID (8-4-4-4-12 groups).
private static final Pattern UUID_PATTERN = Pattern.compile("_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}");
// Added: database connection settings handed to DriverManager.getConnection(...) by getConnection().
// NOTE(review): the host address and credentials are hard-coded in source; consider moving them
// to external configuration so they are not committed with the code.
private static final String DB_URL = "jdbc:postgresql://192.168.100.104:5432/postgres";
private static final String DB_USER = "postgres";
private static final String DB_PASSWORD = "postgres";
@Override
public Page<QuestionEmbedding> findAll(QuestionEmbedding questionEmbedding, Integer pageNo, Integer pageSize) {
return questionEmbeddingMapper.findAll(questionEmbedding,pageNo,pageSize);
... ... @@ -171,7 +184,62 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
segments = splitWordDocument(targetPath.toString());
}
// 原有逻辑:保存到question_embedding表
saveSegmentsToDatabase(segments, originalFileName, storedFileName, knowledgeId);
// 新增逻辑:同时保存到embeddings表
saveToEmbeddingsTable(segments, originalFileName, knowledgeId);
}
/**
 * Stores the whole document as one row in the {@code embeddings} table.
 *
 * <p>All segments are joined with a blank line into a single text, an embedding is requested
 * for {@code "<display file name>: <full text>"}, and the vector, text and a JSON metadata
 * object ({@code docId}, {@code docName}, {@code knowledgeId}) are inserted.
 *
 * <p>This is best-effort: any failure is logged and not propagated to the caller.
 *
 * @param segments         text segments of the document; nothing is written when null or empty
 * @param originalFileName file name stored as {@code docName} in the metadata
 * @param knowledgeId      knowledge base id stored in the metadata
 */
private void saveToEmbeddingsTable(List<String> segments, String originalFileName, String knowledgeId) {
    if (segments == null || segments.isEmpty()) {
        return;
    }

    // File name without UUID suffix and extension, used as a prefix of the embedded text.
    String displayFileName = FilenameUtils.removeExtension(removeUuidSuffix(originalFileName));

    // One freshly generated docId for the whole document.
    // NOTE(review): this id is generated independently of any id derived from the stored
    // file name elsewhere in this class — confirm whether the two are expected to match.
    String docId = UUID.randomUUID().toString();

    // Merge all segments into the full content.
    String fullContent = String.join("\n\n", segments);

    String sql = "INSERT INTO embeddings (embedding_id, embedding, text, metadata) VALUES (?, ?, ?, ?::jsonb)";

    try {
        // Prepare everything that does not need the database first, so the JDBC connection
        // is not held open while waiting on the embedding service.
        Map<String, Object> metadata = new HashMap<>();
        metadata.put("docId", docId);
        metadata.put("docName", originalFileName);
        metadata.put("knowledgeId", knowledgeId);
        String metadataJson = new ObjectMapper().writeValueAsString(metadata);

        Response<Embedding> embeddingResponse =
                aiModelUtils.getEmbedding("1925730210204721154", displayFileName + ": " + fullContent);
        if (embeddingResponse == null || embeddingResponse.content() == null) {
            // Caught below and logged; gives a clearer cause than a bare NullPointerException.
            throw new IllegalStateException("embedding service returned no content for " + originalFileName);
        }
        float[] embeddingVector = embeddingResponse.content().vector();

        PGobject jsonObject = new PGobject();
        jsonObject.setType("json");
        jsonObject.setValue(metadataJson);

        // Connection and statement are both closed automatically, statement first.
        try (Connection conn = getConnection();
             PreparedStatement stmt = conn.prepareStatement(sql)) {
            stmt.setString(1, UUID.randomUUID().toString());
            stmt.setObject(2, new PGvector(embeddingVector));
            stmt.setString(3, fullContent);
            stmt.setObject(4, jsonObject);
            stmt.executeUpdate();
        }
    } catch (Exception e) {
        log.error("保存到embeddings表失败", e);
    }
}
/**
 * Obtains a database connection from {@link DriverManager} using the class-level
 * {@code DB_URL}, {@code DB_USER} and {@code DB_PASSWORD} settings.
 *
 * @return the connection returned by {@code DriverManager.getConnection}
 * @throws SQLException if the driver manager cannot provide a connection
 */
private Connection getConnection() throws SQLException {
    final Connection connection = DriverManager.getConnection(DB_URL, DB_USER, DB_PASSWORD);
    return connection;
}
private String generateStoredFileName(String originalFileName) {
... ... @@ -196,7 +264,6 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
return text.replaceAll("\\s+", " ").trim();
}
// 修改isHeading方法中的判断条件,不再排除包含.的文本
private static boolean isHeading(Paragraph para, Range range) {
int styleIndex = para.getStyleIndex();
if (styleIndex >= 1 && styleIndex <= 9) {
... ... @@ -215,7 +282,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
String text = para.text().trim();
return text.toUpperCase().equals(text) &&
text.length() < 100 &&
!text.contains("\t"); // 移除了 !text.contains(".") 的判断
!text.contains("\t");
}
private String removeUuidSuffix(String fileName) {
... ... @@ -350,13 +417,14 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
return tableContent.toString();
}
private void saveSegmentsToDatabase(List<String> segments, String originalFileName, String storedFileName, String knowledgeId) {
if (segments.isEmpty()) {
return;
}
// 从存储文件名中提取UUID部分
String uuid = storedFileName.substring(
// 从存储文件名中提取UUID部分作为docId
String docId = storedFileName.substring(
storedFileName.lastIndexOf('_') + 1,
storedFileName.lastIndexOf('.')
);
... ... @@ -386,12 +454,25 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
}
record.setText("");
record.setMetadata(String.format(
"{\"docId\":\"%s\",\"docName\":\"%s\",\"knowledgeId\":\"%s\"}",
uuid,
originalFileName,
knowledgeId
));
// 构建metadata JSON对象
Map<String, String> metadata = new LinkedHashMap<>(); // 使用LinkedHashMap保持字段顺序
metadata.put("docId", docId);
metadata.put("docName", originalFileName); // 上传前的原始文件名
metadata.put("storedFileName", storedFileName); // 上传后的带UUID的文件名
metadata.put("knowledgeId", knowledgeId);
// 使用ObjectMapper转换为JSON字符串
try {
record.setMetadata(new ObjectMapper().writeValueAsString(metadata));
} catch (JsonProcessingException e) {
log.error("生成metadata JSON失败", e);
// 使用默认值
record.setMetadata(String.format(
"{\"docId\":\"%s\",\"docName\":\"%s\",\"storedFileName\":\"%s\",\"knowledgeId\":\"%s\"}",
docId, originalFileName, storedFileName, knowledgeId
));
}
Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion());
record.setEmbedding(embeddingResponse.content().vector());
... ... @@ -399,4 +480,5 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
questionEmbeddingMapper.insert(record);
}
}
}
\ No newline at end of file
... ...