|
1
|
package org.jeecg.modules.airag.app.service.impl;
|
1
|
package org.jeecg.modules.airag.app.service.impl;
|
|
2
|
|
2
|
|
|
3
|
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
|
3
|
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
|
|
|
|
4
|
+import com.fasterxml.jackson.core.JsonProcessingException;
|
|
4
|
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
5
|
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
|
5
|
import org.apache.poi.hwpf.HWPFDocument;
|
6
|
import org.apache.poi.hwpf.HWPFDocument;
|
|
6
|
import org.apache.poi.hwpf.usermodel.Paragraph;
|
7
|
import org.apache.poi.hwpf.usermodel.Paragraph;
|
|
@@ -22,12 +23,19 @@ import org.jeecg.modules.airag.app.mapper.QuestionEmbeddingMapper; |
|
@@ -22,12 +23,19 @@ import org.jeecg.modules.airag.app.mapper.QuestionEmbeddingMapper; |
|
22
|
import org.jeecg.modules.airag.app.service.IQuestionEmbeddingService;
|
23
|
import org.jeecg.modules.airag.app.service.IQuestionEmbeddingService;
|
|
23
|
import org.jeecg.modules.airag.app.utils.AiModelUtils;
|
24
|
import org.jeecg.modules.airag.app.utils.AiModelUtils;
|
|
24
|
import org.jeecg.modules.airag.common.handler.IAIChatHandler;
|
25
|
import org.jeecg.modules.airag.common.handler.IAIChatHandler;
|
|
|
|
26
|
+import org.postgresql.util.PGobject;
|
|
25
|
import org.slf4j.Logger;
|
27
|
import org.slf4j.Logger;
|
|
26
|
import org.slf4j.LoggerFactory;
|
28
|
import org.slf4j.LoggerFactory;
|
|
27
|
import org.springframework.beans.factory.annotation.Autowired;
|
29
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
28
|
import org.springframework.beans.factory.annotation.Value;
|
30
|
import org.springframework.beans.factory.annotation.Value;
|
|
29
|
import org.springframework.stereotype.Service;
|
31
|
import org.springframework.stereotype.Service;
|
|
30
|
import org.springframework.web.multipart.MultipartFile;
|
32
|
import org.springframework.web.multipart.MultipartFile;
|
|
|
|
33
|
+import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
|
34
|
+import com.pgvector.PGvector;
|
|
|
|
35
|
+import java.sql.Connection;
|
|
|
|
36
|
+import java.sql.DriverManager;
|
|
|
|
37
|
+import java.sql.PreparedStatement;
|
|
|
|
38
|
+import java.sql.SQLException;
|
|
31
|
|
39
|
|
|
32
|
import java.io.File;
|
40
|
import java.io.File;
|
|
33
|
import java.io.FileInputStream;
|
41
|
import java.io.FileInputStream;
|
|
@@ -64,6 +72,11 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
@@ -64,6 +72,11 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
64
|
private static final Pattern SPECIAL_CHARS_PATTERN = Pattern.compile("[^a-zA-Z0-9\\u4e00-\\u9fa5\\s]");
|
72
|
private static final Pattern SPECIAL_CHARS_PATTERN = Pattern.compile("[^a-zA-Z0-9\\u4e00-\\u9fa5\\s]");
|
|
65
|
private static final Pattern UUID_PATTERN = Pattern.compile("_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}");
|
73
|
private static final Pattern UUID_PATTERN = Pattern.compile("_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}");
|
|
66
|
|
74
|
|
|
|
|
75
|
+ // 新增:数据库连接配置
|
|
|
|
76
|
+ private static final String DB_URL = "jdbc:postgresql://192.168.100.104:5432/postgres";
|
|
|
|
77
|
+ private static final String DB_USER = "postgres";
|
|
|
|
78
|
+ private static final String DB_PASSWORD = "postgres";
|
|
|
|
79
|
+
|
|
67
|
@Override
|
80
|
@Override
|
|
68
|
public Page<QuestionEmbedding> findAll(QuestionEmbedding questionEmbedding, Integer pageNo, Integer pageSize) {
|
81
|
public Page<QuestionEmbedding> findAll(QuestionEmbedding questionEmbedding, Integer pageNo, Integer pageSize) {
|
|
69
|
return questionEmbeddingMapper.findAll(questionEmbedding,pageNo,pageSize);
|
82
|
return questionEmbeddingMapper.findAll(questionEmbedding,pageNo,pageSize);
|
|
@@ -171,7 +184,62 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
@@ -171,7 +184,62 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
171
|
segments = splitWordDocument(targetPath.toString());
|
184
|
segments = splitWordDocument(targetPath.toString());
|
|
172
|
}
|
185
|
}
|
|
173
|
|
186
|
|
|
|
|
187
|
+ // 原有逻辑:保存到question_embedding表
|
|
174
|
saveSegmentsToDatabase(segments, originalFileName, storedFileName, knowledgeId);
|
188
|
saveSegmentsToDatabase(segments, originalFileName, storedFileName, knowledgeId);
|
|
|
|
189
|
+
|
|
|
|
190
|
+ // 新增逻辑:同时保存到embeddings表
|
|
|
|
191
|
+ saveToEmbeddingsTable(segments, originalFileName, knowledgeId);
|
|
|
|
192
|
+ }
|
|
|
|
193
|
+
|
|
|
|
194
|
+ // 新增方法:将内容保存到embeddings表
|
|
|
|
195
|
+ private void saveToEmbeddingsTable(List<String> segments, String originalFileName, String knowledgeId) {
|
|
|
|
196
|
+ if (segments.isEmpty()) {
|
|
|
|
197
|
+ return;
|
|
|
|
198
|
+ }
|
|
|
|
199
|
+
|
|
|
|
200
|
+ // 获取无UUID和扩展名的文件名用于显示
|
|
|
|
201
|
+ String displayFileName = removeUuidSuffix(originalFileName);
|
|
|
|
202
|
+ displayFileName = FilenameUtils.removeExtension(displayFileName);
|
|
|
|
203
|
+
|
|
|
|
204
|
+ // 为整个文档生成一个唯一的docId
|
|
|
|
205
|
+ String docId = UUID.randomUUID().toString();
|
|
|
|
206
|
+
|
|
|
|
207
|
+ // 合并所有段落作为完整内容
|
|
|
|
208
|
+ String fullContent = String.join("\n\n", segments);
|
|
|
|
209
|
+
|
|
|
|
210
|
+ try (Connection conn = getConnection()) {
|
|
|
|
211
|
+ // 准备元数据
|
|
|
|
212
|
+ Map<String, Object> metadata = new HashMap<>();
|
|
|
|
213
|
+ metadata.put("docId", docId);
|
|
|
|
214
|
+ metadata.put("docName", originalFileName);
|
|
|
|
215
|
+ metadata.put("knowledgeId", knowledgeId);
|
|
|
|
216
|
+
|
|
|
|
217
|
+ // 获取文本的向量表示
|
|
|
|
218
|
+ Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding("1925730210204721154", displayFileName + ": " + fullContent);
|
|
|
|
219
|
+ float[] embeddingVector = embeddingResponse.content().vector();
|
|
|
|
220
|
+
|
|
|
|
221
|
+ // 插入到embeddings表
|
|
|
|
222
|
+ String sql = "INSERT INTO embeddings (embedding_id, embedding, text, metadata) VALUES (?, ?, ?, ?::jsonb)";
|
|
|
|
223
|
+ try (PreparedStatement stmt = conn.prepareStatement(sql)) {
|
|
|
|
224
|
+ stmt.setString(1, UUID.randomUUID().toString());
|
|
|
|
225
|
+ stmt.setObject(2, new PGvector(embeddingVector));
|
|
|
|
226
|
+ stmt.setString(3, fullContent);
|
|
|
|
227
|
+
|
|
|
|
228
|
+ PGobject jsonObject = new PGobject();
|
|
|
|
229
|
+ jsonObject.setType("json");
|
|
|
|
230
|
+ jsonObject.setValue(new ObjectMapper().writeValueAsString(metadata));
|
|
|
|
231
|
+ stmt.setObject(4, jsonObject);
|
|
|
|
232
|
+
|
|
|
|
233
|
+ stmt.executeUpdate();
|
|
|
|
234
|
+ }
|
|
|
|
235
|
+ } catch (Exception e) {
|
|
|
|
236
|
+ log.error("保存到embeddings表失败", e);
|
|
|
|
237
|
+ }
|
|
|
|
238
|
+ }
|
|
|
|
239
|
+
|
|
|
|
240
|
+ // 新增方法:获取数据库连接
|
|
|
|
241
|
+ private Connection getConnection() throws SQLException {
|
|
|
|
242
|
+ return DriverManager.getConnection(DB_URL, DB_USER, DB_PASSWORD);
|
|
175
|
}
|
243
|
}
|
|
176
|
|
244
|
|
|
177
|
private String generateStoredFileName(String originalFileName) {
|
245
|
private String generateStoredFileName(String originalFileName) {
|
|
@@ -196,7 +264,6 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
@@ -196,7 +264,6 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
196
|
return text.replaceAll("\\s+", " ").trim();
|
264
|
return text.replaceAll("\\s+", " ").trim();
|
|
197
|
}
|
265
|
}
|
|
198
|
|
266
|
|
|
199
|
- // 修改isHeading方法中的判断条件,不再排除包含.的文本
|
|
|
|
200
|
private static boolean isHeading(Paragraph para, Range range) {
|
267
|
private static boolean isHeading(Paragraph para, Range range) {
|
|
201
|
int styleIndex = para.getStyleIndex();
|
268
|
int styleIndex = para.getStyleIndex();
|
|
202
|
if (styleIndex >= 1 && styleIndex <= 9) {
|
269
|
if (styleIndex >= 1 && styleIndex <= 9) {
|
|
@@ -215,7 +282,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
@@ -215,7 +282,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
215
|
String text = para.text().trim();
|
282
|
String text = para.text().trim();
|
|
216
|
return text.toUpperCase().equals(text) &&
|
283
|
return text.toUpperCase().equals(text) &&
|
|
217
|
text.length() < 100 &&
|
284
|
text.length() < 100 &&
|
|
218
|
- !text.contains("\t"); // 移除了 !text.contains(".") 的判断
|
285
|
+ !text.contains("\t");
|
|
219
|
}
|
286
|
}
|
|
220
|
|
287
|
|
|
221
|
private String removeUuidSuffix(String fileName) {
|
288
|
private String removeUuidSuffix(String fileName) {
|
|
@@ -350,13 +417,14 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
@@ -350,13 +417,14 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
350
|
return tableContent.toString();
|
417
|
return tableContent.toString();
|
|
351
|
}
|
418
|
}
|
|
352
|
|
419
|
|
|
|
|
420
|
+
|
|
353
|
private void saveSegmentsToDatabase(List<String> segments, String originalFileName, String storedFileName, String knowledgeId) {
|
421
|
private void saveSegmentsToDatabase(List<String> segments, String originalFileName, String storedFileName, String knowledgeId) {
|
|
354
|
if (segments.isEmpty()) {
|
422
|
if (segments.isEmpty()) {
|
|
355
|
return;
|
423
|
return;
|
|
356
|
}
|
424
|
}
|
|
357
|
|
425
|
|
|
358
|
- // 从存储文件名中提取UUID部分
|
|
|
|
359
|
- String uuid = storedFileName.substring(
|
426
|
+ // 从存储文件名中提取UUID部分作为docId
|
|
|
|
427
|
+ String docId = storedFileName.substring(
|
|
360
|
storedFileName.lastIndexOf('_') + 1,
|
428
|
storedFileName.lastIndexOf('_') + 1,
|
|
361
|
storedFileName.lastIndexOf('.')
|
429
|
storedFileName.lastIndexOf('.')
|
|
362
|
);
|
430
|
);
|
|
@@ -386,12 +454,25 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
@@ -386,12 +454,25 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
386
|
}
|
454
|
}
|
|
387
|
|
455
|
|
|
388
|
record.setText("");
|
456
|
record.setText("");
|
|
|
|
457
|
+
|
|
|
|
458
|
+ // 构建metadata JSON对象
|
|
|
|
459
|
+ Map<String, String> metadata = new LinkedHashMap<>(); // 使用LinkedHashMap保持字段顺序
|
|
|
|
460
|
+ metadata.put("docId", docId);
|
|
|
|
461
|
+ metadata.put("docName", originalFileName); // 上传前的原始文件名
|
|
|
|
462
|
+ metadata.put("storedFileName", storedFileName); // 上传后的带UUID的文件名
|
|
|
|
463
|
+ metadata.put("knowledgeId", knowledgeId);
|
|
|
|
464
|
+
|
|
|
|
465
|
+ // 使用ObjectMapper转换为JSON字符串
|
|
|
|
466
|
+ try {
|
|
|
|
467
|
+ record.setMetadata(new ObjectMapper().writeValueAsString(metadata));
|
|
|
|
468
|
+ } catch (JsonProcessingException e) {
|
|
|
|
469
|
+ log.error("生成metadata JSON失败", e);
|
|
|
|
470
|
+ // 使用默认值
|
|
389
|
record.setMetadata(String.format(
|
471
|
record.setMetadata(String.format(
|
|
390
|
- "{\"docId\":\"%s\",\"docName\":\"%s\",\"knowledgeId\":\"%s\"}",
|
|
|
|
391
|
- uuid,
|
|
|
|
392
|
- originalFileName,
|
|
|
|
393
|
- knowledgeId
|
472
|
+ "{\"docId\":\"%s\",\"docName\":\"%s\",\"storedFileName\":\"%s\",\"knowledgeId\":\"%s\"}",
|
|
|
|
473
|
+ docId, originalFileName, storedFileName, knowledgeId
|
|
394
|
));
|
474
|
));
|
|
|
|
475
|
+ }
|
|
395
|
|
476
|
|
|
396
|
Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion());
|
477
|
Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion());
|
|
397
|
record.setEmbedding(embeddingResponse.content().vector());
|
478
|
record.setEmbedding(embeddingResponse.content().vector());
|
|
@@ -399,4 +480,5 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
@@ -399,4 +480,5 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { |
|
399
|
questionEmbeddingMapper.insert(record);
|
480
|
questionEmbeddingMapper.insert(record);
|
|
400
|
}
|
481
|
}
|
|
401
|
}
|
482
|
}
|
|
|
|
483
|
+
|
|
402
|
} |
484
|
} |