作者 lixiang

模型参数修改

@@ -312,7 +312,7 @@ public class PgVectorMapper { @@ -312,7 +312,7 @@ public class PgVectorMapper {
312 // 自动生成嵌入向量的方法(需根据您的嵌入模型实现) 312 // 自动生成嵌入向量的方法(需根据您的嵌入模型实现)
313 private float[] generateEmbedding(String text) { 313 private float[] generateEmbedding(String text) {
314 // 改为生成 768 维向量 314 // 改为生成 768 维向量
315 - float[] embedding = new float[768]; // OpenAI 标准维度是 1536,这里改为 768 315 + float[] embedding = new float[1024]; // OpenAI 标准维度是 1536,这里改为 768
316 316
317 // 实际项目中应调用嵌入模型 API 317 // 实际项目中应调用嵌入模型 API
318 // 例如:return embeddingClient.generate(text, 768); 318 // 例如:return embeddingClient.generate(text, 768);
@@ -17,6 +17,7 @@ import org.jeecg.modules.airag.app.entity.QuestionEmbedding; @@ -17,6 +17,7 @@ import org.jeecg.modules.airag.app.entity.QuestionEmbedding;
17 import org.jeecg.modules.airag.app.utils.AiModelUtils; 17 import org.jeecg.modules.airag.app.utils.AiModelUtils;
18 import org.postgresql.util.PGobject; 18 import org.postgresql.util.PGobject;
19 import org.springframework.beans.factory.annotation.Autowired; 19 import org.springframework.beans.factory.annotation.Autowired;
  20 +import org.springframework.beans.factory.annotation.Value;
20 import org.springframework.stereotype.Component; 21 import org.springframework.stereotype.Component;
21 22
22 import java.sql.*; 23 import java.sql.*;
@@ -30,6 +31,8 @@ public class QuestionEmbeddingMapper { @@ -30,6 +31,8 @@ public class QuestionEmbeddingMapper {
30 @Autowired 31 @Autowired
31 private AiModelUtils aiModelUtils; 32 private AiModelUtils aiModelUtils;
32 33
  34 + @Value("${jeecg.ai-chat.embedId}")
  35 + private String embedId;
33 // PostgreSQL连接参数(应与项目配置一致) 36 // PostgreSQL连接参数(应与项目配置一致)
34 private static final String URL = "jdbc:postgresql://192.168.100.104:5432/postgres"; 37 private static final String URL = "jdbc:postgresql://192.168.100.104:5432/postgres";
35 private static final String USER = "postgres"; 38 private static final String USER = "postgres";
@@ -185,7 +188,7 @@ public class QuestionEmbeddingMapper { @@ -185,7 +188,7 @@ public class QuestionEmbeddingMapper {
185 jsonObject.setType("json"); 188 jsonObject.setType("json");
186 jsonObject.setValue(record.getMetadata()); 189 jsonObject.setValue(record.getMetadata());
187 stmt.setObject(5, jsonObject); 190 stmt.setObject(5, jsonObject);
188 - Response<Embedding> embedding = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion()); 191 + Response<Embedding> embedding = aiModelUtils.getEmbedding(embedId, record.getQuestion());
189 stmt.setObject(6, embedding.content().vector()); 192 stmt.setObject(6, embedding.content().vector());
190 return stmt.executeUpdate(); 193 return stmt.executeUpdate();
191 } catch (SQLException e) { 194 } catch (SQLException e) {
@@ -214,7 +217,7 @@ public class QuestionEmbeddingMapper { @@ -214,7 +217,7 @@ public class QuestionEmbeddingMapper {
214 jsonObject.setValue(mataData.toJSONString());*/ 217 jsonObject.setValue(mataData.toJSONString());*/
215 stmt.setObject(4, record.getMetadata()); 218 stmt.setObject(4, record.getMetadata());
216 219
217 - Response<Embedding> embedding = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion()); 220 + Response<Embedding> embedding = aiModelUtils.getEmbedding(embedId, record.getQuestion());
218 stmt.setObject(5, embedding.content().vector()); 221 stmt.setObject(5, embedding.content().vector());
219 222
220 stmt.setString(6, record.getId()); 223 stmt.setString(6, record.getId());
@@ -274,7 +277,7 @@ public class QuestionEmbeddingMapper { @@ -274,7 +277,7 @@ public class QuestionEmbeddingMapper {
274 } 277 }
275 278
276 // 2. 获取问题的嵌入向量 279 // 2. 获取问题的嵌入向量
277 - Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding("1925730210204721154", question); 280 + Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding(embedId, question);
278 float[] queryVector = embeddingResponse.content().vector(); 281 float[] queryVector = embeddingResponse.content().vector();
279 282
280 // 3. 计算最大允许距离(1 - 相似度阈值) 283 // 3. 计算最大允许距离(1 - 相似度阈值)
@@ -13,10 +13,7 @@ import dev.langchain4j.data.embedding.Embedding; @@ -13,10 +13,7 @@ import dev.langchain4j.data.embedding.Embedding;
13 import dev.langchain4j.data.segment.TextSegment; 13 import dev.langchain4j.data.segment.TextSegment;
14 import dev.langchain4j.model.output.Response; 14 import dev.langchain4j.model.output.Response;
15 import org.apache.commons.io.FilenameUtils; 15 import org.apache.commons.io.FilenameUtils;
16 -import org.apache.poi.xwpf.usermodel.IBodyElement;  
17 -import org.apache.poi.xwpf.usermodel.XWPFDocument;  
18 -import org.apache.poi.xwpf.usermodel.XWPFParagraph;  
19 -import org.apache.poi.xwpf.usermodel.XWPFTable; 16 +import org.apache.poi.xwpf.usermodel.*;
20 import org.jeecg.common.api.vo.Result; 17 import org.jeecg.common.api.vo.Result;
21 import org.jeecg.modules.airag.app.entity.QuestionEmbedding; 18 import org.jeecg.modules.airag.app.entity.QuestionEmbedding;
22 import org.jeecg.modules.airag.app.mapper.QuestionEmbeddingMapper; 19 import org.jeecg.modules.airag.app.mapper.QuestionEmbeddingMapper;
@@ -67,12 +64,14 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { @@ -67,12 +64,14 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
67 64
68 @Value("${jeecg.upload.path}") 65 @Value("${jeecg.upload.path}")
69 private String uploadPath; 66 private String uploadPath;
  67 + @Value("${jeecg.ai-chat.embedId}")
  68 + private String embedId;
70 69
71 private static final Set<String> ALLOWED_EXTENSIONS = Set.of("txt", "doc", "docx"); 70 private static final Set<String> ALLOWED_EXTENSIONS = Set.of("txt", "doc", "docx");
72 private static final Pattern SPECIAL_CHARS_PATTERN = Pattern.compile("[^a-zA-Z0-9\\u4e00-\\u9fa5\\s]"); 71 private static final Pattern SPECIAL_CHARS_PATTERN = Pattern.compile("[^a-zA-Z0-9\\u4e00-\\u9fa5\\s]");
73 private static final Pattern UUID_PATTERN = Pattern.compile("_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"); 72 private static final Pattern UUID_PATTERN = Pattern.compile("_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}");
74 73
75 - // 新增:数据库连接配置 74 + // 数据库连接配置
76 private static final String DB_URL = "jdbc:postgresql://192.168.100.104:5432/postgres"; 75 private static final String DB_URL = "jdbc:postgresql://192.168.100.104:5432/postgres";
77 private static final String DB_USER = "postgres"; 76 private static final String DB_USER = "postgres";
78 private static final String DB_PASSWORD = "postgres"; 77 private static final String DB_PASSWORD = "postgres";
@@ -188,56 +187,59 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { @@ -188,56 +187,59 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
188 saveSegmentsToDatabase(segments, originalFileName, storedFileName, knowledgeId); 187 saveSegmentsToDatabase(segments, originalFileName, storedFileName, knowledgeId);
189 188
190 // 新增逻辑:同时保存到embeddings表 189 // 新增逻辑:同时保存到embeddings表
191 - saveToEmbeddingsTable(segments, originalFileName, knowledgeId);  
192 - } 190 + saveToEmbeddingsTable(segments, originalFileName, storedFileName, knowledgeId);
193 191
194 - // 新增方法:将内容保存到embeddings表  
195 - private void saveToEmbeddingsTable(List<String> segments, String originalFileName, String knowledgeId) {  
196 - if (segments.isEmpty()) {  
197 - return;  
198 - } 192 + }
  193 + private void saveToEmbeddingsTable(List<String> segments, String originalFileName, String storedFileName, String knowledgeId) {
  194 + if (segments.isEmpty()) return;
199 195
200 - // 获取无UUID和扩展名的文件名用于显示  
201 String displayFileName = removeUuidSuffix(originalFileName); 196 String displayFileName = removeUuidSuffix(originalFileName);
202 displayFileName = FilenameUtils.removeExtension(displayFileName); 197 displayFileName = FilenameUtils.removeExtension(displayFileName);
203 198
204 - // 为整个文档生成一个唯一的docId  
205 - String docId = UUID.randomUUID().toString();  
206 -  
207 - // 合并所有段落作为完整内容  
208 - String fullContent = String.join("\n\n", segments);  
209 -  
210 try (Connection conn = getConnection()) { 199 try (Connection conn = getConnection()) {
211 - // 准备元数据  
212 - Map<String, Object> metadata = new HashMap<>();  
213 - metadata.put("docId", docId);  
214 - metadata.put("docName", originalFileName);  
215 - metadata.put("knowledgeId", knowledgeId);  
216 -  
217 - // 获取文本的向量表示  
218 - Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding("1925730210204721154", displayFileName + ": " + fullContent);  
219 - float[] embeddingVector = embeddingResponse.content().vector();  
220 -  
221 - // 插入到embeddings表  
222 - String sql = "INSERT INTO embeddings (embedding_id, embedding, text, metadata) VALUES (?, ?, ?, ?::jsonb)";  
223 - try (PreparedStatement stmt = conn.prepareStatement(sql)) {  
224 - stmt.setString(1, UUID.randomUUID().toString());  
225 - stmt.setObject(2, new PGvector(embeddingVector));  
226 - stmt.setString(3, fullContent);  
227 -  
228 - PGobject jsonObject = new PGobject();  
229 - jsonObject.setType("json");  
230 - jsonObject.setValue(new ObjectMapper().writeValueAsString(metadata));  
231 - stmt.setObject(4, jsonObject);  
232 -  
233 - stmt.executeUpdate(); 200 + for (String segment : segments) {
  201 + if (segment.trim().isEmpty()) continue;
  202 +
  203 + // 回答内容是整个段落
  204 + String[] parts = segment.split("\\r?\\n", 2);
  205 + if (parts.length < 2) continue;
  206 +
  207 + String titlePath = parts[0].trim();
  208 + String answer = segment.trim(); // 整个回答段(含标题 + 内容)
  209 +
  210 + // 获取 embedding
  211 + Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding(embedId, answer);
  212 + float[] embeddingVector = embeddingResponse.content().vector();
  213 +
  214 + // 准备 metadata
  215 + Map<String, Object> metadata = new HashMap<>();
  216 + metadata.put("docName", originalFileName);
  217 + metadata.put("storedFileName", storedFileName);
  218 + metadata.put("knowledgeId", knowledgeId);
  219 + metadata.put("title", displayFileName + ": " + titlePath);
  220 +
  221 + // 插入
  222 + String sql = "INSERT INTO embeddings (embedding_id, embedding, text, metadata) VALUES (?, ?, ?, ?::jsonb)";
  223 + try (PreparedStatement stmt = conn.prepareStatement(sql)) {
  224 + stmt.setString(1, UUID.randomUUID().toString());
  225 + stmt.setObject(2, new PGvector(embeddingVector));
  226 + stmt.setString(3, answer);
  227 +
  228 + PGobject jsonObject = new PGobject();
  229 + jsonObject.setType("json");
  230 + jsonObject.setValue(new ObjectMapper().writeValueAsString(metadata));
  231 + stmt.setObject(4, jsonObject);
  232 +
  233 + stmt.executeUpdate();
  234 + }
234 } 235 }
235 } catch (Exception e) { 236 } catch (Exception e) {
236 - log.error("保存到embeddings表失败", e); 237 + log.error("保存分段到embeddings表失败", e);
237 } 238 }
238 } 239 }
239 240
240 - // 新增方法:获取数据库连接 241 +
  242 + // 获取数据库连接
241 private Connection getConnection() throws SQLException { 243 private Connection getConnection() throws SQLException {
242 return DriverManager.getConnection(DB_URL, DB_USER, DB_PASSWORD); 244 return DriverManager.getConnection(DB_URL, DB_USER, DB_PASSWORD);
243 } 245 }
@@ -253,8 +255,8 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { @@ -253,8 +255,8 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
253 } 255 }
254 256
255 private String cleanText(String text) { 257 private String cleanText(String text) {
256 - // 保留基本的标点符号,包括 . : - 等  
257 - Pattern preservedCharsPattern = Pattern.compile("[^a-zA-Z0-9\\u4e00-\\u9fa5\\s.,:、,:;。;-]"); 258 + // 保留基本的标点符号
  259 + Pattern preservedCharsPattern = Pattern.compile("[^a-zA-Z0-9\\u4e00-\\u9fa5\\s.,:、,:;。;#;-]");
258 text = preservedCharsPattern.matcher(text).replaceAll(""); 260 text = preservedCharsPattern.matcher(text).replaceAll("");
259 261
260 // 将多个换行符缩减为一个换行符 262 // 将多个换行符缩减为一个换行符
@@ -299,182 +301,183 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { @@ -299,182 +301,183 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService {
299 .collect(Collectors.toList()); 301 .collect(Collectors.toList());
300 } 302 }
301 303
302 - public List<String> splitWordDocument(String filePath) throws Exception {  
303 - List<String> result = new ArrayList<>();  
304 - String ext = FilenameUtils.getExtension(filePath).toLowerCase();  
305 - StringBuilder fullContent = new StringBuilder();  
306 304
307 - // 获取无UUID的文件名用于显示  
308 - String displayFileName = removeUuidSuffix(new File(filePath).getName());  
309 - displayFileName = FilenameUtils.removeExtension(displayFileName); 305 + // 后备分割方案:按段落结构分割
  306 + private List<String> splitByContentStructure(XWPFDocument doc) {
  307 + List<String> segments = new ArrayList<>();
  308 + StringBuilder currentSegment = new StringBuilder();
  309 + final int MAX_SEGMENT_LENGTH = 1000; // 最大分段长度
310 310
311 - if (ext.equals("docx")) {  
312 - try (XWPFDocument doc = new XWPFDocument(new FileInputStream(filePath))) {  
313 - StringBuilder currentSection = new StringBuilder();  
314 - boolean isTableSection = false;  
315 -  
316 - for (IBodyElement element : doc.getBodyElements()) {  
317 - if (element instanceof XWPFParagraph) {  
318 - XWPFParagraph para = (XWPFParagraph) element;  
319 - String text = cleanText(para.getText());  
320 - fullContent.append(text).append("\n");  
321 -  
322 - if (isTableSection) {  
323 - result.add(currentSection.toString().trim());  
324 - currentSection = new StringBuilder();  
325 - isTableSection = false;  
326 - } 311 + for (IBodyElement element : doc.getBodyElements()) {
  312 + String text = "";
  313 + if (element instanceof XWPFParagraph) {
  314 + text = ((XWPFParagraph) element).getText().trim();
  315 + } else if (element instanceof XWPFTable) {
  316 + text = extractTableContent((XWPFTable) element);
  317 + }
327 318
328 - String style = para.getStyle();  
329 - if (style != null && style.matches("Heading\\d")) {  
330 - if (!currentSection.isEmpty()) {  
331 - result.add(currentSection.toString().trim());  
332 - }  
333 - currentSection = new StringBuilder(text).append("\n");  
334 - } else {  
335 - currentSection.append(text).append("\n");  
336 - }  
337 - } else if (element instanceof XWPFTable) {  
338 - String tableContent = extractTableContent((XWPFTable) element);  
339 - fullContent.append(tableContent).append("\n");  
340 -  
341 - if (!isTableSection) {  
342 - if (!currentSection.isEmpty()) {  
343 - result.add(currentSection.toString().trim());  
344 - }  
345 - currentSection = new StringBuilder();  
346 - isTableSection = true;  
347 - }  
348 - currentSection.append(tableContent).append("\n");  
349 - }  
350 - } 319 + if (text.isEmpty()) continue;
351 320
352 - if (!currentSection.isEmpty()) {  
353 - result.add(currentSection.toString().trim());  
354 - } 321 + // 当遇到空行或达到最大长度时分段
  322 + if (currentSegment.length() + text.length() > MAX_SEGMENT_LENGTH
  323 + && currentSegment.length() > 0) {
  324 + segments.add(currentSegment.toString().trim());
  325 + currentSegment = new StringBuilder();
355 } 326 }
356 - } else if (ext.equals("doc")) {  
357 - try (HWPFDocument doc = new HWPFDocument(new FileInputStream(filePath))) {  
358 - Range range = doc.getRange();  
359 - StringBuilder currentSection = new StringBuilder();  
360 - boolean isTableSection = false;  
361 -  
362 - for (int i = 0; i < range.numParagraphs(); i++) {  
363 - Paragraph para = range.getParagraph(i);  
364 - String text = cleanText(para.text());  
365 - fullContent.append(text).append("\n");  
366 -  
367 - if (para.isInTable()) {  
368 - if (!isTableSection) {  
369 - if (!currentSection.isEmpty()) {  
370 - result.add(currentSection.toString().trim());  
371 - }  
372 - currentSection = new StringBuilder();  
373 - isTableSection = true;  
374 - }  
375 - currentSection.append(text).append("\n");  
376 - } else {  
377 - if (isTableSection) {  
378 - result.add(currentSection.toString().trim());  
379 - currentSection = new StringBuilder();  
380 - isTableSection = false;  
381 - }  
382 327
383 - if (isHeading(para, range)) {  
384 - if (!currentSection.isEmpty()) {  
385 - result.add(currentSection.toString().trim());  
386 - }  
387 - currentSection = new StringBuilder(text).append("\n");  
388 - } else {  
389 - currentSection.append(text).append("\n");  
390 - }  
391 - }  
392 - } 328 + currentSegment.append(text).append("\n\n");
  329 + }
  330 +
  331 + if (currentSegment.length() > 0) {
  332 + segments.add(currentSegment.toString().trim());
  333 + }
  334 + return segments;
  335 + }
393 336
394 - if (!currentSection.isEmpty()) {  
395 - result.add(currentSection.toString().trim()); 337 + // 按标题分割文本
  338 + private List<String> splitByHeadings(String content) {
  339 + List<String> segments = new ArrayList<>();
  340 + StringBuilder currentSegment = new StringBuilder();
  341 + String[] lines = content.split("\\r?\\n");
  342 +
  343 + for (String line : lines) {
  344 + // 检测标题行(以1-6个#开头,后面跟着空格)
  345 + if (line.trim().matches("^#{1,6}\\s+.*")) {
  346 + // 保存当前分段
  347 + if (!currentSegment.isEmpty()) {
  348 + segments.add(currentSegment.toString().trim());
  349 + currentSegment = new StringBuilder();
396 } 350 }
397 } 351 }
  352 + currentSegment.append(line).append("\n");
398 } 353 }
399 354
400 - if (fullContent.length() < 1000) {  
401 - return Collections.singletonList(displayFileName + "\n" + fullContent.toString().trim()); 355 + // 添加最后一个分段
  356 + if (!currentSegment.isEmpty()) {
  357 + segments.add(currentSegment.toString().trim());
402 } 358 }
403 359
404 - return result; 360 + return segments;
405 } 361 }
406 362
407 private String extractTableContent(XWPFTable table) { 363 private String extractTableContent(XWPFTable table) {
408 - StringBuilder tableContent = new StringBuilder("\n"); // 表格前加换行 364 + StringBuilder tableContent = new StringBuilder();
409 table.getRows().forEach(row -> { 365 table.getRows().forEach(row -> {
  366 + StringBuilder rowContent = new StringBuilder("| ");
410 row.getTableCells().forEach(cell -> { 367 row.getTableCells().forEach(cell -> {
411 - // 处理单元格内容中的多个换行  
412 - String cellText = cleanText(cell.getText()).replaceAll("(\r?\n){2,}", "\n");  
413 - tableContent.append("| ").append(cellText).append(" "); 368 + String cellText = cell.getText().replaceAll("(\r?\n){2,}", " ");
  369 + rowContent.append(cellText).append(" | ");
414 }); 370 });
415 - tableContent.append("|\n"); 371 + tableContent.append(rowContent.toString().trim()).append("\n");
416 }); 372 });
417 return tableContent.toString(); 373 return tableContent.toString();
418 } 374 }
  375 + public List<String> splitWordDocument(String filePath) throws Exception {
  376 + List<String> result = new ArrayList<>();
  377 + String ext = FilenameUtils.getExtension(filePath).toLowerCase();
419 378
  379 + if (!ext.equals("docx")) return result;
  380 +
  381 + try (XWPFDocument doc = new XWPFDocument(new FileInputStream(filePath))) {
  382 + StringBuilder currentContent = new StringBuilder();
  383 + List<String> titlePath = new ArrayList<>();
  384 + String lastOutput = null;
  385 +
  386 + for (IBodyElement element : doc.getBodyElements()) {
  387 + if (element instanceof XWPFParagraph) {
  388 + XWPFParagraph para = (XWPFParagraph) element;
  389 + String text = para.getText().trim();
  390 + if (text.isEmpty()) continue;
  391 +
  392 + int headingLevel = getHeadingLevel(para);
  393 + if (headingLevel > 0) {
  394 + // 存在之前内容,保存上一个段
  395 + if (currentContent.length() > 0 && !titlePath.isEmpty()) {
  396 + String fullBlock = String.join("", titlePath) + "\n" + currentContent.toString().trim();
  397 + result.add(fullBlock);
  398 + currentContent.setLength(0);
  399 + }
420 400
421 - private void saveSegmentsToDatabase(List<String> segments, String originalFileName, String storedFileName, String knowledgeId) {  
422 - if (segments.isEmpty()) {  
423 - return; 401 + // 更新标题路径
  402 + while (titlePath.size() >= headingLevel) {
  403 + titlePath.remove(titlePath.size() - 1);
  404 + }
  405 + titlePath.add(text);
  406 + } else {
  407 + currentContent.append(text).append("\n");
  408 + }
  409 + } else if (element instanceof XWPFTable) {
  410 + String tableText = extractTableContent((XWPFTable) element);
  411 + currentContent.append(tableText).append("\n");
  412 + }
  413 + }
  414 +
  415 + // 最后一段
  416 + if (currentContent.length() > 0 && !titlePath.isEmpty()) {
  417 + String fullBlock = String.join("", titlePath) + "\n" + currentContent.toString().trim();
  418 + result.add(fullBlock);
  419 + }
  420 + }
  421 +
  422 + return result;
  423 + }
  424 +
  425 + // 获取标题等级
  426 + private int getHeadingLevel(XWPFParagraph para) {
  427 + String style = para.getStyle();
  428 + if (style != null && style.matches("Heading\\d|标题\\d|\\d")) {
  429 + return Integer.parseInt(style.replaceAll("[^\\d]", ""));
424 } 430 }
425 431
426 - // 从存储文件名中提取UUID部分作为docId  
427 - String docId = storedFileName.substring(  
428 - storedFileName.lastIndexOf('_') + 1,  
429 - storedFileName.lastIndexOf('.')  
430 - ); 432 + if (para.getRuns().size() > 0) {
  433 + XWPFRun run = para.getRuns().get(0);
  434 + if (run.isBold() || (run.getFontSize() > 12 && run.getFontSize() != -1)) {
  435 + return 2; // 可能是二级标题
  436 + }
  437 + }
431 438
432 - // 获取无UUID和扩展名的文件名用于显示 439 + return 0;
  440 + }
  441 + private void saveSegmentsToDatabase(List<String> segments, String originalFileName, String storedFileName, String knowledgeId) {
  442 + if (segments.isEmpty()) return;
  443 +
  444 + String docId = storedFileName.substring(storedFileName.lastIndexOf('_') + 1, storedFileName.lastIndexOf('.'));
433 String displayFileName = removeUuidSuffix(originalFileName); 445 String displayFileName = removeUuidSuffix(originalFileName);
434 displayFileName = FilenameUtils.removeExtension(displayFileName); 446 displayFileName = FilenameUtils.removeExtension(displayFileName);
435 447
436 - // 判断是否是单一段落  
437 - boolean isSingleSegment = segments.size() == 1;  
438 -  
439 for (String segment : segments) { 448 for (String segment : segments) {
440 - if (segment.trim().isEmpty()) {  
441 - continue;  
442 - } 449 + if (segment.trim().isEmpty()) continue;
443 450
444 - QuestionEmbedding record = new QuestionEmbedding();  
445 - record.setId(UUID.randomUUID().toString()); 451 + String[] parts = segment.split("\\r?\\n", 2);
  452 + if (parts.length < 2) continue;
446 453
447 - if (isSingleSegment) {  
448 - record.setQuestion(displayFileName);  
449 - record.setAnswer(segment.trim());  
450 - } else {  
451 - String firstLine = segment.lines().findFirst().orElse("");  
452 - record.setQuestion(displayFileName + ": " + cleanText(firstLine));  
453 - record.setAnswer(segment.trim());  
454 - } 454 + String titleLine = parts[0].trim(); // 如 掌静脉设备: 产品特点
  455 + String content = parts[1].trim();
  456 +
  457 + // 构造问题:文件名: 标题层级路径
  458 + String question = displayFileName + titleLine;
455 459
  460 + QuestionEmbedding record = new QuestionEmbedding();
  461 + record.setId(UUID.randomUUID().toString());
  462 + record.setQuestion(question);
  463 + record.setAnswer(titleLine + "\n" + content);
456 record.setText(""); 464 record.setText("");
457 465
458 - // 构建metadata JSON对象  
459 - Map<String, String> metadata = new LinkedHashMap<>(); // 使用LinkedHashMap保持字段顺序 466 + Map<String, String> metadata = new LinkedHashMap<>();
460 metadata.put("docId", docId); 467 metadata.put("docId", docId);
461 - metadata.put("docName", originalFileName); // 上传前的原始文件名  
462 - metadata.put("storedFileName", storedFileName); // 上传后的带UUID的文件名 468 + metadata.put("docName", originalFileName);
  469 + metadata.put("storedFileName", storedFileName);
463 metadata.put("knowledgeId", knowledgeId); 470 metadata.put("knowledgeId", knowledgeId);
464 471
465 - // 使用ObjectMapper转换为JSON字符串  
466 try { 472 try {
467 record.setMetadata(new ObjectMapper().writeValueAsString(metadata)); 473 record.setMetadata(new ObjectMapper().writeValueAsString(metadata));
468 } catch (JsonProcessingException e) { 474 } catch (JsonProcessingException e) {
469 log.error("生成metadata JSON失败", e); 475 log.error("生成metadata JSON失败", e);
470 - // 使用默认值  
471 - record.setMetadata(String.format(  
472 - "{\"docId\":\"%s\",\"docName\":\"%s\",\"storedFileName\":\"%s\",\"knowledgeId\":\"%s\"}",  
473 - docId, originalFileName, storedFileName, knowledgeId  
474 - ));  
475 } 476 }
476 477
477 - Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion()); 478 + log.info("保存分段: title={}, content_length={}", question, segment.length());
  479 +
  480 + Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding(embedId, record.getQuestion());
478 record.setEmbedding(embeddingResponse.content().vector()); 481 record.setEmbedding(embeddingResponse.content().vector());
479 record.setKnowledgeId(knowledgeId); 482 record.setKnowledgeId(knowledgeId);
480 questionEmbeddingMapper.insert(record); 483 questionEmbeddingMapper.insert(record);
@@ -258,6 +258,7 @@ public class EmbeddingHandler implements IEmbeddingHandler { @@ -258,6 +258,7 @@ public class EmbeddingHandler implements IEmbeddingHandler {
258 258
259 Map<String, Object> metadataMap = new HashMap<>(); 259 Map<String, Object> metadataMap = new HashMap<>();
260 metadataMap.put("docId", metadata.getString("docId")); // 假设metadata中有docId字段 260 metadataMap.put("docId", metadata.getString("docId")); // 假设metadata中有docId字段
  261 + metadataMap.put("storedFileName", metadata.getString("storedFileName"));
261 metadataMap.put("docName", metadata.getString(EMBED_STORE_METADATA_DOCNAME)); 262 metadataMap.put("docName", metadata.getString(EMBED_STORE_METADATA_DOCNAME));
262 metadataMap.put("index", metadata.getInteger("index")); 263 metadataMap.put("index", metadata.getInteger("index"));
263 ObjectMapper objectMapper = new ObjectMapper(); 264 ObjectMapper objectMapper = new ObjectMapper();
@@ -61,39 +61,39 @@ public class ZdyRagController { @@ -61,39 +61,39 @@ public class ZdyRagController {
61 61
62 HashMap<String, Object> resMap = new HashMap<>(); 62 HashMap<String, Object> resMap = new HashMap<>();
63 //根据问题相似度进行查询 63 //根据问题相似度进行查询
64 - List<QuestionEmbedding> questionEmbeddings = questionEmbeddingService.similaritySearchByQuestion(questionText, 1,0.8);  
65 - for (QuestionEmbedding questionEmbedding : questionEmbeddings) {  
66 - resMap.put("question", questionText);  
67 - resMap.put("answer", questionEmbedding.getAnswer());  
68 - resMap.put("similarity", questionEmbedding.getSimilarity());  
69 -  
70 - ObjectMapper objectMapper = new ObjectMapper();  
71 - Map<String, String> metadata = objectMapper.readValue(questionEmbedding.getMetadata(), Map.class);  
72 - // 获取docName和docId  
73 - if (metadata != null) {  
74 - String docName = metadata.get("docName");  
75 - resMap.put("fileName", docName);  
76 - String fileName = generateFilePath(questionEmbedding.getMetadata());  
77 -  
78 - if (StringUtils.isNotBlank(fileName)) {  
79 - resMap.put("fileBase64", FileToBase64Util.fileToBase64(uploadPath + fileName));  
80 - }  
81 - }  
82 - // 记录日志 - 从问题库匹配  
83 - logRecord.setAnswer(questionEmbedding.getAnswer());  
84 - logRecord.setAnswerType(1);  
85 - airagLogService.save(logRecord);  
86 -  
87 - log.info("questionEmbedding.getMetadata() = " + questionEmbedding.getMetadata());  
88 - log.info("questionEmbedding.getQuestion() = " + questionEmbedding.getQuestion());  
89 - log.info("questionEmbedding.getAnswer() = " + questionEmbedding.getAnswer());  
90 - log.info("questionEmbedding.getSimilarity() = " + questionEmbedding.getSimilarity());  
91 - log.info("-------------------------------------------------------------");  
92 - }  
93 - //返回问题库命中的问题  
94 - if (!questionEmbeddings.isEmpty()) {  
95 - return Result.OK(resMap);  
96 - } 64 +// List<QuestionEmbedding> questionEmbeddings = questionEmbeddingService.similaritySearchByQuestion(questionText, 1,0.8);
  65 +// for (QuestionEmbedding questionEmbedding : questionEmbeddings) {
  66 +// resMap.put("question", questionText);
  67 +// resMap.put("answer", questionEmbedding.getAnswer());
  68 +// resMap.put("similarity", questionEmbedding.getSimilarity());
  69 +//
  70 +// ObjectMapper objectMapper = new ObjectMapper();
  71 +// Map<String, String> metadata = objectMapper.readValue(questionEmbedding.getMetadata(), Map.class);
  72 +// // 获取docName和docId
  73 +// if (metadata != null) {
  74 +// String docName = metadata.get("docName");
  75 +// resMap.put("fileName", docName);
  76 +// String fileName = generateFilePath(questionEmbedding.getMetadata());
  77 +//
  78 +// if (StringUtils.isNotBlank(fileName)) {
  79 +// resMap.put("fileBase64", FileToBase64Util.fileToBase64(uploadPath + fileName));
  80 +// }
  81 +// }
  82 +// // 记录日志 - 从问题库匹配
  83 +// logRecord.setAnswer(questionEmbedding.getAnswer());
  84 +// logRecord.setAnswerType(1);
  85 +// airagLogService.save(logRecord);
  86 +//
  87 +// log.info("questionEmbedding.getMetadata() = " + questionEmbedding.getMetadata());
  88 +// log.info("questionEmbedding.getQuestion() = " + questionEmbedding.getQuestion());
  89 +// log.info("questionEmbedding.getAnswer() = " + questionEmbedding.getAnswer());
  90 +// log.info("questionEmbedding.getSimilarity() = " + questionEmbedding.getSimilarity());
  91 +// log.info("-------------------------------------------------------------");
  92 +// }
  93 +// //返回问题库命中的问题
  94 +// if (!questionEmbeddings.isEmpty()) {
  95 +// return Result.OK(resMap);
  96 +// }
97 97
98 List<Map<String, Object>> maps = embeddingHandler.searchEmbedding(knowId, questionText, topNumber, similarity); 98 List<Map<String, Object>> maps = embeddingHandler.searchEmbedding(knowId, questionText, topNumber, similarity);
99 if (CollectionUtil.isEmpty(maps)) { 99 if (CollectionUtil.isEmpty(maps)) {
@@ -129,9 +129,10 @@ public class ZdyRagController { @@ -129,9 +129,10 @@ public class ZdyRagController {
129 resMap.put("question", questionText); 129 resMap.put("question", questionText);
130 resMap.put("answer", chat); 130 resMap.put("answer", chat);
131 resMap.put("similarity", maps.get(0).get("score").toString()); 131 resMap.put("similarity", maps.get(0).get("score").toString());
132 - String fileName = generateFilePath(maps.get(0).get("metadata").toString()); 132 + String fileName = generateFileDocName(maps.get(0).get("metadata").toString());
  133 + String storedFileName = generateFilePath(maps.get(0).get("metadata").toString());
133 resMap.put("fileName", fileName); 134 resMap.put("fileName", fileName);
134 - resMap.put("fileBase64",FileToBase64Util.fileToBase64(uploadPath + fileName)); 135 + resMap.put("fileBase64",FileToBase64Util.fileToBase64(uploadPath + storedFileName));
135 136
136 137
137 // 记录日志 - 从知识库生成回答 138 // 记录日志 - 从知识库生成回答
@@ -153,19 +154,19 @@ public class ZdyRagController { @@ -153,19 +154,19 @@ public class ZdyRagController {
153 Map<String, String> metadata = objectMapper.readValue(metadataJson, Map.class); 154 Map<String, String> metadata = objectMapper.readValue(metadataJson, Map.class);
154 155
155 // 获取docName和docId 156 // 获取docName和docId
156 - String docName = metadata.get("docName");  
157 - String docId = metadata.get("docId"); 157 + return metadata.get("storedFileName");
158 158
159 - // 分离文件名和扩展名  
160 - if(StringUtils.isEmpty(docName)){  
161 - return null; 159 + }
  160 + private String generateFileDocName(String metadataJson) throws Exception {
  161 + if (StringUtils.isEmpty(metadataJson)) {
  162 + return "";
162 } 163 }
163 - int dotIndex = docName.lastIndexOf('.');  
164 - String baseName = (dotIndex > 0) ? docName.substring(0, dotIndex) : docName;  
165 - String extension = (dotIndex > 0) ? docName.substring(dotIndex) : ""; 164 + ObjectMapper objectMapper = new ObjectMapper();
  165 + // 解析JSON字符串
  166 + Map<String, String> metadata = objectMapper.readValue(metadataJson, Map.class);
  167 +
  168 + return metadata.get("docName");
166 169
167 - // 组合成新文件名  
168 - return baseName + "_" + docId + extension;  
169 } 170 }
170 171
171 public static void main(String[] args) { 172 public static void main(String[] args) {
1 server: 1 server:
2 port: 8080 2 port: 8080
3 undertow: 3 undertow:
  4 + max-http-post-size: 100MB
  5 + max-headers: 20000
  6 + max-parameters: 20000
  7 + max-buffered-request-size: 1MB
4 # max-http-post-size: 10MB # 平替 tomcat server.tomcat.max-swallow-size, undertow该值默认为-1 8 # max-http-post-size: 10MB # 平替 tomcat server.tomcat.max-swallow-size, undertow该值默认为-1
5 worker-threads: 16 # 4核CPU标准配置 9 worker-threads: 16 # 4核CPU标准配置
6 buffers: 10 buffers:
@@ -33,6 +37,7 @@ spring: @@ -33,6 +37,7 @@ spring:
33 multipart: 37 multipart:
34 max-file-size: 10MB 38 max-file-size: 10MB
35 max-request-size: 10MB 39 max-request-size: 10MB
  40 + enabled: true
36 mail: 41 mail:
37 # 定时任务发送邮件 42 # 定时任务发送邮件
38 timeJobSend: false 43 timeJobSend: false
@@ -152,7 +157,7 @@ spring: @@ -152,7 +157,7 @@ spring:
152 slow-sql-millis: 5000 157 slow-sql-millis: 5000
153 datasource: 158 datasource:
154 master: 159 master:
155 - url: jdbc:mysql://127.0.0.1:3306/jeecg-boot?characterEncoding=UTF-8&useUnicode=true&useSSL=false&tinyInt1isBit=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai 160 + url: jdbc:mysql://localhost:3306/jeecg-boot?characterEncoding=UTF-8&useUnicode=true&useSSL=false&tinyInt1isBit=false&allowPublicKeyRetrieval=true&serverTimezone=Asia/Shanghai
156 username: root 161 username: root
157 password: 1234 162 password: 1234
158 driver-class-name: com.mysql.cj.jdbc.Driver 163 driver-class-name: com.mysql.cj.jdbc.Driver
@@ -181,7 +186,7 @@ mybatis-plus: @@ -181,7 +186,7 @@ mybatis-plus:
181 table-underline: true 186 table-underline: true
182 configuration: 187 configuration:
183 # 这个配置会将执行的sql打印出来,在开发或测试的时候可以用 188 # 这个配置会将执行的sql打印出来,在开发或测试的时候可以用
184 - #log-impl: org.apache.ibatis.logging.stdout.StdOutImpl 189 + log-impl: org.apache.ibatis.logging.stdout.StdOutImpl
185 # 返回类型为Map,显示null对应的字段 190 # 返回类型为Map,显示null对应的字段
186 call-setters-on-nulls: true 191 call-setters-on-nulls: true
187 #jeecg专用配置 192 #jeecg专用配置
@@ -189,9 +194,10 @@ minidao: @@ -189,9 +194,10 @@ minidao:
189 base-package: org.jeecg.modules.jmreport.*,org.jeecg.modules.drag.* 194 base-package: org.jeecg.modules.jmreport.*,org.jeecg.modules.drag.*
190 jeecg: 195 jeecg:
191 upload: 196 upload:
192 - path: D:\\upload\\ 197 + path: D:\upload\
193 # AI集成 198 # AI集成
194 ai-chat: 199 ai-chat:
  200 + embedId: 1937039670944731137
195 enabled: true 201 enabled: true
196 model: deepseek-chat 202 model: deepseek-chat
197 apiKey: ?? 203 apiKey: ??
@@ -200,7 +206,7 @@ jeecg: @@ -200,7 +206,7 @@ jeecg:
200 # AIRag向量库 206 # AIRag向量库
201 ai-rag: 207 ai-rag:
202 embed-store: 208 embed-store:
203 - host: 192.168.100.103 209 + host: 192.168.100.104
204 port: 5432 210 port: 5432
205 database: postgres 211 database: postgres
206 user: postgres 212 user: postgres