正在显示
11 个修改的文件
包含
427 行增加
和
143 行删除
| @@ -20,6 +20,7 @@ import org.springframework.web.servlet.ModelAndView; | @@ -20,6 +20,7 @@ import org.springframework.web.servlet.ModelAndView; | ||
| 20 | import javax.servlet.http.HttpServletRequest; | 20 | import javax.servlet.http.HttpServletRequest; |
| 21 | import javax.servlet.http.HttpServletResponse; | 21 | import javax.servlet.http.HttpServletResponse; |
| 22 | import java.util.Arrays; | 22 | import java.util.Arrays; |
| 23 | +import java.util.List; | ||
| 23 | 24 | ||
| 24 | /** | 25 | /** |
| 25 | * @Description: 按钮表单 | 26 | * @Description: 按钮表单 |
| @@ -58,6 +59,22 @@ public class AiragButtonController extends JeecgController<AiragButton, IAiragBu | @@ -58,6 +59,22 @@ public class AiragButtonController extends JeecgController<AiragButton, IAiragBu | ||
| 58 | return Result.OK(pageList); | 59 | return Result.OK(pageList); |
| 59 | } | 60 | } |
| 60 | 61 | ||
| 62 | + | ||
| 63 | + /** | ||
| 64 | + * 分页列表查询 | ||
| 65 | + * | ||
| 66 | + * @return | ||
| 67 | + */ | ||
| 68 | + @Operation(summary="按钮表单-智能助手按钮列表") | ||
| 69 | + @GetMapping(value = "/buttonList") | ||
| 70 | + public Result<List<AiragButton>> buttonList() { | ||
| 71 | + QueryWrapper<AiragButton> queryWrapper = new QueryWrapper<>(); | ||
| 72 | + queryWrapper.eq("button_switch", "Y"); | ||
| 73 | + List<AiragButton> list = airagButtonService.list(queryWrapper); | ||
| 74 | + return Result.OK(list); | ||
| 75 | + } | ||
| 76 | + | ||
| 77 | + | ||
| 61 | /** | 78 | /** |
| 62 | * 添加 | 79 | * 添加 |
| 63 | * | 80 | * |
| 1 | package org.jeecg.modules.airag.app.controller; | 1 | package org.jeecg.modules.airag.app.controller; |
| 2 | 2 | ||
| 3 | +import lombok.extern.slf4j.Slf4j; | ||
| 4 | +import org.apache.commons.lang3.StringUtils; | ||
| 3 | import org.jeecg.common.api.vo.Result; | 5 | import org.jeecg.common.api.vo.Result; |
| 4 | import org.jeecg.modules.airag.app.entity.QuestionEmbedding; | 6 | import org.jeecg.modules.airag.app.entity.QuestionEmbedding; |
| 5 | import org.jeecg.modules.airag.app.service.IQuestionEmbeddingService; | 7 | import org.jeecg.modules.airag.app.service.IQuestionEmbeddingService; |
| 8 | +import org.jeecg.modules.airag.app.utils.JsonUtils; | ||
| 9 | +import org.jeecg.modules.airag.llm.entity.AiragKnowledge; | ||
| 10 | +import org.jeecg.modules.airag.llm.service.IAiragKnowledgeService; | ||
| 6 | import org.springframework.beans.factory.annotation.Autowired; | 11 | import org.springframework.beans.factory.annotation.Autowired; |
| 7 | import org.springframework.transaction.annotation.Transactional; | 12 | import org.springframework.transaction.annotation.Transactional; |
| 8 | import org.springframework.web.bind.annotation.*; | 13 | import org.springframework.web.bind.annotation.*; |
| 9 | import org.springframework.web.multipart.MultipartFile; | 14 | import org.springframework.web.multipart.MultipartFile; |
| 10 | 15 | ||
| 11 | import java.util.List; | 16 | import java.util.List; |
| 17 | +import java.util.Map; | ||
| 18 | +import java.util.stream.Collectors; | ||
| 12 | 19 | ||
| 13 | @RestController | 20 | @RestController |
| 14 | @RequestMapping("/question/embedding") | 21 | @RequestMapping("/question/embedding") |
| 22 | +@Slf4j | ||
| 15 | public class QuestionEmbeddingController { | 23 | public class QuestionEmbeddingController { |
| 16 | @Autowired | 24 | @Autowired |
| 17 | private IQuestionEmbeddingService questionEmbeddingService; | 25 | private IQuestionEmbeddingService questionEmbeddingService; |
| 26 | + @Autowired | ||
| 27 | + private IAiragKnowledgeService airagKnowledgeService; | ||
| 18 | 28 | ||
| 19 | @GetMapping("/list") | 29 | @GetMapping("/list") |
| 20 | public Result<List<QuestionEmbedding>> findAll() { | 30 | public Result<List<QuestionEmbedding>> findAll() { |
| 21 | List<QuestionEmbedding> list = questionEmbeddingService.findAll(); | 31 | List<QuestionEmbedding> list = questionEmbeddingService.findAll(); |
| 32 | + Map<String, String> airagKnowledgeMap = airagKnowledgeService.list() | ||
| 33 | + .stream() | ||
| 34 | + .collect(Collectors.toMap(AiragKnowledge::getId, AiragKnowledge::getName)); | ||
| 35 | + | ||
| 36 | + list.forEach(item -> { | ||
| 37 | + String metadata = item.getMetadata(); | ||
| 38 | + if (StringUtils.isNotBlank(metadata)) { | ||
| 39 | + Map<String, String> jsonMap = JsonUtils.jsonUtils(metadata); | ||
| 40 | + if (jsonMap.containsKey("knowledgeId")) { | ||
| 41 | + item.setKnowledgeName(airagKnowledgeMap.get(jsonMap.get("knowledgeId"))); | ||
| 42 | + item.setKnowledgeId(jsonMap.get("knowledgeId")); | ||
| 43 | + } | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + }); | ||
| 22 | return Result.OK(list); | 47 | return Result.OK(list); |
| 23 | } | 48 | } |
| 24 | 49 | ||
| @@ -51,8 +76,41 @@ public class QuestionEmbeddingController { | @@ -51,8 +76,41 @@ public class QuestionEmbeddingController { | ||
| 51 | 76 | ||
| 52 | @PostMapping("/uploadZip") | 77 | @PostMapping("/uploadZip") |
| 53 | @Transactional(rollbackFor = {Exception.class}) | 78 | @Transactional(rollbackFor = {Exception.class}) |
| 54 | - public Result<?> uploadZip(@RequestParam("file") MultipartFile file) { | ||
| 55 | - return questionEmbeddingService.processZipUpload(file); | 79 | + public Result<?> uploadZip( |
| 80 | + @RequestParam("file") MultipartFile file, | ||
| 81 | + @RequestParam("knowledgeId") String knowledgeId) { | ||
| 82 | + | ||
| 83 | + // 增强日志记录 | ||
| 84 | + log.info("收到上传请求 - 文件名: {}, 大小: {} bytes, 知识库ID: {}", | ||
| 85 | + file.getOriginalFilename(), | ||
| 86 | + file.getSize(), | ||
| 87 | + knowledgeId); | ||
| 88 | + | ||
| 89 | + if (file.isEmpty()) { | ||
| 90 | + log.warn("上传文件为空"); | ||
| 91 | + return Result.error("请上传有效的ZIP文件"); | ||
| 56 | } | 92 | } |
| 57 | 93 | ||
| 94 | + try { | ||
| 95 | + // 验证文件类型 | ||
| 96 | + if (!"application/zip".equals(file.getContentType()) && | ||
| 97 | + !"application/x-zip-compressed".equals(file.getContentType())) { | ||
| 98 | + log.warn("不支持的文件类型: {}", file.getContentType()); | ||
| 99 | + return Result.error("仅支持ZIP格式文件"); | ||
| 100 | + } | ||
| 101 | + | ||
| 102 | + // 处理文件 | ||
| 103 | + return questionEmbeddingService.processZipUpload(file, knowledgeId); | ||
| 104 | + } catch (Exception e) { | ||
| 105 | + log.error("处理ZIP文件上传失败", e); | ||
| 106 | + return Result.error("处理ZIP文件失败: " + e.getMessage()); | ||
| 107 | + } | ||
| 108 | + } | ||
| 109 | + | ||
| 110 | + | ||
| 111 | + public static void main(String[] args) { | ||
| 112 | + String str = "{\"docId\": \"1930453193934766081\", \"docName\": \"在校学生身份证换证、补办手续办理\", \"knowledgeId\": \"1930453193934766080\"}"; | ||
| 113 | + Map<String, String> jsonMap = JsonUtils.jsonUtils(str); | ||
| 114 | + System.out.println(jsonMap.get("docName")); | ||
| 115 | + } | ||
| 58 | } | 116 | } |
| 1 | package org.jeecg.modules.airag.app.entity; | 1 | package org.jeecg.modules.airag.app.entity; |
| 2 | 2 | ||
| 3 | +import com.baomidou.mybatisplus.annotation.TableField; | ||
| 3 | import lombok.AllArgsConstructor; | 4 | import lombok.AllArgsConstructor; |
| 4 | import lombok.Data; | 5 | import lombok.Data; |
| 5 | import lombok.NoArgsConstructor; | 6 | import lombok.NoArgsConstructor; |
| @@ -10,11 +11,44 @@ import java.util.Map; | @@ -10,11 +11,44 @@ import java.util.Map; | ||
| 10 | @AllArgsConstructor | 11 | @AllArgsConstructor |
| 11 | @NoArgsConstructor | 12 | @NoArgsConstructor |
| 12 | public class QuestionEmbedding { | 13 | public class QuestionEmbedding { |
| 14 | + /** | ||
| 15 | + * 主键id | ||
| 16 | + */ | ||
| 13 | private String id; | 17 | private String id; |
| 18 | + /** | ||
| 19 | + * 原文 | ||
| 20 | + */ | ||
| 14 | private String text; | 21 | private String text; |
| 22 | + /** | ||
| 23 | + * 问题 | ||
| 24 | + */ | ||
| 15 | private String question; | 25 | private String question; |
| 26 | + /** | ||
| 27 | + * 回答 | ||
| 28 | + */ | ||
| 16 | private String answer; | 29 | private String answer; |
| 30 | + /** | ||
| 31 | + * 元数据 | ||
| 32 | + */ | ||
| 17 | private String metadata; | 33 | private String metadata; |
| 34 | + /** | ||
| 35 | + * 向量 | ||
| 36 | + */ | ||
| 18 | private float[] embedding; | 37 | private float[] embedding; |
| 38 | + | ||
| 39 | + /** | ||
| 40 | + * 相似度 | ||
| 41 | + */ | ||
| 19 | private Double similarity; | 42 | private Double similarity; |
| 43 | + /** | ||
| 44 | + * 知识库名称 | ||
| 45 | + */ | ||
| 46 | + @TableField(exist = false) | ||
| 47 | + private String knowledgeName; | ||
| 48 | + /** | ||
| 49 | + * 知识库id | ||
| 50 | + */ | ||
| 51 | + @TableField(exist = false) | ||
| 52 | + private String knowledgeId; | ||
| 53 | + | ||
| 20 | } | 54 | } |
| @@ -20,7 +20,7 @@ import java.util.*; | @@ -20,7 +20,7 @@ import java.util.*; | ||
| 20 | public class PgVectorMapper { | 20 | public class PgVectorMapper { |
| 21 | 21 | ||
| 22 | // PostgreSQL连接参数(实际项目中应从配置读取) | 22 | // PostgreSQL连接参数(实际项目中应从配置读取) |
| 23 | - private static final String URL = "jdbc:postgresql://192.168.100.103:5432/postgres"; | 23 | + private static final String URL = "jdbc:postgresql://192.168.100.104:5432/postgres"; |
| 24 | private static final String USER = "postgres"; | 24 | private static final String USER = "postgres"; |
| 25 | private static final String PASSWORD = "postgres"; | 25 | private static final String PASSWORD = "postgres"; |
| 26 | 26 |
| 1 | package org.jeecg.modules.airag.app.mapper; | 1 | package org.jeecg.modules.airag.app.mapper; |
| 2 | 2 | ||
| 3 | +import com.alibaba.fastjson2.JSONObject; | ||
| 3 | import com.fasterxml.jackson.core.JsonProcessingException; | 4 | import com.fasterxml.jackson.core.JsonProcessingException; |
| 4 | import com.fasterxml.jackson.core.type.TypeReference; | 5 | import com.fasterxml.jackson.core.type.TypeReference; |
| 5 | import com.fasterxml.jackson.databind.ObjectMapper; | 6 | import com.fasterxml.jackson.databind.ObjectMapper; |
| @@ -25,7 +26,7 @@ public class QuestionEmbeddingMapper { | @@ -25,7 +26,7 @@ public class QuestionEmbeddingMapper { | ||
| 25 | private AiModelUtils aiModelUtils; | 26 | private AiModelUtils aiModelUtils; |
| 26 | 27 | ||
| 27 | // PostgreSQL连接参数(应与项目配置一致) | 28 | // PostgreSQL连接参数(应与项目配置一致) |
| 28 | - private static final String URL = "jdbc:postgresql://192.168.100.103:5432/postgres"; | 29 | + private static final String URL = "jdbc:postgresql://192.168.100.104:5432/postgres"; |
| 29 | private static final String USER = "postgres"; | 30 | private static final String USER = "postgres"; |
| 30 | private static final String PASSWORD = "postgres"; | 31 | private static final String PASSWORD = "postgres"; |
| 31 | 32 | ||
| @@ -86,7 +87,11 @@ public class QuestionEmbeddingMapper { | @@ -86,7 +87,11 @@ public class QuestionEmbeddingMapper { | ||
| 86 | stmt.setString(4, record.getAnswer()); | 87 | stmt.setString(4, record.getAnswer()); |
| 87 | PGobject jsonObject = new PGobject(); | 88 | PGobject jsonObject = new PGobject(); |
| 88 | jsonObject.setType("json"); | 89 | jsonObject.setType("json"); |
| 89 | - jsonObject.setValue("{\"name\":\"John\", \"age\":30}"); | 90 | + |
| 91 | +// JSONObject mataData = new JSONObject(); | ||
| 92 | +// mataData.put("knowledgeId",record.getKnowledgeId()); | ||
| 93 | + | ||
| 94 | + jsonObject.setValue(record.getMetadata()); | ||
| 90 | stmt.setObject(5, jsonObject); | 95 | stmt.setObject(5, jsonObject); |
| 91 | Response<Embedding> embedding = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion()); | 96 | Response<Embedding> embedding = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion()); |
| 92 | stmt.setObject(6, embedding.content().vector()); | 97 | stmt.setObject(6, embedding.content().vector()); |
| @@ -109,7 +114,11 @@ public class QuestionEmbeddingMapper { | @@ -109,7 +114,11 @@ public class QuestionEmbeddingMapper { | ||
| 109 | stmt.setString(3, record.getAnswer()); | 114 | stmt.setString(3, record.getAnswer()); |
| 110 | PGobject jsonObject = new PGobject(); | 115 | PGobject jsonObject = new PGobject(); |
| 111 | jsonObject.setType("json"); | 116 | jsonObject.setType("json"); |
| 112 | - jsonObject.setValue("{\"name\":\"John\", \"age\":30}"); | 117 | + |
| 118 | + JSONObject mataData = new JSONObject(); | ||
| 119 | +// mataData.put("knowledgeId",record.getKnowledgeId()); | ||
| 120 | + | ||
| 121 | + jsonObject.setValue(mataData.toJSONString()); | ||
| 113 | stmt.setObject(4, jsonObject); | 122 | stmt.setObject(4, jsonObject); |
| 114 | 123 | ||
| 115 | Response<Embedding> embedding = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion()); | 124 | Response<Embedding> embedding = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion()); |
| @@ -240,7 +249,7 @@ public class QuestionEmbeddingMapper { | @@ -240,7 +249,7 @@ public class QuestionEmbeddingMapper { | ||
| 240 | 249 | ||
| 241 | String metadataJson = rs.getString("metadata"); | 250 | String metadataJson = rs.getString("metadata"); |
| 242 | if (StringUtils.isNotBlank(metadataJson)) { | 251 | if (StringUtils.isNotBlank(metadataJson)) { |
| 243 | - record.setMetadata(""); | 252 | + record.setMetadata(metadataJson); |
| 244 | } | 253 | } |
| 245 | 254 | ||
| 246 | return record; | 255 | return record; |
| @@ -15,5 +15,5 @@ public interface IQuestionEmbeddingService { | @@ -15,5 +15,5 @@ public interface IQuestionEmbeddingService { | ||
| 15 | List<QuestionEmbedding> similaritySearchByQuestion(String question, int limit, Double minSimilarity); | 15 | List<QuestionEmbedding> similaritySearchByQuestion(String question, int limit, Double minSimilarity); |
| 16 | List<QuestionEmbedding> similaritySearch(float[] vector, int limit); | 16 | List<QuestionEmbedding> similaritySearch(float[] vector, int limit); |
| 17 | 17 | ||
| 18 | - Result<?> processZipUpload(MultipartFile file); | 18 | + Result<?> processZipUpload(MultipartFile file, String knowledgeId); |
| 19 | } | 19 | } |
| @@ -6,13 +6,9 @@ import org.apache.poi.hwpf.usermodel.Paragraph; | @@ -6,13 +6,9 @@ import org.apache.poi.hwpf.usermodel.Paragraph; | ||
| 6 | import org.apache.poi.hwpf.usermodel.Range; | 6 | import org.apache.poi.hwpf.usermodel.Range; |
| 7 | import dev.langchain4j.data.document.Document; | 7 | import dev.langchain4j.data.document.Document; |
| 8 | import dev.langchain4j.data.document.DocumentSplitter; | 8 | import dev.langchain4j.data.document.DocumentSplitter; |
| 9 | -import dev.langchain4j.data.document.loader.FileSystemDocumentLoader; | ||
| 10 | -import dev.langchain4j.data.document.parser.TextDocumentParser; | ||
| 11 | import dev.langchain4j.data.document.splitter.DocumentByParagraphSplitter; | 9 | import dev.langchain4j.data.document.splitter.DocumentByParagraphSplitter; |
| 12 | -import dev.langchain4j.data.document.splitter.DocumentSplitters; | ||
| 13 | import dev.langchain4j.data.embedding.Embedding; | 10 | import dev.langchain4j.data.embedding.Embedding; |
| 14 | import dev.langchain4j.data.segment.TextSegment; | 11 | import dev.langchain4j.data.segment.TextSegment; |
| 15 | -import dev.langchain4j.model.embedding.EmbeddingModel; | ||
| 16 | import dev.langchain4j.model.output.Response; | 12 | import dev.langchain4j.model.output.Response; |
| 17 | import org.apache.commons.io.FilenameUtils; | 13 | import org.apache.commons.io.FilenameUtils; |
| 18 | import org.apache.poi.xwpf.usermodel.IBodyElement; | 14 | import org.apache.poi.xwpf.usermodel.IBodyElement; |
| @@ -20,7 +16,6 @@ import org.apache.poi.xwpf.usermodel.XWPFDocument; | @@ -20,7 +16,6 @@ import org.apache.poi.xwpf.usermodel.XWPFDocument; | ||
| 20 | import org.apache.poi.xwpf.usermodel.XWPFParagraph; | 16 | import org.apache.poi.xwpf.usermodel.XWPFParagraph; |
| 21 | import org.apache.poi.xwpf.usermodel.XWPFTable; | 17 | import org.apache.poi.xwpf.usermodel.XWPFTable; |
| 22 | import org.jeecg.common.api.vo.Result; | 18 | import org.jeecg.common.api.vo.Result; |
| 23 | -import org.jeecg.common.util.CommonUtils; | ||
| 24 | import org.jeecg.modules.airag.app.entity.QuestionEmbedding; | 19 | import org.jeecg.modules.airag.app.entity.QuestionEmbedding; |
| 25 | import org.jeecg.modules.airag.app.mapper.QuestionEmbeddingMapper; | 20 | import org.jeecg.modules.airag.app.mapper.QuestionEmbeddingMapper; |
| 26 | import org.jeecg.modules.airag.app.service.IQuestionEmbeddingService; | 21 | import org.jeecg.modules.airag.app.service.IQuestionEmbeddingService; |
| @@ -36,6 +31,7 @@ import org.springframework.web.multipart.MultipartFile; | @@ -36,6 +31,7 @@ import org.springframework.web.multipart.MultipartFile; | ||
| 36 | import java.io.File; | 31 | import java.io.File; |
| 37 | import java.io.FileInputStream; | 32 | import java.io.FileInputStream; |
| 38 | import java.io.IOException; | 33 | import java.io.IOException; |
| 34 | +import java.nio.charset.Charset; | ||
| 39 | import java.nio.file.Files; | 35 | import java.nio.file.Files; |
| 40 | import java.nio.file.Path; | 36 | import java.nio.file.Path; |
| 41 | import java.nio.file.Paths; | 37 | import java.nio.file.Paths; |
| @@ -65,6 +61,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -65,6 +61,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 65 | 61 | ||
| 66 | private static final Set<String> ALLOWED_EXTENSIONS = Set.of("txt", "doc", "docx"); | 62 | private static final Set<String> ALLOWED_EXTENSIONS = Set.of("txt", "doc", "docx"); |
| 67 | private static final Pattern SPECIAL_CHARS_PATTERN = Pattern.compile("[^a-zA-Z0-9\\u4e00-\\u9fa5\\s]"); | 63 | private static final Pattern SPECIAL_CHARS_PATTERN = Pattern.compile("[^a-zA-Z0-9\\u4e00-\\u9fa5\\s]"); |
| 64 | + private static final Pattern UUID_PATTERN = Pattern.compile("_[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"); | ||
| 68 | 65 | ||
| 69 | @Override | 66 | @Override |
| 70 | public List<QuestionEmbedding> findAll() { | 67 | public List<QuestionEmbedding> findAll() { |
| @@ -101,7 +98,8 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -101,7 +98,8 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 101 | return questionEmbeddingMapper.similaritySearch(vector, limit); | 98 | return questionEmbeddingMapper.similaritySearch(vector, limit); |
| 102 | } | 99 | } |
| 103 | 100 | ||
| 104 | - public Result<?> processZipUpload(MultipartFile zipFile) { | 101 | + @Override |
| 102 | + public Result<?> processZipUpload(MultipartFile zipFile, String knowledgeId) { | ||
| 105 | try { | 103 | try { |
| 106 | Path tempDir = Files.createTempDirectory("zip_upload_"); | 104 | Path tempDir = Files.createTempDirectory("zip_upload_"); |
| 107 | List<Path> validFiles = extractAndFilterZip(zipFile, tempDir); | 105 | List<Path> validFiles = extractAndFilterZip(zipFile, tempDir); |
| @@ -111,7 +109,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -111,7 +109,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 111 | } | 109 | } |
| 112 | 110 | ||
| 113 | for (Path filePath : validFiles) { | 111 | for (Path filePath : validFiles) { |
| 114 | - processSingleFile(filePath); | 112 | + processSingleFile(filePath, knowledgeId); |
| 115 | } | 113 | } |
| 116 | 114 | ||
| 117 | return Result.OK("文件上传和处理成功"); | 115 | return Result.OK("文件上传和处理成功"); |
| @@ -124,7 +122,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -124,7 +122,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 124 | private List<Path> extractAndFilterZip(MultipartFile zipFile, Path tempDir) throws IOException { | 122 | private List<Path> extractAndFilterZip(MultipartFile zipFile, Path tempDir) throws IOException { |
| 125 | List<Path> validFiles = new ArrayList<>(); | 123 | List<Path> validFiles = new ArrayList<>(); |
| 126 | 124 | ||
| 127 | - try (ZipInputStream zipIn = new ZipInputStream(zipFile.getInputStream())) { | 125 | + try (ZipInputStream zipIn = new ZipInputStream(zipFile.getInputStream(), Charset.forName("GBK"))) { |
| 128 | ZipEntry entry; | 126 | ZipEntry entry; |
| 129 | while ((entry = zipIn.getNextEntry()) != null) { | 127 | while ((entry = zipIn.getNextEntry()) != null) { |
| 130 | if (!entry.isDirectory()) { | 128 | if (!entry.isDirectory()) { |
| @@ -144,11 +142,13 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -144,11 +142,13 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 144 | return validFiles; | 142 | return validFiles; |
| 145 | } | 143 | } |
| 146 | 144 | ||
| 147 | - private void processSingleFile(Path filePath) throws Exception { | 145 | + private void processSingleFile(Path filePath, String knowledgeId) throws Exception { |
| 148 | String originalFileName = filePath.getFileName().toString(); | 146 | String originalFileName = filePath.getFileName().toString(); |
| 149 | String fileExt = FilenameUtils.getExtension(originalFileName); | 147 | String fileExt = FilenameUtils.getExtension(originalFileName); |
| 150 | - String newFileName = FilenameUtils.removeExtension(originalFileName) + "_" + UUID.randomUUID() + "." + fileExt; | ||
| 151 | - Path targetPath = Paths.get(uploadPath, newFileName); | 148 | + |
| 149 | + // 生成带UUID的文件名用于存储 | ||
| 150 | + String storedFileName = generateStoredFileName(originalFileName); | ||
| 151 | + Path targetPath = Paths.get(uploadPath, storedFileName); | ||
| 152 | Files.move(filePath, targetPath, StandardCopyOption.REPLACE_EXISTING); | 152 | Files.move(filePath, targetPath, StandardCopyOption.REPLACE_EXISTING); |
| 153 | 153 | ||
| 154 | List<String> segments; | 154 | List<String> segments; |
| @@ -160,7 +160,13 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -160,7 +160,13 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 160 | segments = splitWordDocument(targetPath.toString()); | 160 | segments = splitWordDocument(targetPath.toString()); |
| 161 | } | 161 | } |
| 162 | 162 | ||
| 163 | - saveSegmentsToDatabase(segments, originalFileName, newFileName); | 163 | + saveSegmentsToDatabase(segments, originalFileName, storedFileName, knowledgeId); |
| 164 | + } | ||
| 165 | + | ||
| 166 | + private String generateStoredFileName(String originalFileName) { | ||
| 167 | + String baseName = FilenameUtils.removeExtension(originalFileName); | ||
| 168 | + String ext = FilenameUtils.getExtension(originalFileName); | ||
| 169 | + return baseName + "_" + UUID.randomUUID() + "." + ext; | ||
| 164 | } | 170 | } |
| 165 | 171 | ||
| 166 | private String readFileContent(Path filePath) throws IOException { | 172 | private String readFileContent(Path filePath) throws IOException { |
| @@ -168,10 +174,44 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -168,10 +174,44 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 168 | } | 174 | } |
| 169 | 175 | ||
| 170 | private String cleanText(String text) { | 176 | private String cleanText(String text) { |
| 171 | - text = SPECIAL_CHARS_PATTERN.matcher(text).replaceAll(""); | 177 | + // 保留基本的标点符号,包括 . : - 等 |
| 178 | + Pattern preservedCharsPattern = Pattern.compile("[^a-zA-Z0-9\\u4e00-\\u9fa5\\s.,:、,:;。;-]"); | ||
| 179 | + text = preservedCharsPattern.matcher(text).replaceAll(""); | ||
| 180 | + | ||
| 181 | + // 将多个换行符缩减为一个换行符 | ||
| 182 | + text = text.replaceAll("(\r?\n){2,}", "\n"); | ||
| 183 | + | ||
| 184 | + // 处理其他空白字符 | ||
| 172 | return text.replaceAll("\\s+", " ").trim(); | 185 | return text.replaceAll("\\s+", " ").trim(); |
| 173 | } | 186 | } |
| 174 | 187 | ||
| 188 | + // 修改isHeading方法中的判断条件,不再排除包含.的文本 | ||
| 189 | + private static boolean isHeading(Paragraph para, Range range) { | ||
| 190 | + int styleIndex = para.getStyleIndex(); | ||
| 191 | + if (styleIndex >= 1 && styleIndex <= 9) { | ||
| 192 | + return true; | ||
| 193 | + } | ||
| 194 | + | ||
| 195 | + try { | ||
| 196 | + CharacterRun run = para.getCharacterRun(0); | ||
| 197 | + if (run.isBold() || run.getFontSize() > 12) { | ||
| 198 | + return true; | ||
| 199 | + } | ||
| 200 | + } catch (Exception e) { | ||
| 201 | + log.warn("获取字符格式失败", e); | ||
| 202 | + } | ||
| 203 | + | ||
| 204 | + String text = para.text().trim(); | ||
| 205 | + return text.toUpperCase().equals(text) && | ||
| 206 | + text.length() < 100 && | ||
| 207 | + !text.contains("\t"); // 移除了 !text.contains(".") 的判断 | ||
| 208 | + } | ||
| 209 | + | ||
| 210 | + private String removeUuidSuffix(String fileName) { | ||
| 211 | + // 移除UUID后缀部分 | ||
| 212 | + return UUID_PATTERN.matcher(fileName).replaceFirst(""); | ||
| 213 | + } | ||
| 214 | + | ||
| 175 | private List<String> splitTxtDocument(String content) { | 215 | private List<String> splitTxtDocument(String content) { |
| 176 | DocumentSplitter splitter = new DocumentByParagraphSplitter(1000, 200); | 216 | DocumentSplitter splitter = new DocumentByParagraphSplitter(1000, 200); |
| 177 | Document document = Document.from(content); | 217 | Document document = Document.from(content); |
| @@ -185,8 +225,10 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -185,8 +225,10 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 185 | List<String> result = new ArrayList<>(); | 225 | List<String> result = new ArrayList<>(); |
| 186 | String ext = FilenameUtils.getExtension(filePath).toLowerCase(); | 226 | String ext = FilenameUtils.getExtension(filePath).toLowerCase(); |
| 187 | StringBuilder fullContent = new StringBuilder(); | 227 | StringBuilder fullContent = new StringBuilder(); |
| 188 | - String fileName = new File(filePath).getName(); | ||
| 189 | - fileName = fileName.substring(0, fileName.lastIndexOf('.')); // 去掉后缀 | 228 | + |
| 229 | + // 获取无UUID的文件名用于显示 | ||
| 230 | + String displayFileName = removeUuidSuffix(new File(filePath).getName()); | ||
| 231 | + displayFileName = FilenameUtils.removeExtension(displayFileName); | ||
| 190 | 232 | ||
| 191 | if (ext.equals("docx")) { | 233 | if (ext.equals("docx")) { |
| 192 | try (XWPFDocument doc = new XWPFDocument(new FileInputStream(filePath))) { | 234 | try (XWPFDocument doc = new XWPFDocument(new FileInputStream(filePath))) { |
| @@ -207,7 +249,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -207,7 +249,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 207 | 249 | ||
| 208 | String style = para.getStyle(); | 250 | String style = para.getStyle(); |
| 209 | if (style != null && style.matches("Heading\\d")) { | 251 | if (style != null && style.matches("Heading\\d")) { |
| 210 | - if (currentSection.length() > 0) { | 252 | + if (!currentSection.isEmpty()) { |
| 211 | result.add(currentSection.toString().trim()); | 253 | result.add(currentSection.toString().trim()); |
| 212 | } | 254 | } |
| 213 | currentSection = new StringBuilder(text).append("\n"); | 255 | currentSection = new StringBuilder(text).append("\n"); |
| @@ -219,7 +261,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -219,7 +261,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 219 | fullContent.append(tableContent).append("\n"); | 261 | fullContent.append(tableContent).append("\n"); |
| 220 | 262 | ||
| 221 | if (!isTableSection) { | 263 | if (!isTableSection) { |
| 222 | - if (currentSection.length() > 0) { | 264 | + if (!currentSection.isEmpty()) { |
| 223 | result.add(currentSection.toString().trim()); | 265 | result.add(currentSection.toString().trim()); |
| 224 | } | 266 | } |
| 225 | currentSection = new StringBuilder(); | 267 | currentSection = new StringBuilder(); |
| @@ -229,7 +271,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -229,7 +271,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 229 | } | 271 | } |
| 230 | } | 272 | } |
| 231 | 273 | ||
| 232 | - if (currentSection.length() > 0) { | 274 | + if (!currentSection.isEmpty()) { |
| 233 | result.add(currentSection.toString().trim()); | 275 | result.add(currentSection.toString().trim()); |
| 234 | } | 276 | } |
| 235 | } | 277 | } |
| @@ -246,7 +288,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -246,7 +288,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 246 | 288 | ||
| 247 | if (para.isInTable()) { | 289 | if (para.isInTable()) { |
| 248 | if (!isTableSection) { | 290 | if (!isTableSection) { |
| 249 | - if (currentSection.length() > 0) { | 291 | + if (!currentSection.isEmpty()) { |
| 250 | result.add(currentSection.toString().trim()); | 292 | result.add(currentSection.toString().trim()); |
| 251 | } | 293 | } |
| 252 | currentSection = new StringBuilder(); | 294 | currentSection = new StringBuilder(); |
| @@ -261,7 +303,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -261,7 +303,7 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 261 | } | 303 | } |
| 262 | 304 | ||
| 263 | if (isHeading(para, range)) { | 305 | if (isHeading(para, range)) { |
| 264 | - if (currentSection.length() > 0) { | 306 | + if (!currentSection.isEmpty()) { |
| 265 | result.add(currentSection.toString().trim()); | 307 | result.add(currentSection.toString().trim()); |
| 266 | } | 308 | } |
| 267 | currentSection = new StringBuilder(text).append("\n"); | 309 | currentSection = new StringBuilder(text).append("\n"); |
| @@ -271,59 +313,49 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -271,59 +313,49 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 271 | } | 313 | } |
| 272 | } | 314 | } |
| 273 | 315 | ||
| 274 | - if (currentSection.length() > 0) { | 316 | + if (!currentSection.isEmpty()) { |
| 275 | result.add(currentSection.toString().trim()); | 317 | result.add(currentSection.toString().trim()); |
| 276 | } | 318 | } |
| 277 | } | 319 | } |
| 278 | } | 320 | } |
| 279 | 321 | ||
| 280 | if (fullContent.length() < 1000) { | 322 | if (fullContent.length() < 1000) { |
| 281 | - return Collections.singletonList(fileName + "\n" + fullContent.toString().trim()); | 323 | + return Collections.singletonList(displayFileName + "\n" + fullContent.toString().trim()); |
| 282 | } | 324 | } |
| 283 | 325 | ||
| 284 | return result; | 326 | return result; |
| 285 | } | 327 | } |
| 286 | 328 | ||
| 287 | private String extractTableContent(XWPFTable table) { | 329 | private String extractTableContent(XWPFTable table) { |
| 288 | - StringBuilder tableContent = new StringBuilder(); | 330 | + StringBuilder tableContent = new StringBuilder("\n"); // 表格前加换行 |
| 289 | table.getRows().forEach(row -> { | 331 | table.getRows().forEach(row -> { |
| 290 | row.getTableCells().forEach(cell -> { | 332 | row.getTableCells().forEach(cell -> { |
| 291 | - tableContent.append("| ").append(cleanText(cell.getText())).append(" "); | 333 | + // 处理单元格内容中的多个换行 |
| 334 | + String cellText = cleanText(cell.getText()).replaceAll("(\r?\n){2,}", "\n"); | ||
| 335 | + tableContent.append("| ").append(cellText).append(" "); | ||
| 292 | }); | 336 | }); |
| 293 | tableContent.append("|\n"); | 337 | tableContent.append("|\n"); |
| 294 | }); | 338 | }); |
| 295 | return tableContent.toString(); | 339 | return tableContent.toString(); |
| 296 | } | 340 | } |
| 297 | 341 | ||
| 298 | - private static boolean isHeading(Paragraph para, Range range) { | ||
| 299 | - int styleIndex = para.getStyleIndex(); | ||
| 300 | - if (styleIndex >= 1 && styleIndex <= 9) { | ||
| 301 | - return true; | ||
| 302 | - } | ||
| 303 | - | ||
| 304 | - try { | ||
| 305 | - CharacterRun run = para.getCharacterRun(0); | ||
| 306 | - if (run.isBold() || run.getFontSize() > 12) { | ||
| 307 | - return true; | ||
| 308 | - } | ||
| 309 | - } catch (Exception e) { | ||
| 310 | - log.warn("获取字符格式失败", e); | ||
| 311 | - } | ||
| 312 | - | ||
| 313 | - String text = para.text().trim(); | ||
| 314 | - return text.toUpperCase().equals(text) && | ||
| 315 | - text.length() < 100 && | ||
| 316 | - !text.contains(".") && | ||
| 317 | - !text.contains("\t"); | ||
| 318 | - } | ||
| 319 | - | ||
| 320 | - private void saveSegmentsToDatabase(List<String> segments, String originalFileName, String storedFileName) { | 342 | + private void saveSegmentsToDatabase(List<String> segments, String originalFileName, String storedFileName, String knowledgeId) { |
| 321 | if (segments.isEmpty()) { | 343 | if (segments.isEmpty()) { |
| 322 | return; | 344 | return; |
| 323 | } | 345 | } |
| 324 | 346 | ||
| 325 | - String fileNameWithoutExt = originalFileName.substring(0, originalFileName.lastIndexOf('.')); | ||
| 326 | - String question = segments.size() == 1 ? fileNameWithoutExt : null; | 347 | + // 从存储文件名中提取UUID部分 |
| 348 | + String uuid = storedFileName.substring( | ||
| 349 | + storedFileName.lastIndexOf('_') + 1, | ||
| 350 | + storedFileName.lastIndexOf('.') | ||
| 351 | + ); | ||
| 352 | + | ||
| 353 | + // 获取无UUID和扩展名的文件名用于显示 | ||
| 354 | + String displayFileName = removeUuidSuffix(originalFileName); | ||
| 355 | + displayFileName = FilenameUtils.removeExtension(displayFileName); | ||
| 356 | + | ||
| 357 | + // 判断是否是单一段落 | ||
| 358 | + boolean isSingleSegment = segments.size() == 1; | ||
| 327 | 359 | ||
| 328 | for (String segment : segments) { | 360 | for (String segment : segments) { |
| 329 | if (segment.trim().isEmpty()) { | 361 | if (segment.trim().isEmpty()) { |
| @@ -333,20 +365,26 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | @@ -333,20 +365,26 @@ public class QuestionEmbeddingServiceImpl implements IQuestionEmbeddingService { | ||
| 333 | QuestionEmbedding record = new QuestionEmbedding(); | 365 | QuestionEmbedding record = new QuestionEmbedding(); |
| 334 | record.setId(UUID.randomUUID().toString()); | 366 | record.setId(UUID.randomUUID().toString()); |
| 335 | 367 | ||
| 336 | - if (question != null) { | ||
| 337 | - record.setQuestion(question); | 368 | + if (isSingleSegment) { |
| 369 | + record.setQuestion(displayFileName); | ||
| 370 | + record.setAnswer(segment.trim()); | ||
| 338 | } else { | 371 | } else { |
| 339 | - String firstLine = segment.lines().findFirst().orElse("未命名问题"); | ||
| 340 | - record.setQuestion(cleanText(firstLine)); | 372 | + String firstLine = segment.lines().findFirst().orElse(""); |
| 373 | + record.setQuestion(displayFileName + ": " + cleanText(firstLine)); | ||
| 374 | + record.setAnswer(segment.trim()); | ||
| 341 | } | 375 | } |
| 342 | 376 | ||
| 343 | - record.setAnswer(segment.trim()); | ||
| 344 | record.setText(""); | 377 | record.setText(""); |
| 345 | - record.setMetadata("{\"fileName\":\"" + storedFileName + "\"}"); | 378 | + record.setMetadata(String.format( |
| 379 | + "{\"docId\":\"%s\",\"docName\":\"%s\",\"knowledgeId\":\"%s\"}", | ||
| 380 | + uuid, | ||
| 381 | + originalFileName, | ||
| 382 | + knowledgeId | ||
| 383 | + )); | ||
| 346 | 384 | ||
| 347 | Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion()); | 385 | Response<Embedding> embeddingResponse = aiModelUtils.getEmbedding("1925730210204721154", record.getQuestion()); |
| 348 | record.setEmbedding(embeddingResponse.content().vector()); | 386 | record.setEmbedding(embeddingResponse.content().vector()); |
| 349 | - | 387 | + record.setKnowledgeId(knowledgeId); |
| 350 | questionEmbeddingMapper.insert(record); | 388 | questionEmbeddingMapper.insert(record); |
| 351 | } | 389 | } |
| 352 | } | 390 | } |
| 1 | +package org.jeecg.modules.airag.app.utils; | ||
| 2 | + | ||
| 3 | +import lombok.extern.slf4j.Slf4j; | ||
| 4 | +import me.zhyd.oauth.log.Log; | ||
| 5 | + | ||
| 6 | +import java.io.IOException; | ||
| 7 | +import java.nio.file.Files; | ||
| 8 | +import java.nio.file.Path; | ||
| 9 | +import java.nio.file.Paths; | ||
| 10 | +import java.util.Base64; | ||
| 11 | +@Slf4j | ||
| 12 | +public class FileToBase64Util { | ||
| 13 | + | ||
| 14 | + /** | ||
| 15 | + * 将文件转换为Base64字符串 | ||
| 16 | + * @param filePath 文件路径 | ||
| 17 | + * @return Base64编码的字符串 | ||
| 18 | + * @throws IOException 如果文件读取失败 | ||
| 19 | + */ | ||
| 20 | + public static String fileToBase64(String filePath) throws IOException { | ||
| 21 | + // 1. 参数校验 | ||
| 22 | + if (filePath == null || filePath.trim().isEmpty()) { | ||
| 23 | + log.error("文件路径为空"); | ||
| 24 | + return null; | ||
| 25 | + } | ||
| 26 | + | ||
| 27 | + // 2. 检查文件是否存在 | ||
| 28 | + Path path = Paths.get(filePath); | ||
| 29 | + if (!Files.exists(path)) { | ||
| 30 | + String errorMsg = String.format("文件不存在: %s", filePath); | ||
| 31 | + log.error(errorMsg); | ||
| 32 | + return null; | ||
| 33 | + } | ||
| 34 | + // 读取文件字节 | ||
| 35 | + byte[] fileBytes = Files.readAllBytes(path); | ||
| 36 | + // 使用Base64编码器进行编码 | ||
| 37 | + return Base64.getEncoder().encodeToString(fileBytes); | ||
| 38 | + } | ||
| 39 | + | ||
| 40 | + /** | ||
| 41 | + * 测试方法 | ||
| 42 | + */ | ||
| 43 | + public static void main(String[] args) { | ||
| 44 | + try { | ||
| 45 | + // 示例:转换一个文件为Base64 | ||
| 46 | + String base64String = fileToBase64("D:\\upload\\新生落户手续办理_f75c658b-6a8c-4b19-94ff-28d3c469be9a.docx"); | ||
| 47 | + System.out.println("Base64编码结果:"); | ||
| 48 | + System.out.println(base64String); | ||
| 49 | + } catch (IOException e) { | ||
| 50 | + System.err.println("文件转换失败: " + e.getMessage()); | ||
| 51 | + } | ||
| 52 | + } | ||
| 53 | +} |
| 1 | -package org.jeecg.modules.airag.app.utils; | ||
| 2 | - | ||
| 3 | -import java.sql.*; | ||
| 4 | -import java.util.ArrayList; | ||
| 5 | -import java.util.HashMap; | ||
| 6 | -import java.util.List; | ||
| 7 | -import java.util.Map; | ||
| 8 | - | ||
| 9 | -public class PureJdbcVectorQuery { | ||
| 10 | - | ||
| 11 | - // PostgreSQL连接参数 | ||
| 12 | - private static final String URL = "jdbc:postgresql://192.168.100.103:5432/postgres"; | ||
| 13 | - private static final String USER = "postgres"; | ||
| 14 | - private static final String PASSWORD = "postgres"; | ||
| 15 | - | ||
| 16 | - public static List<Map<String, Object>> queryVectors() { | ||
| 17 | - List<Map<String, Object>> results = new ArrayList<>(); | ||
| 18 | - | ||
| 19 | - try (Connection conn = DriverManager.getConnection(URL, USER, PASSWORD); | ||
| 20 | - PreparedStatement stmt = conn.prepareStatement("SELECT * FROM embeddings"); | ||
| 21 | - ResultSet rs = stmt.executeQuery()) { | ||
| 22 | - | ||
| 23 | - ResultSetMetaData metaData = rs.getMetaData(); | ||
| 24 | - int columnCount = metaData.getColumnCount(); | ||
| 25 | - | ||
| 26 | - while (rs.next()) { | ||
| 27 | - Map<String, Object> row = new HashMap<>(); | ||
| 28 | - for (int i = 1; i <= columnCount; i++) { | ||
| 29 | - String columnName = metaData.getColumnName(i); | ||
| 30 | - Object value = "embedding".equals(columnName) | ||
| 31 | - ? rs.getBytes(i) // 向量字段特殊处理 | ||
| 32 | - : rs.getObject(i); | ||
| 33 | - row.put(columnName, value); | ||
| 34 | - } | ||
| 35 | - results.add(row); | ||
| 36 | - } | ||
| 37 | - } catch (SQLException e) { | ||
| 38 | - e.printStackTrace(); | ||
| 39 | - } | ||
| 40 | - return results; | ||
| 41 | - } | ||
| 42 | - | ||
| 43 | - | ||
| 44 | - public static void main(String[] args) { | ||
| 45 | - List<Map<String, Object>> maps = queryVectors(); | ||
| 46 | - for (Map<String, Object> map : maps) { | ||
| 47 | - System.out.println(map); | ||
| 48 | - } | ||
| 49 | - } | ||
| 50 | -} | ||
| 1 | +//package org.jeecg.modules.airag.app.utils; | ||
| 2 | +// | ||
| 3 | +//import java.sql.*; | ||
| 4 | +//import java.util.ArrayList; | ||
| 5 | +//import java.util.HashMap; | ||
| 6 | +//import java.util.List; | ||
| 7 | +//import java.util.Map; | ||
| 8 | +// | ||
| 9 | +//public class PureJdbcVectorQuery { | ||
| 10 | +// | ||
| 11 | +// // PostgreSQL连接参数 | ||
| 12 | +// private static final String URL = "jdbc:postgresql://192.168.100.103:5432/postgres"; | ||
| 13 | +// private static final String USER = "postgres"; | ||
| 14 | +// private static final String PASSWORD = "postgres"; | ||
| 15 | +// | ||
| 16 | +// public static List<Map<String, Object>> queryVectors() { | ||
| 17 | +// List<Map<String, Object>> results = new ArrayList<>(); | ||
| 18 | +// | ||
| 19 | +// try (Connection conn = DriverManager.getConnection(URL, USER, PASSWORD); | ||
| 20 | +// PreparedStatement stmt = conn.prepareStatement("SELECT * FROM embeddings"); | ||
| 21 | +// ResultSet rs = stmt.executeQuery()) { | ||
| 22 | +// | ||
| 23 | +// ResultSetMetaData metaData = rs.getMetaData(); | ||
| 24 | +// int columnCount = metaData.getColumnCount(); | ||
| 25 | +// | ||
| 26 | +// while (rs.next()) { | ||
| 27 | +// Map<String, Object> row = new HashMap<>(); | ||
| 28 | +// for (int i = 1; i <= columnCount; i++) { | ||
| 29 | +// String columnName = metaData.getColumnName(i); | ||
| 30 | +// Object value = "embedding".equals(columnName) | ||
| 31 | +// ? rs.getBytes(i) // 向量字段特殊处理 | ||
| 32 | +// : rs.getObject(i); | ||
| 33 | +// row.put(columnName, value); | ||
| 34 | +// } | ||
| 35 | +// results.add(row); | ||
| 36 | +// } | ||
| 37 | +// } catch (SQLException e) { | ||
| 38 | +// e.printStackTrace(); | ||
| 39 | +// } | ||
| 40 | +// return results; | ||
| 41 | +// } | ||
| 42 | +// | ||
| 43 | +// | ||
| 44 | +// public static void main(String[] args) { | ||
| 45 | +// List<Map<String, Object>> maps = queryVectors(); | ||
| 46 | +// for (Map<String, Object> map : maps) { | ||
| 47 | +// System.out.println(map); | ||
| 48 | +// } | ||
| 49 | +// } | ||
| 50 | +//} |
| 1 | package org.jeecg.modules.airag.llm.handler; | 1 | package org.jeecg.modules.airag.llm.handler; |
| 2 | 2 | ||
| 3 | import com.alibaba.fastjson.JSONObject; | 3 | import com.alibaba.fastjson.JSONObject; |
| 4 | +import com.fasterxml.jackson.core.JsonProcessingException; | ||
| 5 | +import com.fasterxml.jackson.databind.ObjectMapper; | ||
| 4 | import com.google.common.collect.Lists; | 6 | import com.google.common.collect.Lists; |
| 5 | import dev.langchain4j.data.document.Document; | 7 | import dev.langchain4j.data.document.Document; |
| 6 | import dev.langchain4j.data.document.DocumentSplitter; | 8 | import dev.langchain4j.data.document.DocumentSplitter; |
| @@ -251,6 +253,22 @@ public class EmbeddingHandler implements IEmbeddingHandler { | @@ -251,6 +253,22 @@ public class EmbeddingHandler implements IEmbeddingHandler { | ||
| 251 | data.put("content", matchRes.embedded().text()); | 253 | data.put("content", matchRes.embedded().text()); |
| 252 | Metadata metadata = matchRes.embedded().metadata(); | 254 | Metadata metadata = matchRes.embedded().metadata(); |
| 253 | data.put("chunk", metadata.getInteger("index")); | 255 | data.put("chunk", metadata.getInteger("index")); |
| 256 | + | ||
| 257 | + | ||
| 258 | + | ||
| 259 | + Map<String, Object> metadataMap = new HashMap<>(); | ||
| 260 | + metadataMap.put("docId", metadata.getString("docId")); // 假设metadata中有docId字段 | ||
| 261 | + metadataMap.put("docName", metadata.getString(EMBED_STORE_METADATA_DOCNAME)); | ||
| 262 | + metadataMap.put("index", metadata.getInteger("index")); | ||
| 263 | + ObjectMapper objectMapper = new ObjectMapper(); | ||
| 264 | + | ||
| 265 | + try { | ||
| 266 | + data.put("metadata", objectMapper.writeValueAsString(metadataMap)); | ||
| 267 | + } catch (JsonProcessingException e) { | ||
| 268 | + throw new RuntimeException(e); | ||
| 269 | + } | ||
| 270 | + | ||
| 271 | + | ||
| 254 | data.put(EMBED_STORE_METADATA_DOCNAME, metadata.getString(EMBED_STORE_METADATA_DOCNAME)); | 272 | data.put(EMBED_STORE_METADATA_DOCNAME, metadata.getString(EMBED_STORE_METADATA_DOCNAME)); |
| 255 | return data; | 273 | return data; |
| 256 | }).collect(Collectors.toList()); | 274 | }).collect(Collectors.toList()); |
| 1 | package org.jeecg.modules.airag.zdyrag.controller; | 1 | package org.jeecg.modules.airag.zdyrag.controller; |
| 2 | 2 | ||
| 3 | +import cn.hutool.core.collection.CollectionUtil; | ||
| 4 | +import com.fasterxml.jackson.databind.ObjectMapper; | ||
| 3 | import dev.langchain4j.data.message.ChatMessage; | 5 | import dev.langchain4j.data.message.ChatMessage; |
| 4 | import dev.langchain4j.data.message.UserMessage; | 6 | import dev.langchain4j.data.message.UserMessage; |
| 5 | import dev.langchain4j.service.TokenStream; | 7 | import dev.langchain4j.service.TokenStream; |
| 6 | import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore; | 8 | import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore; |
| 7 | import io.swagger.v3.oas.annotations.Operation; | 9 | import io.swagger.v3.oas.annotations.Operation; |
| 10 | +import lombok.extern.slf4j.Slf4j; | ||
| 11 | +import org.apache.commons.lang3.StringUtils; | ||
| 8 | import org.jeecg.ai.handler.AIParams; | 12 | import org.jeecg.ai.handler.AIParams; |
| 9 | import org.jeecg.ai.handler.LLMHandler; | 13 | import org.jeecg.ai.handler.LLMHandler; |
| 10 | import org.jeecg.common.api.vo.Result; | 14 | import org.jeecg.common.api.vo.Result; |
| 11 | import org.jeecg.modules.airag.app.entity.QuestionEmbedding; | 15 | import org.jeecg.modules.airag.app.entity.QuestionEmbedding; |
| 12 | import org.jeecg.modules.airag.app.service.IQuestionEmbeddingService; | 16 | import org.jeecg.modules.airag.app.service.IQuestionEmbeddingService; |
| 17 | +import org.jeecg.modules.airag.app.utils.FileToBase64Util; | ||
| 13 | import org.jeecg.modules.airag.common.handler.IAIChatHandler; | 18 | import org.jeecg.modules.airag.common.handler.IAIChatHandler; |
| 14 | import org.jeecg.modules.airag.llm.handler.EmbeddingHandler; | 19 | import org.jeecg.modules.airag.llm.handler.EmbeddingHandler; |
| 20 | +import org.jeecg.modules.airag.llm.service.IAiragKnowledgeService; | ||
| 15 | import org.springframework.beans.factory.annotation.Autowired; | 21 | import org.springframework.beans.factory.annotation.Autowired; |
| 22 | +import org.springframework.beans.factory.annotation.Value; | ||
| 16 | import org.springframework.stereotype.Component; | 23 | import org.springframework.stereotype.Component; |
| 17 | import org.springframework.web.bind.annotation.GetMapping; | 24 | import org.springframework.web.bind.annotation.GetMapping; |
| 18 | import org.springframework.web.bind.annotation.RequestMapping; | 25 | import org.springframework.web.bind.annotation.RequestMapping; |
| @@ -25,35 +32,50 @@ import java.util.Map; | @@ -25,35 +32,50 @@ import java.util.Map; | ||
| 25 | 32 | ||
| 26 | @RestController | 33 | @RestController |
| 27 | @RequestMapping("/airag/zdyRag") | 34 | @RequestMapping("/airag/zdyRag") |
| 35 | +@Slf4j | ||
| 28 | public class ZdyRagController { | 36 | public class ZdyRagController { |
| 29 | @Autowired | 37 | @Autowired |
| 30 | private EmbeddingHandler embeddingHandler; | 38 | private EmbeddingHandler embeddingHandler; |
| 31 | @Autowired | 39 | @Autowired |
| 32 | IAIChatHandler aiChatHandler; | 40 | IAIChatHandler aiChatHandler; |
| 33 | @Autowired | 41 | @Autowired |
| 34 | - LLMHandler llmHandler; | ||
| 35 | - @Autowired | ||
| 36 | private IQuestionEmbeddingService questionEmbeddingService; | 42 | private IQuestionEmbeddingService questionEmbeddingService; |
| 43 | + @Value("${jeecg.upload.path}") | ||
| 44 | + private String uploadPath; | ||
| 37 | 45 | ||
| 38 | 46 | ||
| 39 | @Operation(summary = "send") | 47 | @Operation(summary = "send") |
| 40 | @GetMapping("send") | 48 | @GetMapping("send") |
| 41 | - public Result<Map<String, Object>> send(String questionText) { | 49 | + public Result<Map<String, Object>> send(String questionText) throws Exception { |
| 42 | String knowId = "1926872137990148098"; | 50 | String knowId = "1926872137990148098"; |
| 43 | -// String text = "身份证丢失办理流程"; | ||
| 44 | - Integer topNumber = 3; | 51 | + Integer topNumber = 1; |
| 45 | Double similarity = 0.8; | 52 | Double similarity = 0.8; |
| 46 | HashMap<String, Object> resMap = new HashMap<>(); | 53 | HashMap<String, Object> resMap = new HashMap<>(); |
| 47 | //根据问题相似度进行查询 | 54 | //根据问题相似度进行查询 |
| 48 | List<QuestionEmbedding> questionEmbeddings = questionEmbeddingService.similaritySearchByQuestion(questionText, 1,0.8); | 55 | List<QuestionEmbedding> questionEmbeddings = questionEmbeddingService.similaritySearchByQuestion(questionText, 1,0.8); |
| 49 | for (QuestionEmbedding questionEmbedding : questionEmbeddings) { | 56 | for (QuestionEmbedding questionEmbedding : questionEmbeddings) { |
| 50 | - resMap.put("question", questionEmbedding.getQuestion()); | 57 | + resMap.put("question", questionText); |
| 51 | resMap.put("answer", questionEmbedding.getAnswer()); | 58 | resMap.put("answer", questionEmbedding.getAnswer()); |
| 52 | - resMap.put("similarity", similarity); | ||
| 53 | - System.out.println("questionEmbedding.getQuestion() = " + questionEmbedding.getQuestion()); | ||
| 54 | - System.out.println("questionEmbedding.getAnswer() = " + questionEmbedding.getAnswer()); | ||
| 55 | - System.out.println("questionEmbedding.getSimilarity() = " + questionEmbedding.getSimilarity()); | ||
| 56 | - System.out.println("-------------------------------------------------------------"); | 59 | + resMap.put("similarity", questionEmbedding.getSimilarity()); |
| 60 | + | ||
| 61 | + ObjectMapper objectMapper = new ObjectMapper(); | ||
| 62 | + Map<String, String> metadata = objectMapper.readValue(questionEmbedding.getMetadata(), Map.class); | ||
| 63 | + // 获取docName和docId | ||
| 64 | + if (metadata != null) { | ||
| 65 | + String docName = metadata.get("docName"); | ||
| 66 | + resMap.put("fileName", docName); | ||
| 67 | + String fileName = generateFilePath(questionEmbedding.getMetadata()); | ||
| 68 | + | ||
| 69 | + if (StringUtils.isNotBlank(fileName)) { | ||
| 70 | + resMap.put("fileBase64", FileToBase64Util.fileToBase64(uploadPath + fileName)); | ||
| 71 | + } | ||
| 72 | + } | ||
| 73 | + | ||
| 74 | + log.info("questionEmbedding.getMetadata() = " + questionEmbedding.getMetadata()); | ||
| 75 | + log.info("questionEmbedding.getQuestion() = " + questionEmbedding.getQuestion()); | ||
| 76 | + log.info("questionEmbedding.getAnswer() = " + questionEmbedding.getAnswer()); | ||
| 77 | + log.info("questionEmbedding.getSimilarity() = " + questionEmbedding.getSimilarity()); | ||
| 78 | + log.info("-------------------------------------------------------------"); | ||
| 57 | } | 79 | } |
| 58 | //返回问题库命中的问题 | 80 | //返回问题库命中的问题 |
| 59 | if (!questionEmbeddings.isEmpty()) { | 81 | if (!questionEmbeddings.isEmpty()) { |
| @@ -61,43 +83,78 @@ public class ZdyRagController { | @@ -61,43 +83,78 @@ public class ZdyRagController { | ||
| 61 | } | 83 | } |
| 62 | 84 | ||
| 63 | List<Map<String, Object>> maps = embeddingHandler.searchEmbedding(knowId, questionText, topNumber, similarity); | 85 | List<Map<String, Object>> maps = embeddingHandler.searchEmbedding(knowId, questionText, topNumber, similarity); |
| 86 | + if (CollectionUtil.isEmpty(maps)) { | ||
| 87 | + resMap.put("answer", "该问题未记录在知识库中"); | ||
| 88 | + //未记录在知识库中的问题插入未记录问题表 | ||
| 89 | + | ||
| 90 | + | ||
| 91 | + return Result.OK(resMap); | ||
| 92 | + } | ||
| 93 | + | ||
| 64 | StringBuilder content = new StringBuilder(); | 94 | StringBuilder content = new StringBuilder(); |
| 65 | for (Map<String, Object> map : maps) { | 95 | for (Map<String, Object> map : maps) { |
| 66 | if (Double.parseDouble(map.get("score").toString()) > similarity){ | 96 | if (Double.parseDouble(map.get("score").toString()) > similarity){ |
| 67 | - System.out.println("score = " + map.get("score").toString()); | ||
| 68 | - System.out.println("content = " + map.get("content").toString()); | 97 | + log.info("score = " + map.get("score").toString()); |
| 98 | + log.info("content = " + map.get("content").toString()); | ||
| 69 | content.append(map.get("content").toString()).append("\n"); | 99 | content.append(map.get("content").toString()).append("\n"); |
| 70 | } | 100 | } |
| 71 | } | 101 | } |
| 72 | 102 | ||
| 103 | + | ||
| 73 | List<ChatMessage> messages = new ArrayList<>(); | 104 | List<ChatMessage> messages = new ArrayList<>(); |
| 74 | - String questin = "请整理出与用户所提出的问题相关的信息,舍弃掉与问题无关的内容,进行整理,回答用户的问题" + | 105 | + String questin = "请整理出与用户所提出的问题相关的信息,参考内容中含有与问题无关信息,舍弃掉与问题无关的内容,进行整理,回答用户的问题" + |
| 75 | "问题如下:" + questionText + | 106 | "问题如下:" + questionText + |
| 76 | - "文本信息如下:" + content | ||
| 77 | - ; | 107 | + "参考信息如下:" + content ; |
| 78 | 108 | ||
| 79 | 109 | ||
| 80 | messages.add(new UserMessage("user", questin)); | 110 | messages.add(new UserMessage("user", questin)); |
| 81 | -// AIParams aiParams = new AIParams(); | ||
| 82 | -// aiParams.setBaseUrl("http://localhost:11434"); | ||
| 83 | -// aiParams.setModelName("EntropyYue/chatglm3"); | ||
| 84 | -// aiParams.setProvider("OLLAMA"); | ||
| 85 | String chat = aiChatHandler.completions("1926875898187878401", messages, null); | 111 | String chat = aiChatHandler.completions("1926875898187878401", messages, null); |
| 86 | resMap.put("question", questionText); | 112 | resMap.put("question", questionText); |
| 87 | resMap.put("answer", chat); | 113 | resMap.put("answer", chat); |
| 114 | + resMap.put("similarity", maps.get(0).get("score").toString()); | ||
| 115 | + String fileName = generateFilePath(maps.get(0).get("metadata").toString()); | ||
| 116 | + resMap.put("fileName", fileName); | ||
| 117 | + resMap.put("fileBase64",FileToBase64Util.fileToBase64(uploadPath + fileName)); | ||
| 88 | return Result.OK(resMap); | 118 | return Result.OK(resMap); |
| 89 | } | 119 | } |
| 90 | 120 | ||
| 121 | + | ||
| 122 | + | ||
| 123 | + private String generateFilePath(String metadataJson) throws Exception { | ||
| 124 | + if (StringUtils.isEmpty(metadataJson)) { | ||
| 125 | + return ""; | ||
| 126 | + } | ||
| 127 | + ObjectMapper objectMapper = new ObjectMapper(); | ||
| 128 | + // 解析JSON字符串 | ||
| 129 | + Map<String, String> metadata = objectMapper.readValue(metadataJson, Map.class); | ||
| 130 | + | ||
| 131 | + // 获取docName和docId | ||
| 132 | + String docName = metadata.get("docName"); | ||
| 133 | + String docId = metadata.get("docId"); | ||
| 134 | + | ||
| 135 | + // 分离文件名和扩展名 | ||
| 136 | + if(StringUtils.isEmpty(docName)){ | ||
| 137 | + return null; | ||
| 138 | + } | ||
| 139 | + int dotIndex = docName.lastIndexOf('.'); | ||
| 140 | + String baseName = (dotIndex > 0) ? docName.substring(0, dotIndex) : docName; | ||
| 141 | + String extension = (dotIndex > 0) ? docName.substring(dotIndex) : ""; | ||
| 142 | + | ||
| 143 | + // 组合成新文件名 | ||
| 144 | + return baseName + "_" + docId + extension; | ||
| 145 | + } | ||
| 146 | + | ||
| 91 | public static void main(String[] args) { | 147 | public static void main(String[] args) { |
| 92 | - List<ChatMessage> messages = new ArrayList<>(); | ||
| 93 | - messages.add(new UserMessage("user", "你好,你是谁?")); | ||
| 94 | - LLMHandler llmHandler1 = new LLMHandler(); | ||
| 95 | - AIParams aiParams = new AIParams(); | ||
| 96 | - aiParams.setBaseUrl("http://localhost:11434"); | ||
| 97 | - aiParams.setModelName("EntropyYue/chatglm3"); | ||
| 98 | - aiParams.setProvider("OLLAMA"); | ||
| 99 | - TokenStream chat = llmHandler1.chat(messages, aiParams); | ||
| 100 | - System.out.println("chat = " + chat); | 148 | + String s = "学生户口复印_efde055d-1207-4b6f-8d46-79eb557ca711.docx"; |
| 149 | + | ||
| 150 | + String s1 = StringUtils.substringBefore(s, "."); | ||
| 151 | + log.info("s1 = " + s1); | ||
| 152 | + | ||
| 153 | + | ||
| 154 | + String[] split = s.split("_"); | ||
| 155 | + for (String string : split) { | ||
| 156 | + log.info("string = " + string); | ||
| 157 | + } | ||
| 101 | 158 | ||
| 102 | } | 159 | } |
| 103 | 160 |
-
请 注册 或 登录 后发表评论