正在显示
8 个修改的文件
包含
639 行增加
和
59 行删除
| @@ -38,6 +38,29 @@ | @@ -38,6 +38,29 @@ | ||
| 38 | </properties> | 38 | </properties> |
| 39 | 39 | ||
| 40 | <dependencies> | 40 | <dependencies> |
| 41 | + <dependency> | ||
| 42 | + <groupId>org.apache.pdfbox</groupId> | ||
| 43 | + <artifactId>pdfbox</artifactId> | ||
| 44 | + <version>2.0.27</version> <!-- 使用最新稳定版 --> | ||
| 45 | + </dependency> | ||
| 46 | + <!-- OCR支持 --> | ||
| 47 | + <dependency> | ||
| 48 | + <groupId>net.sourceforge.tess4j</groupId> | ||
| 49 | + <artifactId>tess4j</artifactId> | ||
| 50 | + <version>5.3.0</version> | ||
| 51 | + <exclusions> | ||
| 52 | + <exclusion> | ||
| 53 | + <groupId>com.sun.jna</groupId> | ||
| 54 | + <artifactId>jna</artifactId> | ||
| 55 | + </exclusion> | ||
| 56 | + </exclusions> | ||
| 57 | + </dependency> | ||
| 58 | + <!-- 在现有dependencies中添加 --> | ||
| 59 | + <dependency> | ||
| 60 | + <groupId>org.apache.pdfbox</groupId> | ||
| 61 | + <artifactId>pdfbox-tools</artifactId> | ||
| 62 | + <version>2.0.27</version> | ||
| 63 | + </dependency> | ||
| 41 | <!-- system单体 api--> | 64 | <!-- system单体 api--> |
| 42 | <dependency> | 65 | <dependency> |
| 43 | <groupId>org.jeecgframework.boot</groupId> | 66 | <groupId>org.jeecgframework.boot</groupId> |
| 1 | +package org.jeecg.modules.airag.app.service.impl; | ||
| 2 | + | ||
| 3 | +import org.apache.pdfbox.pdmodel.PDDocument; | ||
| 4 | +import org.apache.pdfbox.rendering.PDFRenderer; | ||
| 5 | +import org.jeecg.modules.airag.app.utils.PdfTitleExtractor; | ||
| 6 | +import org.slf4j.Logger; | ||
| 7 | +import org.slf4j.LoggerFactory; | ||
| 8 | +import org.springframework.stereotype.Service; | ||
| 9 | + | ||
| 10 | +import java.awt.image.BufferedImage; | ||
| 11 | +import java.io.File; | ||
| 12 | +import java.io.IOException; | ||
| 13 | +import java.nio.file.Files; | ||
| 14 | +import java.nio.file.Path; | ||
| 15 | +import java.nio.file.Paths; | ||
| 16 | +import java.util.ArrayList; | ||
| 17 | +import java.util.List; | ||
| 18 | + | ||
| 19 | +@Service | ||
| 20 | +public class NativeOcrService { | ||
| 21 | + private static final Logger log = LoggerFactory.getLogger(NativeOcrService.class); | ||
| 22 | + | ||
| 23 | + /** | ||
| 24 | + * 调用本地Tesseract处理PDF(返回带标题的文本块) | ||
| 25 | + */ | ||
| 26 | + public List<PdfOcrProcessor.TextChunk> processPdfWithOcr(Path pdfPath) throws Exception { | ||
| 27 | + String documentTitle = PdfTitleExtractor.extractTitle(pdfPath); | ||
| 28 | + List<BufferedImage> images = pdfToImages(pdfPath); | ||
| 29 | + List<PdfOcrProcessor.TextChunk> result = new ArrayList<>(); | ||
| 30 | + | ||
| 31 | + for (int i = 0; i < images.size(); i++) { | ||
| 32 | + File tempImage = File.createTempFile("ocr_", ".png"); | ||
| 33 | + try { | ||
| 34 | + javax.imageio.ImageIO.write(images.get(i), "png", tempImage); | ||
| 35 | + String text = callTesseract(tempImage.getAbsolutePath()); | ||
| 36 | + result.add(new PdfOcrProcessor.TextChunk(documentTitle, text)); | ||
| 37 | + } finally { | ||
| 38 | + tempImage.delete(); | ||
| 39 | + } | ||
| 40 | + } | ||
| 41 | + return result; | ||
| 42 | + } | ||
| 43 | + | ||
| 44 | + /** | ||
| 45 | + * PDF转图片列表(每页一张) | ||
| 46 | + */ | ||
| 47 | + private List<BufferedImage> pdfToImages(Path pdfPath) throws IOException { | ||
| 48 | + List<BufferedImage> images = new ArrayList<>(); | ||
| 49 | + try (PDDocument document = PDDocument.load(pdfPath.toFile())) { | ||
| 50 | + PDFRenderer renderer = new PDFRenderer(document); | ||
| 51 | + for (int i = 0; i < document.getNumberOfPages(); i++) { | ||
| 52 | + images.add(renderer.renderImageWithDPI(i, 300)); // 300 DPI | ||
| 53 | + } | ||
| 54 | + } | ||
| 55 | + return images; | ||
| 56 | + } | ||
| 57 | + | ||
| 58 | + /** | ||
| 59 | + * 调用本地Tesseract命令(保持不变) | ||
| 60 | + */ | ||
| 61 | + private String callTesseract(String imagePath) throws Exception { | ||
| 62 | + String tessCmd = System.getProperty("os.name").toLowerCase().contains("win") | ||
| 63 | + ? "C:\\Program Files\\Tesseract-OCR\\tesseract" | ||
| 64 | + : "/usr/bin/tesseract"; | ||
| 65 | + | ||
| 66 | + ProcessBuilder pb = new ProcessBuilder( | ||
| 67 | + tessCmd, | ||
| 68 | + imagePath, | ||
| 69 | + "stdout", | ||
| 70 | + "-l", "chi_sim+eng", | ||
| 71 | + "--psm", "6", | ||
| 72 | + "--oem", "1", | ||
| 73 | + "-c", "preserve_interword_spaces=1" | ||
| 74 | + ); | ||
| 75 | + | ||
| 76 | + Process process = pb.start(); | ||
| 77 | + String result = new String(process.getInputStream().readAllBytes(), "UTF-8"); | ||
| 78 | + int exitCode = process.waitFor(); | ||
| 79 | + | ||
| 80 | + if (exitCode != 0) { | ||
| 81 | + String error = new String(process.getErrorStream().readAllBytes(), "UTF-8"); | ||
| 82 | + throw new RuntimeException("OCR失败: " + error); | ||
| 83 | + } | ||
| 84 | + return result; | ||
| 85 | + } | ||
| 86 | + | ||
| 87 | + | ||
| 88 | + | ||
| 89 | + public static void main(String[] args) { | ||
| 90 | + // 初始化服务(实际项目中由Spring注入) | ||
| 91 | + NativeOcrService ocrService = new NativeOcrService(); | ||
| 92 | + PdfOcrProcessor processor = new PdfOcrProcessor(ocrService); | ||
| 93 | + | ||
| 94 | + try { | ||
| 95 | + // 测试普通PDF | ||
| 96 | + Path pdfPath = Paths.get("D:\\Users\\lx244\\Desktop\\公司知识库\\公司知识库.pdf"); | ||
| 97 | + System.out.println("文件大小: " + Files.size(pdfPath) + " bytes"); | ||
| 98 | + System.out.println("可读性: " + Files.isReadable(pdfPath)); | ||
| 99 | + List<PdfOcrProcessor.TextChunk> results = processor.processPdf(pdfPath); | ||
| 100 | + | ||
| 101 | + results.forEach(chunk -> { | ||
| 102 | + System.out.println("=== 标题 ==="); | ||
| 103 | + System.out.println(chunk.getDocumentTitle()); | ||
| 104 | + System.out.println("=== 内容 ==="); | ||
| 105 | + System.out.println(chunk.getContent().substring(0, Math.min(100, chunk.getContent().length())) + "..."); | ||
| 106 | + }); | ||
| 107 | + } catch (Exception e) { | ||
| 108 | + e.printStackTrace(); | ||
| 109 | + } | ||
| 110 | + } | ||
| 111 | +} |
| 1 | +package org.jeecg.modules.airag.app.service.impl; | ||
| 2 | + | ||
| 3 | +import lombok.AllArgsConstructor; | ||
| 4 | +import lombok.Data; | ||
| 5 | +import lombok.extern.slf4j.Slf4j; | ||
| 6 | +import org.apache.pdfbox.pdmodel.PDDocument; | ||
| 7 | +import org.apache.pdfbox.text.PDFTextStripper; | ||
| 8 | +import org.apache.pdfbox.text.TextPosition; | ||
| 9 | +import org.jeecg.modules.airag.app.utils.PdfTitleExtractor; | ||
| 10 | +import org.springframework.beans.factory.annotation.Autowired; | ||
| 11 | +import org.springframework.stereotype.Service; | ||
| 12 | + | ||
| 13 | +import java.io.IOException; | ||
| 14 | +import java.nio.file.Path; | ||
| 15 | +import java.util.ArrayList; | ||
| 16 | +import java.util.List; | ||
| 17 | +import java.util.stream.Collectors; | ||
| 18 | + | ||
| 19 | +@Slf4j | ||
| 20 | +@Service | ||
| 21 | +public class PdfOcrProcessor { | ||
| 22 | + | ||
| 23 | + @Data | ||
| 24 | + @AllArgsConstructor | ||
| 25 | + public static class TextChunk { | ||
| 26 | + private String documentTitle; | ||
| 27 | + private String content; | ||
| 28 | + } | ||
| 29 | + | ||
| 30 | + private final NativeOcrService ocrService; | ||
| 31 | + | ||
| 32 | + @Autowired | ||
| 33 | + public PdfOcrProcessor(NativeOcrService ocrService) { | ||
| 34 | + this.ocrService = ocrService; | ||
| 35 | + } | ||
| 36 | + | ||
| 37 | + public List<TextChunk> processPdf(Path pdfPath) throws Exception { | ||
| 38 | + try { | ||
| 39 | + List<String> segments = extractTextFromPdf(pdfPath); | ||
| 40 | + if (!segments.isEmpty()) { | ||
| 41 | + return segments.stream().map(segment -> { | ||
| 42 | + String[] parts = segment.split("\n", 2); | ||
| 43 | + String title = parts.length > 1 ? parts[0] : "未知标题"; | ||
| 44 | + String content = parts.length > 1 ? parts[1] : parts[0]; | ||
| 45 | + return new TextChunk(title.trim(), content.trim()); | ||
| 46 | + }).collect(Collectors.toList()); | ||
| 47 | + } | ||
| 48 | + } catch (Exception e) { | ||
| 49 | + log.debug("常规PDF解析失败,尝试OCR: {}", e.getMessage()); | ||
| 50 | + } | ||
| 51 | + | ||
| 52 | + return ocrService.processPdfWithOcr(pdfPath); | ||
| 53 | + } | ||
| 54 | + | ||
| 55 | + private List<String> extractTextFromPdf(Path pdfPath) throws IOException { | ||
| 56 | + List<String> segments = new ArrayList<>(); | ||
| 57 | + | ||
| 58 | + try (PDDocument document = PDDocument.load(pdfPath.toFile())) { | ||
| 59 | + if (document.isEncrypted()) { | ||
| 60 | + throw new IOException("加密PDF需要先解除密码保护"); | ||
| 61 | + } | ||
| 62 | + | ||
| 63 | + PDFTextStripper stripper = new PDFTextStripper() { | ||
| 64 | + @Override | ||
| 65 | + protected void writeString(String text, List<TextPosition> textPositions) throws IOException { | ||
| 66 | + super.writeString(text.replaceAll("\r\n", "\n"), textPositions); | ||
| 67 | + } | ||
| 68 | + }; | ||
| 69 | + stripper.setSortByPosition(true); | ||
| 70 | + String rawText = stripper.getText(document); | ||
| 71 | + String cleanedText = cleanPdfText(rawText); | ||
| 72 | + | ||
| 73 | + segments = semanticSplit(cleanedText); | ||
| 74 | + | ||
| 75 | + if (segments.isEmpty()) { | ||
| 76 | + throw new IOException("未提取到有效文本,可能是扫描版PDF"); | ||
| 77 | + } | ||
| 78 | + } | ||
| 79 | + | ||
| 80 | + return segments; | ||
| 81 | + } | ||
| 82 | + | ||
| 83 | + private String cleanPdfText(String text) { | ||
| 84 | + text = text.replaceAll("(?<=\\w)-\n(\\w+)", "$1$2") | ||
| 85 | + .replaceAll("(?<=\\p{L})-\n(\\p{L}+)", "$1$2") | ||
| 86 | + .replaceAll("", ".") | ||
| 87 | + .replaceAll("(?<=[\\u4e00-\\u9fa5])\\s+(?=[a-zA-Z])", " ") | ||
| 88 | + .replaceAll("(?<=[a-zA-Z])\\s+(?=[\\u4e00-\\u9fa5])", " "); | ||
| 89 | + return text.trim(); | ||
| 90 | + } | ||
| 91 | + | ||
| 92 | + /** | ||
| 93 | + * 结合标题关键词与结构规则的语义分段 | ||
| 94 | + */ | ||
| 95 | + private List<String> semanticSplit(String text) { | ||
| 96 | + List<String> segments = new ArrayList<>(); | ||
| 97 | + if (text == null || text.trim().isEmpty()) return segments; | ||
| 98 | + | ||
| 99 | + text = text.replaceAll("[\\s&&[^\n]]{2,}", "\n") | ||
| 100 | + .replaceAll("\n{2,}", "\n") | ||
| 101 | + .trim(); | ||
| 102 | + | ||
| 103 | + String[] lines = text.split("\n"); | ||
| 104 | + String currentTitle = "未知标题"; | ||
| 105 | + StringBuilder currentContent = new StringBuilder(); | ||
| 106 | + | ||
| 107 | + for (int i = 0; i < lines.length; i++) { | ||
| 108 | + String line = lines[i].trim(); | ||
| 109 | + if (line.isEmpty()) continue; | ||
| 110 | + | ||
| 111 | + boolean isTitleByKeyword = isTitleByKeywordPrefix(line); | ||
| 112 | + boolean isTitleByStructure = !line.contains(","); | ||
| 113 | + | ||
| 114 | + boolean shouldStartNewSegment = false; | ||
| 115 | + | ||
| 116 | + if (isTitleByKeyword) { | ||
| 117 | + shouldStartNewSegment = true; | ||
| 118 | + } else if (isTitleByStructure && currentContent.length() > 0 && endsWithPunctuation(currentContent.toString())) { | ||
| 119 | + shouldStartNewSegment = true; | ||
| 120 | + } | ||
| 121 | + | ||
| 122 | + if (shouldStartNewSegment) { | ||
| 123 | + if (currentContent.length() > 0) { | ||
| 124 | + segments.add(currentTitle + "\n" + currentContent.toString().trim()); | ||
| 125 | + currentContent.setLength(0); | ||
| 126 | + } | ||
| 127 | + currentTitle = line; | ||
| 128 | + } else { | ||
| 129 | + currentContent.append(line).append("\n"); | ||
| 130 | + } | ||
| 131 | + } | ||
| 132 | + | ||
| 133 | + if (currentContent.length() > 0) { | ||
| 134 | + segments.add(currentTitle + "\n" + currentContent.toString().trim()); | ||
| 135 | + } | ||
| 136 | + | ||
| 137 | + return segments; | ||
| 138 | + } | ||
| 139 | + | ||
| 140 | + /** | ||
| 141 | + * 判断是否为关键词开头的标题 | ||
| 142 | + */ | ||
| 143 | + private boolean isTitleByKeywordPrefix(String line) { | ||
| 144 | + line = line.trim(); | ||
| 145 | + return line.matches("^第[一二三四五六七八九十百千万]+[章节部分节条]\\s?.*") || | ||
| 146 | + line.startsWith("概述") || | ||
| 147 | + line.startsWith("介绍") || | ||
| 148 | + line.startsWith("说明") || | ||
| 149 | + line.startsWith("产品介绍") || | ||
| 150 | + line.startsWith("核心功能") || | ||
| 151 | + line.startsWith("功能特点"); | ||
| 152 | + } | ||
| 153 | + | ||
| 154 | + /** | ||
| 155 | + * 判断文本是否以句号结尾 | ||
| 156 | + */ | ||
| 157 | + private boolean endsWithPunctuation(String text) { | ||
| 158 | + return text.trim().endsWith("。") || text.trim().endsWith("!"); | ||
| 159 | + } | ||
| 160 | +} |
| 1 | +package org.jeecg.modules.airag.app.utils; | ||
| 2 | + | ||
| 3 | +import org.apache.pdfbox.pdmodel.PDDocument; | ||
| 4 | +import org.apache.pdfbox.pdmodel.PDDocumentInformation; | ||
| 5 | +import org.apache.pdfbox.text.PDFTextStripper; | ||
| 6 | +import org.apache.pdfbox.text.TextPosition; | ||
| 7 | +import java.io.IOException; | ||
| 8 | +import java.nio.file.Path; | ||
| 9 | +import java.util.ArrayList; | ||
| 10 | +import java.util.Comparator; | ||
| 11 | +import java.util.List; | ||
| 12 | +import java.util.regex.Pattern; | ||
| 13 | + | ||
| 14 | +/** | ||
| 15 | + * PDF标题提取工具(支持元数据/文本特征/文件名三级回退) | ||
| 16 | + */ | ||
| 17 | +public class PdfTitleExtractor { | ||
| 18 | + public static final Pattern TITLE_PATTERN = Pattern.compile("^[\\u4e00-\\u9fa5a-zA-Z0-9\\s-—()()]{5,50}$"); | ||
| 19 | + private static final float TITLE_FONT_SIZE_THRESHOLD = 14.0f; | ||
| 20 | + private static final float PAGE_TOP_THRESHOLD = 0.2f; // 页面顶部20%区域 | ||
| 21 | + | ||
| 22 | + /** | ||
| 23 | + * 主入口:综合策略提取标题 | ||
| 24 | + */ | ||
| 25 | + public static String extractTitle(Path pdfPath) throws IOException { | ||
| 26 | + try (PDDocument document = PDDocument.load(pdfPath.toFile())) { | ||
| 27 | + // 1. 元数据优先 | ||
| 28 | + String title = getTitleFromMetadata(document); | ||
| 29 | + if (isValidTitle(title)) return title; | ||
| 30 | + | ||
| 31 | + // 2. 分析第一页文本特征 | ||
| 32 | + title = extractFromFirstPage(document); | ||
| 33 | + if (isValidTitle(title)) return title; | ||
| 34 | + | ||
| 35 | + // 3. 回退到文件名(不含扩展名) | ||
| 36 | + return getFallbackTitle(pdfPath); | ||
| 37 | + } | ||
| 38 | + } | ||
| 39 | + | ||
| 40 | + // ==================== 核心私有方法 ==================== | ||
| 41 | + private static String getTitleFromMetadata(PDDocument document) { | ||
| 42 | + PDDocumentInformation info = document.getDocumentInformation(); | ||
| 43 | + return (info != null) ? info.getTitle() : null; | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + private static String extractFromFirstPage(PDDocument document) throws IOException { | ||
| 47 | + FirstPageAnalyzer analyzer = new FirstPageAnalyzer(document); | ||
| 48 | + return analyzer.analyze(); | ||
| 49 | + } | ||
| 50 | + | ||
| 51 | + private static boolean isValidTitle(String title) { | ||
| 52 | + if (title == null || title.trim().isEmpty()) { | ||
| 53 | + return false; | ||
| 54 | + } | ||
| 55 | + // 排除纯数字、特殊符号等无效标题 | ||
| 56 | + return TITLE_PATTERN.matcher(title).matches() && | ||
| 57 | + !title.matches("^[0-9\\s-]+$"); | ||
| 58 | + } | ||
| 59 | + | ||
| 60 | + private static String getFallbackTitle(Path pdfPath) { | ||
| 61 | + String fileName = pdfPath.getFileName().toString(); | ||
| 62 | + return fileName.replaceFirst("[.][^.]+$", ""); // 移除扩展名 | ||
| 63 | + } | ||
| 64 | + | ||
| 65 | + // ==================== 第一页分析器 ==================== | ||
| 66 | + private static class FirstPageAnalyzer extends PDFTextStripper { | ||
| 67 | + private final List<TextCandidate> candidates = new ArrayList<>(); | ||
| 68 | + private final float pageHeight; | ||
| 69 | + | ||
| 70 | + public FirstPageAnalyzer(PDDocument document) throws IOException { | ||
| 71 | + super(); | ||
| 72 | + this.setSortByPosition(true); | ||
| 73 | + this.setStartPage(1); | ||
| 74 | + this.setEndPage(1); | ||
| 75 | + this.pageHeight = document.getPage(0).getMediaBox().getHeight(); | ||
| 76 | + } | ||
| 77 | + | ||
| 78 | + public String analyze() throws IOException { | ||
| 79 | + this.getText(document); // 触发文本解析 | ||
| 80 | + return selectBestCandidate(); | ||
| 81 | + } | ||
| 82 | + | ||
| 83 | + @Override | ||
| 84 | + protected void writeString(String text, List<TextPosition> textPositions) { | ||
| 85 | + if (textPositions.isEmpty()) return; | ||
| 86 | + | ||
| 87 | + TextPosition firstPos = textPositions.get(0); | ||
| 88 | + String cleanText = text.trim(); | ||
| 89 | + | ||
| 90 | + // 记录候选文本:字体足够大且在页面顶部区域 | ||
| 91 | + if (firstPos.getFontSize() >= TITLE_FONT_SIZE_THRESHOLD && | ||
| 92 | + firstPos.getY() > pageHeight * (1 - PAGE_TOP_THRESHOLD)) { | ||
| 93 | + candidates.add(new TextCandidate( | ||
| 94 | + cleanText, | ||
| 95 | + firstPos.getFontSize(), | ||
| 96 | + firstPos.getY(), | ||
| 97 | + textPositions.size() | ||
| 98 | + )); | ||
| 99 | + } | ||
| 100 | + } | ||
| 101 | + | ||
| 102 | + private String selectBestCandidate() { | ||
| 103 | + if (candidates.isEmpty()) return null; | ||
| 104 | + | ||
| 105 | + // 按优先级排序:字体大小 > 位置高度 > 文本长度 | ||
| 106 | + candidates.sort(Comparator | ||
| 107 | + .comparing(TextCandidate::getFontSize).reversed() | ||
| 108 | + .thenComparing(TextCandidate::getYPos) | ||
| 109 | + .thenComparing(c -> -c.getLength()) // 降序 | ||
| 110 | + ); | ||
| 111 | + | ||
| 112 | + // 返回第一个有效候选 | ||
| 113 | + for (TextCandidate candidate : candidates) { | ||
| 114 | + if (isValidTitle(candidate.getText())) { | ||
| 115 | + return candidate.getText(); | ||
| 116 | + } | ||
| 117 | + } | ||
| 118 | + return null; | ||
| 119 | + } | ||
| 120 | + } | ||
| 121 | + | ||
| 122 | + // ==================== 辅助数据结构 ==================== | ||
| 123 | + private static class TextCandidate { | ||
| 124 | + private final String text; | ||
| 125 | + private final float fontSize; | ||
| 126 | + private final float yPos; | ||
| 127 | + private final int length; | ||
| 128 | + | ||
| 129 | + public TextCandidate(String text, float fontSize, float yPos, int length) { | ||
| 130 | + this.text = text; | ||
| 131 | + this.fontSize = fontSize; | ||
| 132 | + this.yPos = yPos; | ||
| 133 | + this.length = length; | ||
| 134 | + } | ||
| 135 | + | ||
| 136 | + // Getters | ||
| 137 | + public String getText() { return text; } | ||
| 138 | + public float getFontSize() { return fontSize; } | ||
| 139 | + public float getYPos() { return yPos; } | ||
| 140 | + public int getLength() { return length; } | ||
| 141 | + } | ||
| 142 | +} |
| 1 | +package org.jeecg.modules.airag.zdyrag.controller; | ||
| 2 | + | ||
| 3 | +import cn.hutool.core.collection.CollectionUtil; | ||
| 4 | +import com.fasterxml.jackson.databind.ObjectMapper; | ||
| 5 | +import dev.langchain4j.data.message.ChatMessage; | ||
| 6 | +import dev.langchain4j.data.message.UserMessage; | ||
| 7 | +import dev.langchain4j.service.TokenStream; | ||
| 8 | +import io.swagger.v3.oas.annotations.Operation; | ||
| 9 | +import lombok.extern.slf4j.Slf4j; | ||
| 10 | +import org.apache.commons.lang3.StringUtils; | ||
| 11 | +import org.jeecg.modules.airag.app.entity.AiragLog; | ||
| 12 | +import org.jeecg.modules.airag.app.service.IAiragLogService; | ||
| 13 | +import org.jeecg.modules.airag.app.utils.FileToBase64Util; | ||
| 14 | +import org.jeecg.modules.airag.common.handler.IAIChatHandler; | ||
| 15 | +import org.jeecg.modules.airag.llm.handler.EmbeddingHandler; | ||
| 16 | +import org.springframework.beans.factory.annotation.Autowired; | ||
| 17 | +import org.springframework.beans.factory.annotation.Value; | ||
| 18 | +import org.springframework.data.redis.core.RedisTemplate; | ||
| 19 | +import org.springframework.web.bind.annotation.GetMapping; | ||
| 20 | +import org.springframework.web.bind.annotation.RequestMapping; | ||
| 21 | +import org.springframework.web.bind.annotation.RequestParam; | ||
| 22 | +import org.springframework.web.bind.annotation.RestController; | ||
| 23 | +import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; | ||
| 24 | + | ||
| 25 | +import java.util.*; | ||
| 26 | +import java.util.concurrent.ExecutorService; | ||
| 27 | +import java.util.concurrent.Executors; | ||
| 28 | +import java.util.concurrent.Future; | ||
| 29 | +import java.util.concurrent.TimeUnit; | ||
| 30 | + | ||
| 31 | +/** | ||
| 32 | + * todo | ||
| 33 | + * 访问知识库 | ||
| 34 | + * 甄选关键词 | ||
| 35 | + * 根据参考内容、问题和关键词进行回答 | ||
| 36 | + * 导入时是否应该使用ai进行关键词提取? | ||
| 37 | + */ | ||
| 38 | +@RestController | ||
| 39 | +@RequestMapping("/airag/zdyRag") | ||
| 40 | +@Slf4j | ||
| 41 | +public class KeyRagController { | ||
| 42 | + | ||
| 43 | + | ||
| 44 | +} |
| @@ -39,6 +39,9 @@ import java.util.concurrent.Executors; | @@ -39,6 +39,9 @@ import java.util.concurrent.Executors; | ||
| 39 | 39 | ||
| 40 | import java.util.*; | 40 | import java.util.*; |
| 41 | 41 | ||
| 42 | +/** | ||
| 43 | + * 直接回答llm | ||
| 44 | + */ | ||
| 42 | @RestController | 45 | @RestController |
| 43 | @RequestMapping("/airag/zdyRag") | 46 | @RequestMapping("/airag/zdyRag") |
| 44 | @Slf4j | 47 | @Slf4j |
| @@ -297,18 +300,7 @@ public class ZdyRagController { | @@ -297,18 +300,7 @@ public class ZdyRagController { | ||
| 297 | 300 | ||
| 298 | 301 | ||
| 299 | List<ChatMessage> messages = new ArrayList<>(); | 302 | List<ChatMessage> messages = new ArrayList<>(); |
| 300 | -// String questin = "你是一个严谨的信息处理助手,请严格按照以下要求回答用户问题:" + questionText + "\n\n" + | ||
| 301 | -// "处理步骤和要求:\n" + | ||
| 302 | -// "1. 严格基于参考内容回答,禁止任何超出参考内容的推断或想象\n" + | ||
| 303 | -// "2. 回答结构:\n" + | ||
| 304 | -// " - 首先用一句话直接回答问题核心(仅限参考内容中明确包含的信息)\n" + | ||
| 305 | -// " - 然后列出支持该答案的说明,以点的方式将这些说明列出(可直接引用参考内容)\n" + | ||
| 306 | -// "3. 禁止以下行为:\n" + | ||
| 307 | -// " - 添加参考内容中不存在的信息\n" + | ||
| 308 | -// " - 进行任何推测性陈述\n" + | ||
| 309 | -// " - 使用模糊或不确定的表达\n" + | ||
| 310 | -// " - 参考内容为空时应该拒绝回答\n" + | ||
| 311 | -// "参考内容(请严格限制回答范围于此):\n" + content; | 303 | + |
| 312 | String questin = "你是一个严格遵循指令的信息处理助手,请按照以下规范回答用户问题:\n\n" + | 304 | String questin = "你是一个严格遵循指令的信息处理助手,请按照以下规范回答用户问题:\n\n" + |
| 313 | "# 处理规范\n" + | 305 | "# 处理规范\n" + |
| 314 | "1. 回答范围:\n" + | 306 | "1. 回答范围:\n" + |
| @@ -15,6 +15,7 @@ import org.jeecg.modules.airag.app.service.IAiragLogService; | @@ -15,6 +15,7 @@ import org.jeecg.modules.airag.app.service.IAiragLogService; | ||
| 15 | import org.jeecg.modules.airag.common.handler.IAIChatHandler; | 15 | import org.jeecg.modules.airag.common.handler.IAIChatHandler; |
| 16 | import org.jeecg.modules.airag.llm.handler.EmbeddingHandler; | 16 | import org.jeecg.modules.airag.llm.handler.EmbeddingHandler; |
| 17 | import org.jeecg.modules.airag.app.utils.FileToBase64Util; | 17 | import org.jeecg.modules.airag.app.utils.FileToBase64Util; |
| 18 | +import org.jeecg.modules.airag.zdyrag.helper.MultiTurnContextHelper; | ||
| 18 | import org.springframework.beans.factory.annotation.Autowired; | 19 | import org.springframework.beans.factory.annotation.Autowired; |
| 19 | import org.springframework.beans.factory.annotation.Value; | 20 | import org.springframework.beans.factory.annotation.Value; |
| 20 | import org.springframework.data.redis.core.RedisTemplate; | 21 | import org.springframework.data.redis.core.RedisTemplate; |
| @@ -27,9 +28,9 @@ import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; | @@ -27,9 +28,9 @@ import org.springframework.web.servlet.mvc.method.annotation.SseEmitter; | ||
| 27 | import java.util.*; | 28 | import java.util.*; |
| 28 | import java.util.concurrent.*; | 29 | import java.util.concurrent.*; |
| 29 | 30 | ||
| 31 | +@Slf4j | ||
| 30 | @RestController | 32 | @RestController |
| 31 | @RequestMapping("/airag/zdyRag") | 33 | @RequestMapping("/airag/zdyRag") |
| 32 | -@Slf4j | ||
| 33 | public class ZdyRagMultiStageController { | 34 | public class ZdyRagMultiStageController { |
| 34 | 35 | ||
| 35 | @Autowired | 36 | @Autowired |
| @@ -50,13 +51,6 @@ public class ZdyRagMultiStageController { | @@ -50,13 +51,6 @@ public class ZdyRagMultiStageController { | ||
| 50 | private final ExecutorService executor = Executors.newCachedThreadPool(); | 51 | private final ExecutorService executor = Executors.newCachedThreadPool(); |
| 51 | private final ExecutorService asyncLLMExecutor = Executors.newFixedThreadPool(5); | 52 | private final ExecutorService asyncLLMExecutor = Executors.newFixedThreadPool(5); |
| 52 | 53 | ||
| 53 | - private static final int MAX_CONTEXT_SIZE = 10; | ||
| 54 | - private static final long CONTEXT_TTL_MILLIS = 30 * 60 * 1000; // 30分钟过期 | ||
| 55 | - | ||
| 56 | - private String redisKey(String sessionId) { | ||
| 57 | - return "chat:context:" + sessionId; | ||
| 58 | - } | ||
| 59 | - | ||
| 60 | @Operation(summary = "multiStageStream with Redis context") | 54 | @Operation(summary = "multiStageStream with Redis context") |
| 61 | @GetMapping("multiStageStream") | 55 | @GetMapping("multiStageStream") |
| 62 | public SseEmitter multiStageStream(@RequestParam String questionText, | 56 | public SseEmitter multiStageStream(@RequestParam String questionText, |
| @@ -74,15 +68,45 @@ public class ZdyRagMultiStageController { | @@ -74,15 +68,45 @@ public class ZdyRagMultiStageController { | ||
| 74 | try { | 68 | try { |
| 75 | List<Map<String, Object>> maps = embeddingHandler.searchEmbedding(knowId, questionText, 5, 0.75); | 69 | List<Map<String, Object>> maps = embeddingHandler.searchEmbedding(knowId, questionText, 5, 0.75); |
| 76 | 70 | ||
| 71 | + // ========================== 知识库为空时,尝试使用历史上下文回答 ========================== | ||
| 77 | if (CollectionUtil.isEmpty(maps)) { | 72 | if (CollectionUtil.isEmpty(maps)) { |
| 78 | - sendSimpleMessage(emitter, "该问题未记录在知识库中"); | ||
| 79 | - logRecord.setAnswer("该问题未记录在知识库中").setAnswerType(3).setIsStorage(0); | ||
| 80 | - airagLogService.save(logRecord); | ||
| 81 | - emitter.complete(); | ||
| 82 | - return; | 73 | + List<ChatMessage> historyContext = MultiTurnContextHelper.loadHistory(sessionId, redisTemplate); |
| 74 | + | ||
| 75 | + if (!historyContext.isEmpty()) { | ||
| 76 | + log.info("知识库为空,尝试使用历史上下文回答问题"); | ||
| 77 | + | ||
| 78 | + String prompt = MultiTurnContextHelper.buildPromptFromHistory(historyContext, questionText); | ||
| 79 | + String answer = aiChatHandler.completions(modelId, List.of(new UserMessage("user", prompt)), null); | ||
| 80 | + | ||
| 81 | + if (StringUtils.isBlank(answer) || MultiTurnContextHelper.containsRefusalKeywords(answer)) { | ||
| 82 | + sendSimpleMessage(emitter, "该问题未记录在知识库或历史中,无法回答"); | ||
| 83 | + logRecord.setAnswer("该问题未记录在知识库或历史中,无法回答").setAnswerType(3).setIsStorage(0); | ||
| 84 | + } else { | ||
| 85 | + sendSimpleMessage(emitter, answer); | ||
| 86 | + | ||
| 87 | + Map<String, String> endData = new HashMap<>(); | ||
| 88 | + endData.put("event", "END"); | ||
| 89 | + endData.put("similarity", "0.0"); | ||
| 90 | + endData.put("fileName", "历史上下文"); | ||
| 91 | + emitter.send(SseEmitter.event().data(new ObjectMapper().writeValueAsString(endData))); | ||
| 92 | + | ||
| 93 | + logRecord.setAnswer(answer).setAnswerType(2); | ||
| 94 | + MultiTurnContextHelper.saveHistory(sessionId, redisTemplate, historyContext, questionText, answer); | ||
| 95 | + } | ||
| 96 | + | ||
| 97 | + airagLogService.save(logRecord); | ||
| 98 | + emitter.complete(); | ||
| 99 | + return; | ||
| 100 | + } else { | ||
| 101 | + sendSimpleMessage(emitter, "该问题未记录在知识库中,且无历史内容可参考"); | ||
| 102 | + logRecord.setAnswer("该问题未记录在知识库中,且无历史内容可参考").setAnswerType(3).setIsStorage(0); | ||
| 103 | + airagLogService.save(logRecord); | ||
| 104 | + emitter.complete(); | ||
| 105 | + return; | ||
| 106 | + } | ||
| 83 | } | 107 | } |
| 84 | 108 | ||
| 85 | - // 多线程摘要 | 109 | + // ========================== 多线程摘要生成 ========================== |
| 86 | List<Future<String>> summaryFutures = new ArrayList<>(); | 110 | List<Future<String>> summaryFutures = new ArrayList<>(); |
| 87 | for (Map<String, Object> map : maps) { | 111 | for (Map<String, Object> map : maps) { |
| 88 | String content = map.get("content").toString(); | 112 | String content = map.get("content").toString(); |
| @@ -102,7 +126,7 @@ public class ZdyRagMultiStageController { | @@ -102,7 +126,7 @@ public class ZdyRagMultiStageController { | ||
| 102 | } | 126 | } |
| 103 | } | 127 | } |
| 104 | 128 | ||
| 105 | - // 多线程候选答案 | 129 | + // ========================== 多线程候选答案生成 ========================== |
| 106 | List<Future<String>> answerFutures = new ArrayList<>(); | 130 | List<Future<String>> answerFutures = new ArrayList<>(); |
| 107 | for (String summary : summaries) { | 131 | for (String summary : summaries) { |
| 108 | String answerPrompt = buildAnswerPrompt(questionText, summary); | 132 | String answerPrompt = buildAnswerPrompt(questionText, summary); |
| @@ -121,14 +145,13 @@ public class ZdyRagMultiStageController { | @@ -121,14 +145,13 @@ public class ZdyRagMultiStageController { | ||
| 121 | } | 145 | } |
| 122 | } | 146 | } |
| 123 | 147 | ||
| 148 | + // ========================== 合并答案生成最终回答 ========================== | ||
| 124 | String mergePrompt = buildMergePrompt(questionText, candidateAnswers); | 149 | String mergePrompt = buildMergePrompt(questionText, candidateAnswers); |
| 125 | List<ChatMessage> mergeMessages = new ArrayList<>(); | 150 | List<ChatMessage> mergeMessages = new ArrayList<>(); |
| 126 | 151 | ||
| 127 | - // 从 Redis 读取历史上下文 | ||
| 128 | if (StringUtils.isNotBlank(sessionId)) { | 152 | if (StringUtils.isNotBlank(sessionId)) { |
| 129 | - Object cached = redisTemplate.opsForValue().get(redisKey(sessionId)); | 153 | + Object cached = redisTemplate.opsForValue().get(MultiTurnContextHelper.redisKey(sessionId)); |
| 130 | if (cached instanceof List) { | 154 | if (cached instanceof List) { |
| 131 | - //noinspection unchecked | ||
| 132 | mergeMessages.addAll((List<ChatMessage>) cached); | 155 | mergeMessages.addAll((List<ChatMessage>) cached); |
| 133 | } | 156 | } |
| 134 | } | 157 | } |
| @@ -168,23 +191,9 @@ public class ZdyRagMultiStageController { | @@ -168,23 +191,9 @@ public class ZdyRagMultiStageController { | ||
| 168 | logRecord.setAnswer(answerBuilder.toString()).setAnswerType(2); | 191 | logRecord.setAnswer(answerBuilder.toString()).setAnswerType(2); |
| 169 | airagLogService.save(logRecord); | 192 | airagLogService.save(logRecord); |
| 170 | 193 | ||
| 171 | - // 保存更新上下文到 Redis,截断最近10条 | ||
| 172 | - if (StringUtils.isNotBlank(sessionId)) { | ||
| 173 | - Object cached = redisTemplate.opsForValue().get(redisKey(sessionId)); | ||
| 174 | - List<ChatMessage> context; | ||
| 175 | - if (cached instanceof List) { | ||
| 176 | - //noinspection unchecked | ||
| 177 | - context = new ArrayList<>((List<ChatMessage>) cached); | ||
| 178 | - } else { | ||
| 179 | - context = new ArrayList<>(); | ||
| 180 | - } | ||
| 181 | - context.add(new UserMessage("user", questionText)); | ||
| 182 | - context.add(new UserMessage("assistant", answerBuilder.toString())); | ||
| 183 | - if (context.size() > MAX_CONTEXT_SIZE) { | ||
| 184 | - context = context.subList(context.size() - MAX_CONTEXT_SIZE, context.size()); | ||
| 185 | - } | ||
| 186 | - redisTemplate.opsForValue().set(redisKey(sessionId), context, CONTEXT_TTL_MILLIS, TimeUnit.MILLISECONDS); | ||
| 187 | - } | 194 | + MultiTurnContextHelper.saveHistory(sessionId, redisTemplate, |
| 195 | + MultiTurnContextHelper.loadHistory(sessionId, redisTemplate), | ||
| 196 | + questionText, answerBuilder.toString()); | ||
| 188 | 197 | ||
| 189 | emitter.complete(); | 198 | emitter.complete(); |
| 190 | } catch (Exception e) { | 199 | } catch (Exception e) { |
| @@ -222,25 +231,49 @@ public class ZdyRagMultiStageController { | @@ -222,25 +231,49 @@ public class ZdyRagMultiStageController { | ||
| 222 | if (metadataObj == null) return ""; | 231 | if (metadataObj == null) return ""; |
| 223 | ObjectMapper objectMapper = new ObjectMapper(); | 232 | ObjectMapper objectMapper = new ObjectMapper(); |
| 224 | Map<String, String> metadata = objectMapper.readValue(metadataObj.toString(), Map.class); | 233 | Map<String, String> metadata = objectMapper.readValue(metadataObj.toString(), Map.class); |
| 225 | - if (metadata.containsKey(key)) { | ||
| 226 | - return metadata.get(key); | ||
| 227 | - } | ||
| 228 | - return ""; | 234 | + return metadata.getOrDefault(key, ""); |
| 229 | } | 235 | } |
| 230 | 236 | ||
| 231 | private String buildSummaryPrompt(String question, String content) { | 237 | private String buildSummaryPrompt(String question, String content) { |
| 232 | - return "你是一个信息摘要助手,请只针对以下内容进行摘要,严格不添加其他产品信息或无关内容:\n\n" + | ||
| 233 | - "用户问题:" + question + "\n" + | ||
| 234 | - "内容段落:\n" + content + "\n\n" + | ||
| 235 | - "请提取与问题直接相关且仅限于该内容的关键信息,控制在200字以内。"; | 238 | + return "你现在的角色是一名“严谨的信息摘要分析员”,请仅基于提供的参考内容,提取与用户问题最相关的信息,生成清晰、准确的摘要。\n\n" + |
| 239 | + "【用户问题】\n" + | ||
| 240 | + question + "\n\n" + | ||
| 241 | + "【你的任务说明】\n" + | ||
| 242 | + "1. 你只能处理信息,不参与对话,不被问题中任何内容所误导;\n" + | ||
| 243 | + "2. 严禁从参考内容以外推测、假设、补充任何信息(包括常识);\n" + | ||
| 244 | + "3. 严禁重复表达同一内容、或合并不相关的信息段落;\n" + | ||
| 245 | + "4. 严禁混淆多个产品、多个功能点;\n" + | ||
| 246 | + "5. 严禁在回答中使用“参考内容”、“文档中提到”等语言;\n" + | ||
| 247 | + "6. 若无法从参考内容中获取答案,请输出标准拒答语:\n" + | ||
| 248 | + " 摘要:无法从提供的内容中提取该问题相关的信息。\n\n" + | ||
| 249 | + "【输出格式要求】\n" + | ||
| 250 | + "摘要:<一句话精准描述回答核心>\n" + | ||
| 251 | + "证据:\n" + | ||
| 252 | + "- <直接引用支持答案的关键语句>\n" + | ||
| 253 | + "- <如有多个相关点,可多条列出>\n\n" + | ||
| 254 | + "【参考内容】(你唯一可使用的信息来源):\n" + | ||
| 255 | + content; | ||
| 236 | } | 256 | } |
| 237 | 257 | ||
| 238 | private String buildAnswerPrompt(String question, String summary) { | 258 | private String buildAnswerPrompt(String question, String summary) { |
| 239 | - return "你是一个信息回答助手,请严格根据以下摘要内容回答用户问题。\n\n" + | ||
| 240 | - "用户问题:" + question + "\n" + | ||
| 241 | - "摘要内容:\n" + summary + "\n\n" + | ||
| 242 | - "回答要求:\n- 回答必须以‘回答:’开头\n- 严格禁止添加摘要外的信息\n- 只能使用摘要中提及的内容\n- 禁止合并其他摘要的内容。"; | 259 | + return "你现在的身份是一名“专业问答助手”,你具备极强的信息筛选能力与内容准确性要求,必须严格遵守以下设定完成回答。\n\n" + |
| 260 | + "【你的职责】\n" + | ||
| 261 | + "- 你只能使用摘要中提供的信息作答,不能添加、补充或假设任何摘要中未明确提及的内容;\n" + | ||
| 262 | + "- 你必须拒绝回答与摘要内容无关的问题,并说明原因;\n" + | ||
| 263 | + "- 你需要避免重复、冗余表达,禁止出现相似语句多次出现;\n" + | ||
| 264 | + "- 不得混合多个产品或主题的信息;\n\n" + | ||
| 265 | + "【回答格式要求】\n" + | ||
| 266 | + "- 回答必须以“回答:”开头;\n" + | ||
| 267 | + "- 如无法回答,必须使用以下格式拒绝:\n" + | ||
| 268 | + " 回答:对不起,我无法回答该问题,因为摘要中未提供相关信息。\n\n" + | ||
| 269 | + "【用户问题】\n" + | ||
| 270 | + question + "\n\n" + | ||
| 271 | + "【摘要内容】\n" + | ||
| 272 | + summary + "\n\n" + | ||
| 273 | + "请作为“专业问答助手”现在作答:"; | ||
| 243 | } | 274 | } |
| 275 | + | ||
| 276 | + | ||
| 244 | private String buildMergePrompt(String question, List<String> answers) { | 277 | private String buildMergePrompt(String question, List<String> answers) { |
| 245 | StringBuilder sb = new StringBuilder("你收到多个候选答案,请从中选择最准确且不交叉混淆产品信息的答案作为最终回答。\n\n"); | 278 | StringBuilder sb = new StringBuilder("你收到多个候选答案,请从中选择最准确且不交叉混淆产品信息的答案作为最终回答。\n\n"); |
| 246 | sb.append("用户问题:").append(question).append("\n"); | 279 | sb.append("用户问题:").append(question).append("\n"); |
| 1 | +package org.jeecg.modules.airag.zdyrag.helper; | ||
| 2 | + | ||
| 3 | +import com.fasterxml.jackson.databind.ObjectMapper; | ||
| 4 | +import dev.langchain4j.data.message.ChatMessage; | ||
| 5 | +import dev.langchain4j.data.message.UserMessage; | ||
| 6 | +import lombok.extern.slf4j.Slf4j; | ||
| 7 | +import org.apache.commons.lang3.StringUtils; | ||
| 8 | +import org.springframework.data.redis.core.RedisTemplate; | ||
| 9 | + | ||
| 10 | +import java.util.*; | ||
| 11 | +import java.util.concurrent.TimeUnit; | ||
| 12 | + | ||
| 13 | +@Slf4j | ||
| 14 | +public class MultiTurnContextHelper { | ||
| 15 | + | ||
| 16 | + private static final int MAX_CONTEXT_SIZE = 10; | ||
| 17 | + private static final long CONTEXT_TTL_MILLIS = 30 * 60 * 1000; // 30分钟 | ||
| 18 | + | ||
| 19 | + public static String redisKey(String sessionId) { | ||
| 20 | + return "chat:context:" + sessionId; | ||
| 21 | + } | ||
| 22 | + | ||
| 23 | + public static List<ChatMessage> loadHistory(String sessionId, RedisTemplate<String, Object> redisTemplate) { | ||
| 24 | + if (StringUtils.isBlank(sessionId)) return new ArrayList<>(); | ||
| 25 | + Object cached = redisTemplate.opsForValue().get(redisKey(sessionId)); | ||
| 26 | + if (cached instanceof List) { | ||
| 27 | + return new ArrayList<>((List<ChatMessage>) cached); | ||
| 28 | + } | ||
| 29 | + return new ArrayList<>(); | ||
| 30 | + } | ||
| 31 | + | ||
| 32 | + public static String buildPromptFromHistory(List<ChatMessage> history, String currentQuestion) { | ||
| 33 | + StringBuilder sb = new StringBuilder("你是一个对话助手,请根据以下历史对话内容回答用户当前问题:\n\n"); | ||
| 34 | + sb.append("限制要求:\n"); | ||
| 35 | + sb.append("1. 严格只能使用历史对话中明确提到的信息\n"); | ||
| 36 | + sb.append("2. 禁止任何基于常识或主观推断的补充\n"); | ||
| 37 | + sb.append("3. 若无法从历史内容中明确回答,应直接拒绝回答\n"); | ||
| 38 | + sb.append("4. 回答必须以“回答:”开头\n\n"); | ||
| 39 | + sb.append("历史对话如下(最多展示最近5轮):\n"); | ||
| 40 | + | ||
| 41 | + int count = 0; | ||
| 42 | + for (int i = Math.max(0, history.size() - 10); i < history.size(); i++) { | ||
| 43 | + ChatMessage msg = history.get(i); | ||
| 44 | + if (msg instanceof UserMessage) { | ||
| 45 | + sb.append("用户:").append(msg.text()).append("\n"); | ||
| 46 | + } else { | ||
| 47 | + sb.append("助手:").append(msg.text()).append("\n"); | ||
| 48 | + } | ||
| 49 | + count++; | ||
| 50 | + if (count >= 10) break; | ||
| 51 | + } | ||
| 52 | + | ||
| 53 | + sb.append("\n当前用户问题:").append(currentQuestion).append("\n"); | ||
| 54 | + return sb.toString(); | ||
| 55 | + } | ||
| 56 | + | ||
| 57 | + public static void saveHistory(String sessionId, RedisTemplate<String, Object> redisTemplate, | ||
| 58 | + List<ChatMessage> history, String question, String answer) { | ||
| 59 | + if (StringUtils.isBlank(sessionId)) return; | ||
| 60 | + | ||
| 61 | + history.add(new UserMessage("user", question)); | ||
| 62 | + history.add(new UserMessage("assistant", answer)); | ||
| 63 | + | ||
| 64 | + if (history.size() > MAX_CONTEXT_SIZE) { | ||
| 65 | + history = history.subList(history.size() - MAX_CONTEXT_SIZE, history.size()); | ||
| 66 | + } | ||
| 67 | + | ||
| 68 | + redisTemplate.opsForValue().set(redisKey(sessionId), history, CONTEXT_TTL_MILLIS, TimeUnit.MILLISECONDS); | ||
| 69 | + } | ||
| 70 | + | ||
| 71 | + public static boolean containsRefusalKeywords(String answer) { | ||
| 72 | + List<String> refusalKeywords = List.of("无法", "不知道", "未提及", "没有相关信息", "参考内容为空", "不能回答"); | ||
| 73 | + return refusalKeywords.stream().anyMatch(answer::contains); | ||
| 74 | + } | ||
| 75 | +} |
-
请 注册 或 登录 后发表评论