123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
- """
- 报告生成模块
- 根据研究结果生成结构化报告
- """
- from typing import Dict, List, Any, Optional
- import logging
- from backend.utils.api_client import LLMClient
- from backend.config import REPORT_MAX_LENGTH, REPORT_TEMPERATURE
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class ReportGenerator:
    """Research report generator.

    Condenses the retrieved papers (and optional clustering output) into a
    single prompt and asks ``self.llm_client`` to write the report text.
    """

    # Limits applied when condensing the inputs into the prompt.
    MAX_PROMPT_PAPERS = 10       # only the first N papers are listed
    MAX_AUTHORS_PER_PAPER = 3    # authors shown per paper
    MAX_SUMMARY_CHARS = 300      # characters of each abstract kept
    MAX_TITLES_PER_CLUSTER = 3   # representative titles shown per cluster

    def __init__(self):
        self.llm_client = LLMClient()

    async def generate_report(
        self,
        research_intent: str,
        keywords: List[str],
        papers: List[Dict[str, Any]],
        clusters: Optional[Dict[str, Any]] = None,
        max_length: int = REPORT_MAX_LENGTH,
        temperature: float = REPORT_TEMPERATURE
    ) -> Dict[str, Any]:
        """Generate a research report.

        Args:
            research_intent: The user's research intent; echoed in the result.
            keywords: Extracted keywords, joined with ", " in the prompt.
            papers: Retrieved papers. Each entry is read through the keys
                "title", "authors" and "summary"; missing or empty values
                are replaced by placeholders instead of failing the report.
            clusters: Optional clustering result. Only its "cluster_info"
                mapping (cluster id -> dict with a "papers" list) is used.
            max_length: Forwarded to the LLM client as ``max_tokens``.
            temperature: Forwarded to the LLM client as ``temperature``.

        Returns:
            A dict with "research_intent", "content", "keywords" and
            "paper_count". Failures are not raised: they are logged and the
            dict additionally carries "error", with "content" holding the
            failure message.
        """
        try:
            paper_text = self._format_papers(papers)
            cluster_text = self._format_clusters(clusters)
            prompt = self._build_prompt(
                research_intent, keywords, paper_text, cluster_text
            )

            logger.info("Generating research report")
            report_content = await self.llm_client.generate_text(
                prompt=prompt,
                temperature=temperature,
                max_tokens=max_length
            )

            return {
                "research_intent": research_intent,
                "content": report_content,
                "keywords": keywords,
                "paper_count": len(papers)
            }

        except Exception as e:
            # Boundary handler: callers receive an error payload, never an
            # exception. logger.exception logs at ERROR with the traceback.
            logger.exception("Report generation error: %s", e)
            return {
                "research_intent": research_intent,
                "content": f"报告生成失败: {str(e)}",
                "error": str(e),
                # Same keys as the success payload so consumers can read
                # them unconditionally; guarded because `papers` itself may
                # be what caused the failure.
                "keywords": keywords,
                "paper_count": len(papers) if papers else 0
            }

    def _format_papers(self, papers: List[Dict[str, Any]]) -> str:
        """Render the leading papers as numbered title/author/abstract entries."""
        entries = []
        for number, paper in enumerate(papers[:self.MAX_PROMPT_PAPERS], start=1):
            title = paper.get("title") or "Untitled"
            authors = paper.get("authors") or []
            if isinstance(authors, str):
                # A bare string would otherwise be sliced character by character.
                authors = [authors]
            author_text = ", ".join(
                str(author) for author in authors[:self.MAX_AUTHORS_PER_PAPER]
            )
            summary = str(paper.get("summary") or "")[:self.MAX_SUMMARY_CHARS]
            entries.append(
                f"{number}. {title} ({author_text})"
                f"\n摘要: {summary}..."
            )
        return "\n\n".join(entries)

    def _format_clusters(self, clusters: Optional[Dict[str, Any]]) -> str:
        """Render each non-empty cluster with its size and representative titles."""
        if not clusters or "cluster_info" not in clusters:
            return ""

        sections = []
        for cluster_id, info in clusters["cluster_info"].items():
            members = info.get("papers") or []
            if not members:
                continue
            titles = [
                str(member.get("title") or "Untitled")
                for member in members[:self.MAX_TITLES_PER_CLUSTER]
            ]
            label = self._cluster_label(cluster_id)
            sections.append(
                f"聚类 {label} (包含 {len(members)} 篇论文):\n"
                f"代表性论文: {'; '.join(titles)}\n\n"
            )
        return "".join(sections)

    @staticmethod
    def _cluster_label(cluster_id: Any) -> Any:
        """Return the 1-based display label for a cluster id."""
        try:
            return cluster_id + 1
        except TypeError:
            pass
        # Ids that arrive as strings (e.g. dict keys after a JSON round-trip)
        # cannot be incremented directly; convert when they are numeric and
        # otherwise show the id unchanged.
        try:
            return int(cluster_id) + 1
        except (TypeError, ValueError):
            return cluster_id

    @staticmethod
    def _build_prompt(
        research_intent: str,
        keywords: List[str],
        paper_text: str,
        cluster_text: str
    ) -> str:
        """Assemble the full LLM prompt from the prepared sections."""
        # The cluster heading is emitted only when there is cluster content.
        cluster_section = f"## 文献聚类\n{cluster_text}" if cluster_text else ""

        # One list element per prompt line; the empty first and last
        # elements produce the leading and trailing newline of the template.
        prompt_lines = [
            "",
            "作为一位科研助手,请基于以下信息为研究主题生成一份结构化的调研报告。",
            "",
            "## 研究主题",
            f"{research_intent}",
            "",
            "## 关键词",
            ", ".join(keywords),
            "",
            "## 检索到的文献",
            paper_text,
            "",
            cluster_section,
            "",
            "请生成一份完整的调研报告,包括以下部分:",
            "1. 研究背景与意义(简要介绍该领域的重要性和研究现状)",
            "2. 研究方向分析(基于文献聚类识别主要研究方向)",
            "3. 关键技术与方法(总结该领域的主要技术和方法论)",
            "4. 代表性研究工作(列举2-3项代表性研究及其贡献)",
            "5. 未来研究展望(分析领域挑战与未来可能的突破点)",
            "",
            "报告应当学术严谨、结构清晰、信息准确。长度控制在1000-1500字。",
            "",
        ]
        return "\n".join(prompt_lines)
|