123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852 |
- """
- 研究流程管理模块
- 整合关键词提取、文献检索和聚类分析
- """
- import asyncio
- from typing import Dict, List, Any, Optional
- import logging
- import time
- import json
- from pathlib import Path
- import uuid
- import contextlib
- from backend.utils.api_client import LLMClient, ArxivClient
- from backend.utils.keywords import extract_keywords, expand_search_queries
- from backend.config import (
- MAX_SEARCH_RESULTS,
- CACHE_DIR,
- ENABLE_CACHE
- )
- logger = logging.getLogger(__name__)
- class ResearchAgent:
- """优化后的研究智能体,支持任务拆分和异步处理"""
-
- def __init__(self):
- self.llm_client = LLMClient()
- self.arxiv_client = ArxivClient()
-
- async def process_research_intent(
- self,
- research_intent: str,
- max_results: int = None
- ) -> Dict[str, Any]:
- """
- 处理研究意图,但使用任务拆分方式
- """
- start_time = time.time()
-
- # 初始化结果字典
- result = {
- "research_intent": research_intent,
- "timestamp": time.time(),
- "language": "auto",
- "english_keywords": [],
- "original_keywords": [],
- "english_directions": [],
- "original_directions": [],
- "papers_by_direction": [],
- "direction_reports": [],
- "status": "processing",
- "progress": 0, # 添加进度跟踪
- "task_id": str(uuid.uuid4()) # 添加唯一任务ID
- }
-
- try:
- # 1. 提取关键词(保持不变)
- keywords_data = await self.llm_client.extract_keywords(research_intent, "auto")
-
- result["language"] = keywords_data["language"]
- result["english_keywords"] = keywords_data["english_keywords"]
- result["original_keywords"] = keywords_data["original_keywords"]
- result["progress"] = 20
-
- # 2. 生成研究方向(保持不变)
- directions_data = await self.llm_client.generate_research_directions(
- result["english_keywords"],
- result["language"]
- )
-
- result["english_directions"] = directions_data["english_directions"]
- result["original_directions"] = directions_data["original_directions"]
- result["progress"] = 30
-
- # 3. 并行处理每个研究方向
- async def process_directions():
- tasks = []
- for i, direction in enumerate(result["english_directions"]):
- original_dir = result["original_directions"][i] if i < len(result["original_directions"]) else direction
- task = self._process_single_direction(direction, original_dir, result["language"])
- tasks.append(task)
-
- # 并行执行所有方向的处理
- direction_results = await asyncio.gather(*tasks, return_exceptions=True)
-
- papers_by_direction = []
- reports = []
-
- for i, res in enumerate(direction_results):
- if not isinstance(res, Exception):
- papers_by_direction.append(res["papers_data"])
- if res["report"]:
- reports.append(res["report"])
- else:
- # 处理异常情况
- logger.error(f"Error processing direction {i}: {str(res)}")
- papers_by_direction.append({
- "direction": result["english_directions"][i],
- "original_direction": result["original_directions"][i] if i < len(result["original_directions"]) else result["english_directions"][i],
- "papers": []
- })
-
- return papers_by_direction, reports
-
- # 执行并行处理
- papers_by_direction, reports = await process_directions()
- result["papers_by_direction"] = papers_by_direction
- result["direction_reports"] = reports
-
- result["status"] = "completed"
- result["progress"] = 100
- result["processing_time"] = time.time() - start_time
-
- # 缓存结果
- if ENABLE_CACHE:
- self._cache_result(result)
-
- return result
-
- except Exception as e:
- logger.error(f"Error in research process: {str(e)}", exc_info=True)
- result["status"] = "error"
- result["error"] = str(e)
- return result
-
- async def _process_single_direction(
- self,
- direction: str,
- original_direction: str,
- language: str
- ) -> Dict[str, Any]:
- """处理单个研究方向的独立任务"""
- try:
- # 1. 为该方向生成搜索查询
- query = await self._generate_search_query(direction)
- logger.info(f"Searching papers with query: {query}")
-
- # 2. 搜索论文
- papers = await self._search_papers_with_fallback(
- query=query,
- max_results=4
- )
-
- # 3. 构建论文数据
- papers_data = {
- "direction": direction,
- "original_direction": original_direction,
- "papers": papers
- }
-
- # 4. 仅当有论文时生成报告
- report = None
- if papers:
- try:
- report = await self._generate_direction_report(
- direction,
- papers,
- language
- )
-
- if report:
- report = {
- "direction": direction,
- "original_direction": original_direction,
- "report": report
- }
- except Exception as e:
- logger.error(f"Report generation error for {direction}: {str(e)}")
-
- return {
- "papers_data": papers_data,
- "report": report
- }
-
- except Exception as e:
- logger.error(f"Error processing direction '{direction}': {str(e)}")
- raise e
-
- async def _generate_search_query(self, research_direction: str) -> str:
- """为研究方向生成有效的文献检索式"""
- prompt = f"""
- 将以下研究方向转换为简洁有效的文献检索式。
-
- 研究方向: {research_direction}
-
- 生成检索式时遵循以下规则:
- 1. 使用2-3个核心术语,用布尔运算符(AND, OR)连接
- 2. 结构简单,优先使用引号包围关键短语
- 3. 避免过度细化导致无搜索结果
- 4. 确保检索式能找到3-5篇相关论文
-
- 只返回检索式字符串,不包含其他说明。
- """
-
- query = await self.llm_client.generate_text(prompt, temperature=0.3)
-
- # 简化复杂查询,确保能找到结果
- simplified_query = self._simplify_query_if_needed(query.strip())
- return simplified_query
-
- def _simplify_query_if_needed(self, query: str) -> str:
- """如果查询过于复杂就简化它"""
- # 计算AND和OR运算符的数量
- and_count = query.upper().count(" AND ")
- or_count = query.upper().count(" OR ")
-
- # 如果太复杂,简化查询
- if and_count > 2:
- # 只保留前两个AND条件
- parts = query.split(" AND ", 2)
- return " AND ".join(parts[:2])
-
- return query
-
- async def _generate_direction_report(
- self,
- direction: str,
- papers: List[Dict[str, Any]],
- target_language: str = "en"
- ) -> Dict[str, Any]:
- """优化后的研究报告生成方法"""
- # 如果没有论文,返回空报告
- if not papers:
- return {
- "english_content": "No papers found for this research direction.",
- "translated_content": "未找到该研究方向的相关论文。"
- }
-
- # 精简论文信息
- papers_text = "\n\n".join([
- f"Title: {paper['title']}\nSummary: {paper['summary'][:300]}..." # 限制摘要长度
- for paper in papers[:3]
- ])
-
- # 定义报告结构
- sections = [
- "Overview (100 words)",
- "Key Findings (150 words)",
- "Future Directions (100 words)"
- ]
-
- english_content = f"# Research Report: {direction}\n\n"
-
- # 为每个部分单独生成内容
- for section in sections:
- section_name = section.split(" (")[0]
-
- section_prompt = f"""
- Write the {section} section for a research report on:
- {direction}
-
- Based on these papers:
- {papers_text}
-
- Be concise, focused and specific.
- """
-
- try:
- section_content = await self.llm_client.generate_text(
- section_prompt,
- temperature=0.4,
- max_tokens=300
- )
-
- english_content += f"## {section_name}\n\n{section_content}\n\n"
- except Exception as e:
- english_content += f"## {section_name}\n\n[Error generating this section: {str(e)}]\n\n"
-
- # 翻译内容
- translated_content = english_content
- if target_language != "en":
- try:
- translated_content = await self.llm_client.translate_text(
- english_content, "en", target_language
- )
- except Exception as e:
- logger.error(f"Translation error: {str(e)}")
-
- return {
- "english_content": english_content,
- "translated_content": translated_content
- }
-
- def _cache_result(self, result: Dict[str, Any]) -> None:
- """缓存研究结果"""
- try:
- cache_file = Path(CACHE_DIR) / f"research_{int(time.time())}.json"
- with open(cache_file, "w", encoding="utf-8") as f:
- json.dump(result, f, ensure_ascii=False, indent=2)
- logger.info(f"Cached result to {cache_file}")
- except Exception as e:
- logger.error(f"Failed to cache result: {str(e)}")
- async def _search_papers_with_fallback(self, query: str, max_results: int = 4) -> List[Dict[str, Any]]:
- """文献检索策略,确保每个研究方向有3-5篇论文"""
- # Validate query before proceeding
- if not query or "Error generating text" in query:
- logger.error(f"Invalid search query: {query}")
- return [] # Return empty list instead of attempting to search with error string
-
- # 第一次尝试:使用完整检索式
- logger.info(f"使用检索式搜索论文: {query}")
- papers = await self.arxiv_client.search_papers(query=query, max_results=max_results)
-
- # 如果结果不足,尝试简化检索式
- if len(papers) < 3:
- simplified_query = self._get_simplified_query(query)
- if simplified_query != query:
- logger.info(f"结果不足,使用简化检索式: {simplified_query}")
- more_papers = await self.arxiv_client.search_papers(
- query=simplified_query,
- max_results=max_results
- )
-
- # 添加不重复的论文
- existing_ids = {p["id"] for p in papers}
- for paper in more_papers:
- if paper["id"] not in existing_ids:
- papers.append(paper)
- if len(papers) >= 5: # 最多5篇
- break
-
- # 如果仍不足3篇,添加示例论文
- if len(papers) < 3:
- logger.info(f"无法获取足够论文,添加示例论文")
- clean_query = query.replace('"', '')
- example_papers = [
- {
- "id": f"example_{i}",
- "title": f"相关研究: {clean_query}",
- "authors": ["研究者 A", "研究者 B"],
- "summary": f"这是一篇关于{clean_query}的研究论文。由于搜索结果有限,系统生成了此示例条目。",
- "published": "2023-01-01T00:00:00",
- "updated": "2023-01-01T00:00:00",
- "link": "#",
- "source": "example"
- }
- for i in range(1, 4 - len(papers))
- ]
- papers.extend(example_papers)
-
- # 确保每个方向返回3-5篇论文
- return papers[:5]
- def _get_simplified_query(self, query: str) -> str:
- """获取简化的检索式"""
- # 提取引号中的关键术语
- import re
- quoted_terms = re.findall(r'"([^"]*)"', query)
-
- if quoted_terms:
- # 使用前两个引号术语
- terms = quoted_terms[:min(2, len(quoted_terms))]
- return ' AND '.join([f'"{term}"' for term in terms])
-
- # 如果没有引号术语,分解AND条件
- if " AND " in query:
- parts = query.split(" AND ")
- return parts[0] # 只用第一个条件
-
- return query
- async def _extract_key_terms(self, direction: str) -> List[str]:
- """从研究方向中提取关键术语"""
- prompt = f"""
- Extract 3 most important search terms from this research direction.
- Return only comma-separated terms, no explanations.
-
- Direction: {direction}
- """
-
- try:
- response = await self.llm_client.generate_text(prompt, temperature=0.1)
- terms = [term.strip() for term in response.split(',') if term.strip()]
- return terms if terms else [direction.split()[0]] # 如果提取失败,使用首个词
- except Exception as e:
- logger.error(f"Error extracting terms: {str(e)}")
- # 回退到简单分词
- words = direction.split()
- return [words[0], words[-1] if len(words) > 1 else words[0]]
- async def _search_papers_with_improved_strategy(self, direction: str, max_results: int = 4) -> List[Dict[str, Any]]:
- """智能论文搜索策略"""
- # 提取关键术语
- terms = await self._extract_key_terms(direction)
-
- # 构建多级查询策略
- queries = []
- if len(terms) >= 2:
- queries.append(f'"{terms[0]}" AND "{terms[1]}"')
- if len(terms) >= 3:
- queries.append(f'"{terms[0]}" AND "{terms[2]}"')
- queries.append(f'"{terms[0]}" OR "{terms[1] if len(terms) > 1 else terms[0]}"')
- queries.append(terms[0])
-
- # 逐一尝试查询
- for query in queries:
- logger.info(f"Trying search query: {query}")
- papers = await self.arxiv_client.search_papers(query, max_results)
- if len(papers) >= 2: # 找到足够论文则返回
- return papers[:max_results]
-
- # 所有查询失败,返回示例论文
- return self._get_example_papers(direction)
- async def extract_keywords_only(self, research_intent):
- """只提取关键词,不进行其他处理"""
- logging.info(f"Extracting keywords for: {research_intent}")
-
- try:
- # 直接使用LLMClient的extract_keywords方法
- result = await self.llm_client.extract_keywords(research_topic=research_intent, original_language="auto")
- return result
- except Exception as e:
- logging.error(f"Error extracting keywords: {str(e)}")
- # 返回一个默认结果,避免None
- return {
- "english_keywords": [],
- "original_keywords": [],
- "language": "en"
- }
- async def generate_directions_only(self, keywords: List[str], language: str) -> Dict[str, Any]:
- """仅生成研究方向"""
- logger.info(f"Generating directions for keywords: {keywords}")
- directions_data = await self.llm_client.generate_research_directions(keywords, language)
- logger.info(f"Generated research directions: {directions_data['english_directions']}")
- return directions_data
- async def search_papers_for_direction(self, direction: str, max_results: int = 4) -> List[Dict[str, Any]]:
- """仅为一个方向搜索论文"""
- query = await self._generate_search_query(direction)
- logger.info(f"Searching papers with query: {query}")
- papers = await self._search_papers_with_fallback(query=query, max_results=max_results)
- return papers
- async def generate_report_for_direction(
- self,
- direction: str,
- papers: List[Dict[str, Any]],
- target_language: str = "en"
- ) -> Dict[str, Any]:
- """仅为一个方向生成报告"""
- return await self._generate_direction_report(direction, papers, target_language)
- async def generate_enhanced_topics(self, keywords, language):
- """
- 基于关键词生成增强的研究主题
- 返回格式: [
- {
- "english_title": "Energy Transition Policies in China",
- "title": "中国的能源转型政策",
- "description": "研究中国在能源转型领域的政策制定与实施",
- "keywords": ["policy", "energy transition", "China"]
- },
- ...
- ]
- """
- prompt = f"""
- Based on the following keywords about a research area: {', '.join(keywords)}
-
- Generate 3-5 specific research topics that would be valuable to explore in this area.
-
- For each topic:
- 1. Provide a specific, focused research title (not too broad)
- 2. Add a brief description of what this research direction would investigate
- 3. List 3-5 specific keywords that best represent this research direction
-
- Format your response as a JSON array of objects with the following structure:
- [
- {{
- "english_title": "The English title",
- "description": "Brief description of this research direction",
- "keywords": ["keyword1", "keyword2", "keyword3"]
- }},
- ...
- ]
- """
-
- # Add logging to track the response
- logger.info(f"Generating enhanced topics for keywords: {keywords}")
-
- response = await self.llm_client.chat_completion(
- messages=[
- {"role": "system", "content": "You are a research assistant helping to identify valuable research topics and directions."},
- {"role": "user", "content": prompt}
- ],
- response_format={"type": "json_object"}
- )
-
- # Log the raw response for debugging
- logger.info(f"Raw LLM response: {response}")
-
- try:
- # 提取JSON响应
- content = response["choices"][0]["message"]["content"]
- logger.info(f"Parsing JSON content: {content}")
-
- # Try to handle different JSON formats that might be returned
- topics_data = json.loads(content)
-
- # 确保我们有一个topics字段,如果返回直接是数组,则进行调整
- if isinstance(topics_data, list):
- topics = topics_data
- elif "topics" in topics_data:
- topics = topics_data.get("topics", [])
- else:
- # Try to find any array in the response
- for key, value in topics_data.items():
- if isinstance(value, list) and len(value) > 0 and "english_title" in value[0]:
- topics = value
- break
- else:
- # If no suitable array is found, create basic topics
- logger.warning("Could not find topics array in response, creating basic topics")
- topics = []
- for keyword in keywords[:3]:
- topics.append({
- "english_title": f"Research on {keyword}",
- "description": f"Investigating various aspects of {keyword}",
- "keywords": [keyword]
- })
-
- # Log the extracted topics
- logger.info(f"Extracted {len(topics)} topics: {topics}")
-
- # 如果需要翻译
- if language != "en":
- translated_topics = []
- for topic in topics:
- # 翻译标题
- translation_prompt = f"Translate the following research title to {language}: \"{topic['english_title']}\""
- translation_response = await self.llm_client.chat_completion(
- messages=[
- {"role": "system", "content": "You are a professional translator."},
- {"role": "user", "content": translation_prompt}
- ]
- )
- translated_title = translation_response["choices"][0]["message"]["content"].strip().strip('"')
-
- # 添加翻译后的标题
- topic["title"] = translated_title
- translated_topics.append(topic)
-
- return translated_topics
- else:
- # 英文环境则复制标题
- for topic in topics:
- topic["title"] = topic["english_title"]
- return topics
-
- except Exception as e:
- logging.error(f"Error parsing enhanced topics: {str(e)}", exc_info=True)
- # 返回基础主题格式作为后备
- basic_topics = []
- for i, keyword in enumerate(keywords[:3]):
- basic_topics.append({
- "english_title": f"Research on {keyword}",
- "title": f"关于{keyword}的研究" if language != "en" else f"Research on {keyword}",
- "description": f"Investigating various aspects of {keyword}",
- "keywords": [keyword]
- })
- return basic_topics
- async def generate_search_keywords_for_topic(self, topic):
- """
- 为特定研究主题生成精确的搜索关键词
- """
- prompt = f"""
- Based on the following research topic:
-
- Title: {topic['english_title']}
- Description: {topic['description']}
- Initial keywords: {', '.join(topic['keywords'])}
-
- Generate 5-7 precise search keywords or phrases that would be most effective for finding
- relevant scientific papers on this specific topic. These should be more specific and targeted
- than the initial keywords.
-
- Format your response as a JSON array of strings, e.g. ["keyword1", "keyword2", ...]
- """
-
- try:
- # 添加错误处理
- response = await self.llm_client.chat_completion(
- messages=[
- {"role": "system", "content": "You are a research assistant helping to generate effective search keywords for academic literature."},
- {"role": "user", "content": prompt}
- ],
- response_format={"type": "json_object"}
- )
-
- content = response["choices"][0]["message"]["content"]
- keywords_data = json.loads(content)
-
- # 处理不同的返回格式
- if isinstance(keywords_data, list):
- return keywords_data
- elif "keywords" in keywords_data:
- return keywords_data["keywords"]
- else:
- # 提取数据中的第一个列表
- for key, value in keywords_data.items():
- if isinstance(value, list):
- return value
- # 后备选项
- return topic['keywords']
- except Exception as e:
- # 增强错误处理:记录详细错误并返回原始关键词
- logging.error(f"Error generating search keywords: {str(e)}", exc_info=True)
- logging.info(f"Falling back to original topic keywords due to API error")
- # 返回原始关键词作为后备
- return topic['keywords']
- async def search_papers_with_keywords(self, keywords):
- """
- 使用关键词搜索相关论文
- """
- all_papers = []
-
- # 对每个关键词进行搜索
- for keyword in keywords:
- try:
- results = await self.arxiv_client.search_papers(
- query=keyword,
- max_results=3 # 每个关键词获取少量相关性高的结果
- )
- all_papers.extend(results)
- except Exception as e:
- logging.error(f"Error searching papers for keyword '{keyword}': {str(e)}")
-
- # 去重
- unique_papers = []
- paper_ids = set()
- for paper in all_papers:
- if paper["id"] not in paper_ids:
- unique_papers.append(paper)
- paper_ids.add(paper["id"])
-
- return unique_papers
- async def cluster_papers_by_keywords(self, papers, search_keywords, topic):
- """
- 根据关键词和主题将论文进行聚类
- """
- if not papers or len(papers) < 3:
- # 如果论文太少,不进行聚类
- return {
- "papers": papers,
- "clusters": [{
- "id": 0,
- "name": topic["english_title"],
- "keywords": search_keywords[:3]
- }]
- }
-
- try:
- # 准备文本数据
- texts = [f"{p['title']} {p['summary']}" for p in papers]
-
- # 使用现有的聚类器或简单实现
- num_clusters = min(3, len(papers) // 2) # 动态确定聚类数
-
- # 调用现有的聚类方法
- cluster_results = self.paper_clusterer.cluster_papers(
- papers=papers,
- num_clusters=num_clusters
- ) if hasattr(self, 'paper_clusterer') else None
-
- if not cluster_results:
- # 简单替代方案:根据关键词匹配进行"聚类"
- clusters = []
- for i, keyword in enumerate(search_keywords[:3]):
- clusters.append({
- "id": i,
- "name": f"Research on {keyword}",
- "keywords": [keyword]
- })
-
- # 分配论文到"聚类"
- for paper in papers:
- best_match = 0
- best_score = 0
- for i, cluster in enumerate(clusters):
- # 简单相似度:关键词在标题或摘要中的出现次数
- score = paper['title'].lower().count(cluster['keywords'][0].lower()) + \
- paper['summary'].lower().count(cluster['keywords'][0].lower())
- if score > best_score:
- best_score = score
- best_match = i
- paper['cluster'] = best_match
-
- return {
- "papers": papers,
- "clusters": clusters
- }
- else:
- # 使用现有聚类结果
- return cluster_results
-
- except Exception as e:
- logging.error(f"Error clustering papers: {str(e)}")
- # 失败时返回未聚类的数据
- return {
- "papers": papers,
- "clusters": [{
- "id": 0,
- "name": topic["english_title"],
- "keywords": search_keywords[:3]
- }]
- }
- async def generate_enhanced_report(self, topic, clustered_papers, search_keywords, language):
- """
- 为主题生成增强研究报告 - 分段处理以减少单次API调用压力
- """
- papers = clustered_papers["papers"]
- clusters = clustered_papers["clusters"]
-
- try:
- # 准备聚类摘要 - 限制每个聚类的论文数量
- cluster_summaries = []
- for cluster in clusters:
- cluster_papers = [p for p in papers if p.get('cluster', 0) == cluster['id']]
- if cluster_papers:
- # 限制每个聚类最多3篇论文
- paper_titles = "\n".join([f"- {p['title']}" for p in cluster_papers[:3]])
- cluster_summaries.append(f"Cluster: {cluster['name']}\nKeywords: {', '.join(cluster['keywords'])}\nPapers:\n{paper_titles}")
-
- all_cluster_summaries = "\n\n".join(cluster_summaries[:3]) # 限制聚类数量
-
- # 拆分报告生成为多个部分
- # 1. 生成报告概述和主要发现
- intro_prompt = f"""
- Generate the introduction and main findings sections for a research report on:
-
- Research Topic: {topic['english_title']}
- Description: {topic['description']}
- Main Keywords: {', '.join(topic['keywords'][:5])}
-
- Based on the following paper clusters:
- {all_cluster_summaries}
-
- Include:
- 1. A brief introduction to the research topic (100 words)
- 2. Summary of main findings from the literature (200 words)
-
- Format with appropriate headings.
- """
-
- # 使用更小的max_tokens以减少单次API调用的压力
- intro_response = await self.llm_client.chat_completion(
- messages=[
- {"role": "system", "content": "You are a research assistant creating academic reports."},
- {"role": "user", "content": intro_prompt}
- ],
- max_tokens=1000
- )
-
- intro_content = intro_response["choices"][0]["message"]["content"]
-
- # 2. 生成研究差距和未来方向
- conclusion_prompt = f"""
- Generate the research gaps and future directions sections for a research report on:
-
- Research Topic: {topic['english_title']}
- Description: {topic['description']}
- Main Keywords: {', '.join(topic['keywords'][:5])}
-
- Based on the provided literature, identify:
- 1. Current research gaps (100 words)
- 2. Potential directions for future research (100 words)
-
- Format with appropriate headings.
- """
-
- conclusion_response = await self.llm_client.chat_completion(
- messages=[
- {"role": "system", "content": "You are a research assistant creating academic reports."},
- {"role": "user", "content": conclusion_prompt}
- ],
- max_tokens=800
- )
-
- conclusion_content = conclusion_response["choices"][0]["message"]["content"]
-
- # 合并内容
- english_content = f"{intro_content}\n\n{conclusion_content}"
-
- # 如果需要翻译
- if language != "en":
- try:
- # 拆分翻译为两部分
- translation_intro = await self.llm_client.chat_completion(
- messages=[
- {"role": "system", "content": "You are a professional translator specialized in academic content."},
- {"role": "user", "content": f"Translate the following research report section to {language}:\n\n{intro_content}"}
- ],
- max_tokens=1200
- )
-
- translation_conclusion = await self.llm_client.chat_completion(
- messages=[
- {"role": "system", "content": "You are a professional translator specialized in academic content."},
- {"role": "user", "content": f"Translate the following research report section to {language}:\n\n{conclusion_content}"}
- ],
- max_tokens=1000
- )
-
- translated_intro = translation_intro["choices"][0]["message"]["content"]
- translated_conclusion = translation_conclusion["choices"][0]["message"]["content"]
-
- translated_content = f"{translated_intro}\n\n{translated_conclusion}"
- except Exception as e:
- # 翻译失败时,使用原始英文内容
- logger.error(f"Translation failed: {str(e)}")
- translated_content = f"(翻译失败,显示原文) {english_content}"
-
- return {
- "english_content": english_content,
- "translated_content": translated_content
- }
- else:
- return {
- "english_content": english_content,
- "translated_content": english_content # 英文环境下两者相同
- }
- except Exception as e:
- # 生成报告失败时,提供一个简单的后备报告
- logger.error(f"Report generation failed: {str(e)}")
- fallback_content = f"""
- # 研究主题: {topic['english_title']}
- ## 概述
- 本报告本应提供关于"{topic['english_title']}"主题的详细研究分析,但由于API调用限制或网络问题,无法生成完整报告。
- ## 可用资源
- 我们已检索到该主题相关的{len(papers)}篇论文,您可以通过查看这些论文获取相关信息。
- ## 主要关键词
- {', '.join(search_keywords[:5] if search_keywords else [])}
- ## 建议
- 您可以重新尝试生成报告,或直接查看检索到的论文以获取更多信息。
- """
-
- return {
- "english_content": fallback_content,
- "translated_content": fallback_content
- }
|