435 lines
15 KiB
Python
435 lines
15 KiB
Python
"""
|
||
Word 文档 LLM 报告导出器
|
||
调用大模型生成专业的任务执行报告
|
||
"""
|
||
|
||
import json
|
||
import os
|
||
import re
|
||
from datetime import datetime
|
||
from typing import Dict, Any, Optional
|
||
|
||
|
||
class DocxLLMExporter:
    """Export a task-execution report to a Word (.docx) file via an LLM.

    The exporter builds a prompt from the task data, sends it to an
    OpenAI-compatible chat-completions endpoint, and converts the Markdown
    answer into a Word document with python-docx.

    ``yaml``, ``openai`` and ``docx`` are imported lazily inside the methods
    that need them, so the class can be imported without those packages.
    """

    # LLM settings, filled in from config.yaml by _load_llm_config().
    # NOTE: this dict lives on the class, so the loaded values are shared by
    # every instance. That is kept as-is because callers may read
    # DocxLLMExporter.LLM_CONFIG directly.
    LLM_CONFIG = {
        'OPENAI_API_BASE': None,
        'OPENAI_API_KEY': None,
        'OPENAI_API_MODEL': None,
    }

    # Prompt template; the placeholders are filled in by generate().
    PROMPT_TEMPLATE = """你是一位专业的项目管理顾问和报告分析师。你的任务是将以下任务执行数据生成一份详细、专业、结构化的执行报告。

## 任务基本信息
- 任务名称:{task_name}

## 任务大纲(规划阶段)
{task_outline}

## 执行结果
{rehearsal_log}

## 参与智能体
{agents}

## 智能体评分
{agent_scores}

---

## 报告要求

请生成一份完整的任务执行报告,包含以下章节:

### 1. 执行摘要
用 2-3 句话概括本次任务的整体执行情况。

### 2. 任务概述
- 任务背景与目标
- 任务范围与边界

### 3. 任务规划分析
- 任务拆解的合理性
- 智能体角色分配的优化建议
- 工作流程设计

### 4. 执行过程回顾
- 各阶段的完成情况
- 关键决策点
- 遇到的问题及解决方案

### 5. 成果产出分析
- 产出物的质量评估
- 产出与预期目标的匹配度

### 6. 团队协作分析
- 智能体之间的协作模式
- 信息传递效率

### 7. 质量评估
- 整体完成质量评分(1-10分)
- 各维度的具体评分及理由

### 8. 经验教训与改进建议
- 成功经验
- 存在的问题与不足
- 改进建议

---

## 输出格式要求
- 使用 Markdown 格式输出
- 语言:简体中文
- 适当使用列表、表格增强可读性
- 报告长度必须达到 4000-6000 字,每个章节都要详细展开,不要遗漏任何章节
- 每个章节的内容要充实,提供具体的分析和建议
- 注意:所有加粗标记必须成对出现,如 **文本**,不要单独使用 ** 或缺少结束标记
- 禁止使用 mermaid、graph TD、flowchart 等图表代码,如果需要描述流程请用纯文字描述
- 不要生成附录章节(如有关键参数对照表、工艺流程图等),如果确实需要附录再生成
- 不要在报告中显示"报告总字数"这样的统计信息
"""

    # One **bold** span. The capture group makes re.split() keep the matches,
    # which then sit at the odd indices of the result.
    _BOLD_PATTERN = re.compile(r'(\*\*.+?\*\*)')
    # ATX heading: 1-6 hashes, one space, then the heading text.
    _HEADING_PATTERN = re.compile(r'(#{1,6}) (.*)')
    # A single cell of a Markdown table delimiter row, e.g. "---" or ":---:".
    _SEPARATOR_CELL = re.compile(r':?-{3,}:?')
    # Markdown heading depth -> Word heading level. Depths 1-3 keep the
    # mapping this exporter has always used; deeper headings map to their
    # own depth.
    _HEADING_LEVELS = {1: 0, 2: 1, 3: 3}

    def __init__(self):
        self._load_llm_config()

    def _load_llm_config(self):
        """Load the LLM settings from the first usable ``config.yaml``.

        Candidate locations are tried in order; the first file that parses to
        a non-empty mapping wins. Any failure (PyYAML missing, unreadable
        file, ...) is reported with ``print`` and otherwise ignored, leaving
        ``LLM_CONFIG`` as it was.
        """
        try:
            import yaml

            parent_dir = os.path.dirname(os.path.dirname(__file__))
            candidate_paths = [
                os.path.join(parent_dir, 'config', 'config.yaml'),
                os.path.join(os.path.dirname(parent_dir), 'backend', 'config', 'config.yaml'),
                os.path.join(os.getcwd(), 'config', 'config.yaml'),
            ]

            for config_path in candidate_paths:
                if not os.path.exists(config_path):
                    continue
                with open(config_path, 'r', encoding='utf-8') as f:
                    config = yaml.safe_load(f)
                # Skip empty files and files whose top level is not a
                # mapping, so that a later candidate can still be used.
                if not config or not isinstance(config, dict):
                    continue
                for key in ('OPENAI_API_BASE', 'OPENAI_API_KEY', 'OPENAI_API_MODEL'):
                    self.LLM_CONFIG[key] = config.get(key)
                print(f"已加载 LLM 配置: {self.LLM_CONFIG['OPENAI_API_MODEL']}")
                return
        except Exception as e:
            print(f"加载 LLM 配置失败: {e}")

    def generate(self, task_data: Dict[str, Any], file_path: str) -> bool:
        """Generate the Word report for one task.

        Args:
            task_data: Task information. The keys read here are
                ``task_name``, ``task_outline``, ``rehearsal_log`` and
                ``agent_scores``.
            file_path: Destination path of the ``.docx`` file.

        Returns:
            ``True`` when the document was written, ``False`` when the LLM
            returned nothing or any step raised (the error is printed
            together with a traceback).
        """
        try:
            # 1. Collect the raw inputs.
            task_name = task_data.get('task_name', '未命名任务')
            task_outline = task_data.get('task_outline')
            rehearsal_log = task_data.get('rehearsal_log')
            agent_scores = task_data.get('agent_scores')

            # 2. Participating agents come from the outline's
            #    "Collaboration Process" steps.
            agents = self._extract_agents(task_outline)

            # 3. Keep only the scores of agents that took part in this task.
            filtered_agent_scores = self._filter_agent_scores(agent_scores, agents)

            # 4. Serialise everything for the prompt; '无' marks missing data.
            def as_json(value):
                return json.dumps(value, ensure_ascii=False, indent=2) if value else '无'

            # 5. Build the prompt.
            prompt = self.PROMPT_TEMPLATE.format(
                task_name=task_name,
                task_outline=as_json(task_outline),
                rehearsal_log=as_json(rehearsal_log),
                agents=', '.join(agents) if agents else '无',
                agent_scores=as_json(filtered_agent_scores),
            )

            # 6. Ask the LLM for the report.
            print("正在调用大模型生成报告...")
            report_content = self._call_llm(prompt)
            if not report_content:
                print("LLM 生成报告失败")
                return False

            # 7. Drop a leading generic report title if the model added one.
            report_content = self._clean_report_title(report_content)
            print(f"报告生成成功,长度: {len(report_content)} 字符")

            # 8. Convert the Markdown to a Word document.
            self._save_as_word(report_content, file_path)
            return True

        except Exception as e:
            print(f"Word LLM 导出失败: {e}")
            import traceback
            traceback.print_exc()
            return False

    def _clean_report_title(self, content: str) -> str:
        """Remove a leading generic report title line from *content*.

        Only the first line is examined. When it is exactly the generic title
        (with or without a leading ``# ``), that line and the blank lines
        directly after it are removed; otherwise *content* is returned as is.
        """
        lines = content.split('\n')
        if lines[0].strip() not in ('任务执行报告', '# 任务执行报告'):
            return content

        remaining = lines[1:]
        # Skip the blank lines that followed the removed title.
        first_kept = 0
        while first_kept < len(remaining) and not remaining[first_kept].strip():
            first_kept += 1
        return '\n'.join(remaining[first_kept:])

    def _extract_agents(self, task_outline: Any) -> list:
        """Return the agent names listed in the outline's collaboration steps.

        Names are read from the ``AgentSelection`` list of every step in
        ``task_outline['Collaboration Process']``. Duplicates are removed and
        the order of first appearance is kept so that the generated prompt is
        stable between runs. Entries that are empty or not strings are
        skipped because the result is later joined into a string.
        """
        if not task_outline or not isinstance(task_outline, dict):
            return []

        collaboration_process = task_outline.get('Collaboration Process', [])
        if not collaboration_process or not isinstance(collaboration_process, list):
            return []

        # A dict is used as an insertion-ordered set.
        seen = {}
        for step in collaboration_process:
            if not isinstance(step, dict):
                continue
            agent_selection = step.get('AgentSelection', [])
            if not isinstance(agent_selection, list):
                continue
            for agent in agent_selection:
                if agent and isinstance(agent, str):
                    seen.setdefault(agent, None)
        return list(seen)

    def _filter_agent_scores(self, agent_scores: Any, agents: list) -> dict:
        """Keep only the scores that belong to agents in *agents*.

        Args:
            agent_scores: Mapping of step id to a dict holding ``aspectList``
                and ``agentScores`` (agent name -> score dict). Anything that
                does not have this form is ignored.
            agents: Names of the agents taking part in the current task.

        Returns:
            A dict with the same layout as *agent_scores*, containing only
            the steps that still have at least one score after filtering.
        """
        if not agent_scores or not isinstance(agent_scores, dict):
            return {}
        if not agents:
            return {}

        filtered = {}
        for step_id, step_data in agent_scores.items():
            if not isinstance(step_data, dict):
                continue

            agent_scores_data = step_data.get('agentScores', {})
            if not agent_scores_data:
                continue

            kept_scores = {
                agent_name: scores
                for agent_name, scores in agent_scores_data.items()
                if agent_name in agents and isinstance(scores, dict)
            }
            if kept_scores:
                filtered[step_id] = {
                    'aspectList': step_data.get('aspectList', []),
                    'agentScores': kept_scores,
                }
        return filtered

    def _call_llm(self, prompt: str) -> str:
        """Send *prompt* to the configured chat-completions endpoint.

        Returns:
            The text of the first choice, or ``""`` when the configuration is
            incomplete, the ``openai`` package is missing, the request fails,
            or the response carries no text.
        """
        try:
            import openai

            # Every setting is required; report the first one that is missing.
            for key in ('OPENAI_API_KEY', 'OPENAI_API_BASE', 'OPENAI_API_MODEL'):
                if not self.LLM_CONFIG[key]:
                    print(f"错误: {key} 未配置")
                    return ""

            client = openai.OpenAI(
                api_key=self.LLM_CONFIG['OPENAI_API_KEY'],
                base_url=self.LLM_CONFIG['OPENAI_API_BASE'],
            )
            response = client.chat.completions.create(
                model=self.LLM_CONFIG['OPENAI_API_MODEL'],
                messages=[
                    {"role": "user", "content": prompt},
                ],
                temperature=0.7,
                max_tokens=12000,
            )

            if response and response.choices:
                # message.content may be None; always hand back a string.
                return response.choices[0].message.content or ""
            return ""

        except ImportError:
            print("请安装 openai 库: pip install openai")
            return ""
        except Exception as e:
            print(f"调用 LLM 失败: {e}")
            return ""

    def _save_as_word(self, markdown_content: str, file_path: str):
        """Render *markdown_content* into a Word document at *file_path*.

        The first ``# `` heading found becomes the centred document title.
        All other lines, including any that appear before that heading, are
        rendered in their original order, followed by an export timestamp.

        Raises:
            ImportError: python-docx is not installed.
            Exception: Any error raised while building or saving the
                document is printed and re-raised.
        """
        try:
            from docx import Document
            from docx.enum.text import WD_ALIGN_PARAGRAPH

            doc = Document()

            # Promote the first "# " heading to the document title.
            lines = markdown_content.split('\n')
            title_index = next(
                (i for i, text in enumerate(lines) if text.strip().startswith('# ')),
                None,
            )
            if title_index is not None:
                # Remove only the title line itself; the lines before it are
                # kept so that no report content is lost.
                title_text = lines.pop(title_index).strip()[2:].strip()
                title = doc.add_heading(title_text, level=0)
                title.alignment = WD_ALIGN_PARAGRAPH.CENTER

            self._parse_markdown_to_doc('\n'.join(lines), doc)

            # Export timestamp.
            doc.add_paragraph(f"\n\n导出时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

            doc.save(file_path)
            print(f"Word 文档已保存: {file_path}")

        except ImportError:
            print("请安装 python-docx 库: pip install python-docx")
            raise
        except Exception as e:
            print(f"保存 Word 文档失败: {e}")
            raise

    @staticmethod
    def _split_table_row(row: str) -> list:
        """Split one Markdown table row into stripped cell texts.

        The leading and trailing pipes are optional, so a row without a
        closing ``|`` keeps its last cell.
        """
        inner = row.strip()
        if inner.startswith('|'):
            inner = inner[1:]
        if inner.endswith('|'):
            inner = inner[:-1]
        return [cell.strip() for cell in inner.split('|')]

    @classmethod
    def _is_table_separator(cls, row: str) -> bool:
        """Tell whether *row* is a table delimiter row such as ``|---|:--:|``.

        Every cell has to be a run of at least three dashes with optional
        alignment colons, so a data row that merely contains ``---``
        somewhere in its text is not mistaken for a delimiter.
        """
        return all(
            cls._SEPARATOR_CELL.fullmatch(cell)
            for cell in cls._split_table_row(row)
        )

    def _parse_markdown_to_doc(self, markdown_content: str, doc):
        """Append *markdown_content* to *doc*, one line at a time.

        Supported constructs: pipe tables, ``#`` to ``######`` headings,
        ``-`` / ``*`` / ``•`` bullets and ``**bold**`` spans. Every other
        line becomes a plain paragraph.
        """
        pending_rows = []

        def flush_table():
            # Emit the table collected so far, if any.
            if pending_rows:
                self._add_table_to_doc(list(pending_rows), doc)
                pending_rows.clear()

        for raw_line in markdown_content.split('\n'):
            line = raw_line.rstrip()
            stripped = line.strip()

            # A blank line ends the current table.
            if not stripped:
                flush_table()
                continue

            if stripped.startswith('|'):
                # Delimiter rows carry no data.
                if self._is_table_separator(stripped):
                    continue
                cells = self._split_table_row(stripped)
                if any(cells):  # ignore rows whose cells are all empty
                    pending_rows.append(cells)
                continue

            # Any non-table line ends the current table.
            flush_table()

            heading = self._HEADING_PATTERN.match(line)
            if heading:
                depth = len(heading.group(1))
                level = self._HEADING_LEVELS.get(depth, depth)
                doc.add_heading(heading.group(2).strip(), level=level)
            elif line.startswith(('- ', '* ', '• ')):
                # Drop the bullet marker and the space after it.
                self._add_formatted_paragraph(line[2:].strip(), doc, 'List Bullet')
            else:
                self._add_formatted_paragraph(line, doc)

        # A table may run up to the end of the content.
        flush_table()

    def _add_table_to_doc(self, table_rows: list, doc):
        """Append *table_rows* (a list of lists of cell texts) to *doc*.

        The table is as wide as the widest row, so rows of different lengths
        are accepted; cells missing from shorter rows stay empty.
        """
        if not table_rows:
            return

        column_count = max(len(row_data) for row_data in table_rows)
        if column_count == 0:
            return

        table = doc.add_table(rows=len(table_rows), cols=column_count)
        table.style = 'Light Grid Accent 1'

        for row, row_data in zip(table.rows, table_rows):
            for cell, cell_text in zip(row.cells, row_data):
                cell.text = ''
                self._add_runs(cell.paragraphs[0], cell_text)

    def _add_formatted_paragraph(self, text: str, doc, style: str = None):
        """Append a paragraph to *doc*, rendering ``**bold**`` spans as bold.

        Args:
            text: Paragraph text, possibly containing ``**bold**`` markers.
            doc: The document the paragraph is added to.
            style: Optional paragraph style name, e.g. ``'List Bullet'``.
        """
        para = doc.add_paragraph(style=style)
        self._add_runs(para, text)

    def _add_runs(self, paragraph, text: str):
        """Add *text* to *paragraph* as runs, with ``**bold**`` spans in bold.

        Text outside complete ``**...**`` pairs is added unchanged, so
        unmatched asterisks are preserved rather than stripped.
        """
        for index, part in enumerate(self._BOLD_PATTERN.split(text)):
            if not part:
                continue
            if index % 2:
                # Odd positions are the captured **...** matches.
                paragraph.add_run(part[2:-2]).bold = True
            else:
                paragraph.add_run(part)
|