# AgentCoord/backend/AgentCoord/Export/docx_llm.py

"""
Word 文档 LLM 报告导出器
调用大模型生成专业的任务执行报告
"""

import json
import os
import re
from datetime import datetime
from typing import Dict, Any, Optional


class DocxLLMExporter:
    """Word 文档 LLM 报告导出器 - 调用大模型生成报告"""

    # LLM connection settings. Every value starts as None and is filled in
    # from config.yaml by _load_llm_config when an exporter is constructed.
    LLM_CONFIG = {
        'OPENAI_API_BASE': None,
        'OPENAI_API_KEY': None,
        'OPENAI_API_MODEL': None,
    }

    # Prompt template sent to the LLM. generate() fills it with str.format
    # using the named fields task_name, task_outline, rehearsal_log, agents
    # and agent_scores, so any literal brace added to this text must be
    # doubled ("{{" / "}}").
    PROMPT_TEMPLATE = """你是一位专业的项目管理顾问和报告分析师。你的任务是将以下任务执行数据生成一份详细、专业、结构化的执行报告。

## 任务基本信息
- 任务名称：{task_name}

## 任务大纲（规划阶段）
{task_outline}

## 执行结果
{rehearsal_log}

## 参与智能体
{agents}

## 智能体评分
{agent_scores}

---

## 报告要求

请生成一份完整的任务执行报告，包含以下章节：

### 1. 执行摘要
用 2-3 句话概括本次任务的整体执行情况。

### 2. 任务概述
- 任务背景与目标
- 任务范围与边界

### 3. 任务规划分析
- 任务拆解的合理性
- 智能体角色分配的优化建议
- 工作流程设计

### 4. 执行过程回顾
- 各阶段的完成情况
- 关键决策点
- 遇到的问题及解决方案

### 5. 成果产出分析
- 产出物的质量评估
- 产出与预期目标的匹配度

### 6. 团队协作分析
- 智能体之间的协作模式
- 信息传递效率

### 7. 质量评估
- 整体完成质量评分（1-10分）
- 各维度的具体评分及理由

### 8. 经验教训与改进建议
- 成功经验
- 存在的问题与不足
- 改进建议

---

## 输出格式要求
- 使用 Markdown 格式输出
- 语言：简体中文
- 适当使用列表、表格增强可读性
- 报告长度必须达到 4000-6000 字，每个章节都要详细展开，不要遗漏任何章节
- 每个章节的内容要充实，提供具体的分析和建议
- 注意：所有加粗标记必须成对出现，如 **文本**，不要单独使用 ** 或缺少结束标记
- 禁止使用 mermaid、graph TD、flowchart 等图表代码，如果需要描述流程请用纯文字描述
- 不要生成附录章节（如有关键参数对照表、工艺流程图等），如果确实需要附录再生成
- 不要在报告中显示"报告总字数"这样的统计信息
"""

    def __init__(self):
        """Create an exporter and immediately load its LLM settings."""
        self._load_llm_config()

    def _load_llm_config(self):
        """Load the LLM connection settings from the first usable config.yaml.

        Three candidate locations are probed in order: two derived from this
        module's path and one under the current working directory. The first
        file that exists and parses to a non-empty mapping wins; its
        ``OPENAI_API_BASE``, ``OPENAI_API_KEY`` and ``OPENAI_API_MODEL``
        entries are copied into ``self.LLM_CONFIG``.

        Nothing is raised: problems are reported on stdout and the affected
        settings simply keep their previous values.
        """
        # Work on a per-instance copy so loading never mutates the dict that
        # every instance shares through the class attribute.
        self.LLM_CONFIG = dict(self.LLM_CONFIG)

        try:
            import yaml

            package_dir = os.path.dirname(os.path.dirname(__file__))
            candidate_paths = [
                os.path.join(package_dir, 'config', 'config.yaml'),
                os.path.join(os.path.dirname(package_dir), 'backend', 'config', 'config.yaml'),
                os.path.join(os.getcwd(), 'config', 'config.yaml'),
            ]

            for config_path in candidate_paths:
                if not os.path.exists(config_path):
                    continue

                with open(config_path, 'r', encoding='utf-8') as f:
                    config = yaml.safe_load(f)

                # Skip empty files and YAML whose top level is not a mapping
                # (a list or scalar has no .get) and try the next candidate.
                if not config or not isinstance(config, dict):
                    continue

                for key in ('OPENAI_API_BASE', 'OPENAI_API_KEY', 'OPENAI_API_MODEL'):
                    self.LLM_CONFIG[key] = config.get(key)
                print(f"已加载 LLM 配置: {self.LLM_CONFIG['OPENAI_API_MODEL']}")
                return

            # Make the "no config anywhere" case visible instead of silent.
            print("未找到可用的 LLM 配置文件 (config.yaml)")
        except Exception as e:
            print(f"加载 LLM 配置失败: {e}")

    def generate(self, task_data: Dict[str, Any], file_path: str) -> bool:
        """Build an LLM-written execution report and save it as a Word file.

        Args:
            task_data: Mapping read for the keys ``task_name``,
                ``task_outline``, ``rehearsal_log`` and ``agent_scores``.
            file_path: Destination path handed to the Word writer.

        Returns:
            True once the document has been written. False when the LLM
            yields no content or when any step raises; in the latter case the
            error and its traceback are printed rather than propagated.
        """
        def to_json_or_placeholder(value) -> str:
            # Falsy sections are presented to the model as the placeholder.
            if not value:
                return '无'
            return json.dumps(value, ensure_ascii=False, indent=2)

        try:
            # Gather the raw inputs.
            name = task_data.get('task_name', '未命名任务')
            outline = task_data.get('task_outline')
            log = task_data.get('rehearsal_log')
            all_scores = task_data.get('agent_scores')

            # Participants come from the outline; scores are narrowed to them.
            participants = self._extract_agents(outline)
            relevant_scores = self._filter_agent_scores(all_scores, participants)

            # Serialise every section for the prompt.
            outline_text = to_json_or_placeholder(outline)
            log_text = to_json_or_placeholder(log)
            participants_text = ', '.join(participants) if participants else '无'
            scores_text = to_json_or_placeholder(relevant_scores)

            prompt = self.PROMPT_TEMPLATE.format(
                task_name=name,
                task_outline=outline_text,
                rehearsal_log=log_text,
                agents=participants_text,
                agent_scores=scores_text,
            )

            print("正在调用大模型生成报告...")
            report = self._call_llm(prompt)
            if not report:
                print("LLM 生成报告失败")
                return False

            # Drop a leading generic report title before rendering.
            report = self._clean_report_title(report)
            print(f"报告生成成功，长度: {len(report)} 字符")

            self._save_as_word(report, file_path)
            return True

        except Exception as e:
            print(f"Word LLM 导出失败: {e}")
            import traceback
            traceback.print_exc()
            return False

    def _clean_report_title(self, content: str) -> str:
        """Strip a redundant leading "任务执行报告" title from the LLM output.

        The first non-blank line counts as that title when, ignoring
        surrounding whitespace, it is the bare text, a Markdown heading of any
        level (``#`` to ``######``), or the text wrapped in ``**`` markers.
        The title is removed together with the blank lines before and after
        it.

        Args:
            content: Markdown text returned by the LLM.

        Returns:
            The text without the title, or ``content`` unchanged when it does
            not start with that title.
        """
        lines = content.split('\n')

        # Models often emit blank lines before the heading, so look at the
        # first line that actually has text rather than at lines[0].
        first_text_index = next(
            (index for index, line in enumerate(lines) if line.strip()),
            None,
        )
        if first_text_index is None:
            return content

        title_pattern = r'(?:#{1,6}\s+)?(?:\*\*)?任务执行报告(?:\*\*)?'
        if not re.fullmatch(title_pattern, lines[first_text_index].strip()):
            return content

        remaining = lines[first_text_index + 1:]
        # Remove the blank lines that separated the title from the body.
        while remaining and not remaining[0].strip():
            remaining.pop(0)

        return '\n'.join(remaining)

    def _extract_agents(self, task_outline: Any) -> list:
        """Collect the names of the agents taking part in the task.

        Reads ``task_outline['Collaboration Process']`` and gathers every
        entry of each step's ``AgentSelection`` list.

        Args:
            task_outline: Expected to be a dict; anything else yields ``[]``.

        Returns:
            Unique, non-empty agent names in order of first appearance.
            Malformed steps and non-string entries are skipped.
        """
        if not isinstance(task_outline, dict):
            return []

        steps = task_outline.get('Collaboration Process', [])
        if not isinstance(steps, list):
            return []

        # A dict keeps insertion order, giving de-duplication with a stable
        # result; a set would reorder names from run to run.
        seen = {}
        for step in steps:
            if not isinstance(step, dict):
                continue
            selection = step.get('AgentSelection', [])
            if not isinstance(selection, list):
                continue
            for agent in selection:
                # Names are later joined into a string, so only non-empty
                # strings are usable (this also skips unhashable entries).
                if isinstance(agent, str) and agent:
                    seen.setdefault(agent, None)

        return list(seen)

    def _filter_agent_scores(self, agent_scores: Any, agents: list) -> dict:
        """Keep only the score entries that belong to the given agents.

        Args:
            agent_scores: Mapping of step id to a dict holding ``aspectList``
                and ``agentScores``; anything else yields ``{}``.
            agents: Names of the agents whose scores should be kept.

        Returns:
            A dict with the same step ids, each carrying the original
            ``aspectList`` and the narrowed ``agentScores``. Steps without any
            matching agent are left out.
        """
        if not (agent_scores and isinstance(agent_scores, dict) and agents):
            return {}

        kept_steps = {}
        for step_id, step in agent_scores.items():
            if not isinstance(step, dict):
                continue

            aspects = step.get('aspectList', [])
            per_agent = step.get('agentScores', {})
            if not per_agent:
                continue

            # Retain an agent only if it participates and its scores are a dict.
            matching = {
                name: values
                for name, values in per_agent.items()
                if name in agents and isinstance(values, dict)
            }
            if matching:
                kept_steps[step_id] = {
                    'aspectList': aspects,
                    'agentScores': matching,
                }

        return kept_steps

    def _call_llm(self, prompt: str) -> str:
        """Send ``prompt`` to the configured chat-completions endpoint.

        Args:
            prompt: Full prompt text, sent as a single user message.

        Returns:
            The text of the first choice. An empty string is returned (and a
            message printed) when a required setting is missing, the
            ``openai`` package is not installed, the request fails, or the
            response carries no text. The result is never ``None``.
        """
        try:
            import openai

            # All three settings are required; report the first missing one.
            for key in ('OPENAI_API_KEY', 'OPENAI_API_BASE', 'OPENAI_API_MODEL'):
                if not self.LLM_CONFIG[key]:
                    print(f"错误: {key} 未配置")
                    return ""

            client = openai.OpenAI(
                api_key=self.LLM_CONFIG['OPENAI_API_KEY'],
                base_url=self.LLM_CONFIG['OPENAI_API_BASE']
            )

            response = client.chat.completions.create(
                model=self.LLM_CONFIG['OPENAI_API_MODEL'],
                messages=[
                    {"role": "user", "content": prompt}
                ],
                temperature=0.7,
                max_tokens=12000,
            )

            if response and response.choices:
                # The API declares message.content as nullable; normalise a
                # missing body to "" so the declared str return type holds.
                return response.choices[0].message.content or ""

            return ""

        except ImportError:
            print("请安装 openai 库: pip install openai")
            return ""
        except Exception as e:
            print(f"调用 LLM 失败: {e}")
            return ""

    def _save_as_word(self, markdown_content: str, file_path: str):
        """Render Markdown text into a Word document and save it.

        When the first non-blank line is a level-one heading (``# ...``) it
        becomes the centred document title and rendering starts on the line
        after it. Otherwise no title is added and the whole text is rendered,
        so nothing that precedes a later ``# `` heading is lost. A paragraph
        with the export time is appended at the end.

        Args:
            markdown_content: Report text in Markdown.
            file_path: Where the ``.docx`` file is written.

        Raises:
            ImportError: If python-docx is not installed.
            Exception: Any error raised while building or saving the document
                is printed and then re-raised.
        """
        try:
            from docx import Document
            from docx.enum.text import WD_ALIGN_PARAGRAPH

            doc = Document()
            lines = markdown_content.split('\n')

            # Only a heading that opens the report is promoted to the title.
            first_text_index = next(
                (index for index, line in enumerate(lines) if line.strip()),
                None,
            )
            content_start = 0
            if first_text_index is not None:
                first_line = lines[first_text_index].strip()
                if first_line.startswith('# '):
                    title = doc.add_heading(first_line[2:].strip(), level=0)
                    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
                    content_start = first_text_index + 1

            self._parse_markdown_to_doc('\n'.join(lines[content_start:]), doc)

            # Trailing export timestamp.
            doc.add_paragraph(f"\n\n导出时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

            doc.save(file_path)
            print(f"Word 文档已保存: {file_path}")

        except ImportError:
            print("请安装 python-docx 库: pip install python-docx")
            raise
        except Exception as e:
            print(f"保存 Word 文档失败: {e}")
            raise

    def _parse_markdown_to_doc(self, markdown_content: str, doc):
        """Render a small Markdown subset into ``doc``, one line at a time.

        Handled constructs:
          * Lines starting with ``|`` are table rows. Consecutive rows are
            collected and emitted as one table when a blank or non-table line
            (or the end of the text) is reached. A trailing ``|`` is optional.
            Rows whose cells all consist of dashes with optional colons are
            treated as the header delimiter and skipped.
          * ``#`` to ``######`` headings. The first three keep the levels
            0, 1 and 3; deeper headings use levels 4, 5 and 6.
          * ``- ``, ``* `` and ``• `` bullets become ``List Bullet`` paragraphs.
          * Every other non-blank line becomes a paragraph; ``**bold**`` spans
            are handled by ``_add_formatted_paragraph``.

        Args:
            markdown_content: Markdown text to render.
            doc: Document object that receives headings, paragraphs and tables.
        """
        heading_levels = {1: 0, 2: 1, 3: 3, 4: 4, 5: 5, 6: 6}
        pending_rows = []

        def flush_table():
            # Emit the rows collected so far as a single table.
            if pending_rows:
                self._add_table_to_doc(list(pending_rows), doc)
                pending_rows.clear()

        for raw_line in markdown_content.split('\n'):
            line = raw_line.rstrip()

            # A blank line always terminates a table.
            if not line:
                flush_table()
                continue

            stripped = line.strip()
            if stripped.startswith('|'):
                inner = stripped[1:]
                # The closing pipe is optional; dropping the last split item
                # unconditionally would lose the final cell without it.
                if inner.endswith('|'):
                    inner = inner[:-1]
                cells = [cell.strip() for cell in inner.split('|')]

                # Delimiter row such as |---|:-:|. Every cell must match, so a
                # data row that merely contains "---" in one cell is kept.
                if all(re.fullmatch(r':?-+:?', cell) for cell in cells):
                    continue

                if any(cells):  # ignore rows that hold no text at all
                    pending_rows.append(cells)
                continue

            # Any non-table line ends a table that was being collected.
            flush_table()

            heading = re.match(r'(#{1,6}) (.*)', line)
            if heading:
                level = heading_levels[len(heading.group(1))]
                doc.add_heading(heading.group(2).strip(), level=level)
            elif line.startswith(('- ', '* ', '• ')):
                # Each marker is two characters long: symbol plus space.
                self._add_formatted_paragraph(line[2:].strip(), doc, 'List Bullet')
            else:
                self._add_formatted_paragraph(line, doc)

        # A table may run up to the very end of the text.
        flush_table()

    def _add_table_to_doc(self, table_rows: list, doc):
        """Append a table built from parsed Markdown rows to ``doc``.

        The table is as wide as the longest row, so rows of unequal length
        are accepted; missing cells stay empty. ``**bold**`` spans inside a
        cell become bold runs.

        Args:
            table_rows: List of rows, each a list of cell strings. Nothing is
                added when the list is empty.
            doc: Document object providing ``add_table``.
        """
        if not table_rows:
            return

        # Size by the widest row: sizing by the first row alone makes a longer
        # later row index past the last column.
        column_count = max(len(row_data) for row_data in table_rows)
        table = doc.add_table(rows=len(table_rows), cols=column_count)
        table.style = 'Light Grid Accent 1'

        for row, row_data in zip(table.rows, table_rows):
            for cell, cell_text in zip(row.cells, row_data):
                cell.text = ''
                paragraph = cell.paragraphs[0]

                # Split keeps the **...** delimiters as their own items.
                for part in re.split(r'(\*\*.+?\*\*)', cell_text):
                    if part.startswith('**') and part.endswith('**'):
                        paragraph.add_run(part[2:-2]).bold = True
                    elif part:
                        paragraph.add_run(part)

    def _add_formatted_paragraph(self, text: str, doc, style: str = None):
        """Append one paragraph to ``doc``, rendering ``**bold**`` spans.

        Args:
            text: Paragraph text; segments wrapped in ``**`` become bold runs
                and all other segments become plain runs.
            doc: Document object providing ``add_paragraph``.
            style: Optional paragraph style name passed to ``add_paragraph``.
        """
        paragraph = doc.add_paragraph(style=style)

        # The capturing group makes re.split keep the **...** delimiters.
        for segment in re.split(r'(\*\*.+?\*\*)', text):
            wrapped_in_markers = segment.startswith('**') and segment.endswith('**')
            if not wrapped_in_markers:
                paragraph.add_run(segment)
                continue
            bold_run = paragraph.add_run(segment[2:-2])
            bold_run.bold = True