exporter.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. """专栏导出工具"""
  2. import os
  3. import json
  4. from typing import Dict, Any
  5. from datetime import datetime
  6. class ColumnExporter:
  7. """专栏导出工具"""
  8. @staticmethod
  9. def export_to_files(column_data: Dict[str, Any], output_dir: str = "column_output"):
  10. """
  11. 导出专栏到文件
  12. Args:
  13. column_data: 专栏数据
  14. output_dir: 输出目录
  15. """
  16. # 创建输出目录
  17. os.makedirs(output_dir, exist_ok=True)
  18. print(f"\n{'='*70}")
  19. print(f"▸ 开始导出专栏文件...")
  20. print(f"{'='*70}\n")
  21. # 导出完整JSON
  22. json_path = os.path.join(output_dir, 'column_data.json')
  23. with open(json_path, 'w', encoding='utf-8') as f:
  24. json.dump(column_data, f, ensure_ascii=False, indent=2, default=str)
  25. print(f"▸ 已保存完整数据:{json_path}")
  26. # 导出每篇文章
  27. for article in column_data['articles']:
  28. # 安全的文件名
  29. safe_title = "".join(c for c in article['title'] if c.isalnum() or c in (' ', '-', '_')).strip()
  30. filename = f"{article['id']}_{safe_title}.md"
  31. filepath = os.path.join(output_dir, filename)
  32. with open(filepath, 'w', encoding='utf-8') as f:
  33. # 写入文章内容
  34. f.write(article['content'])
  35. # 附加元数据
  36. f.write(f"\n\n---\n\n")
  37. f.write(f"## 文章元数据\n\n")
  38. f.write(f"- **文章ID**: {article['id']}\n")
  39. f.write(f"- **字数**: {article['word_count']}\n")
  40. f.write(f"- **评审分数**: {article['metadata'].get('review_score', 'N/A')}\n")
  41. f.write(f"- **评审等级**: {article['metadata'].get('review_grade', 'N/A')}\n")
  42. if article.get('has_revisions'):
  43. f.write(f"- **修改次数**: {article['revision_count']}\n")
  44. if 'revision_summary' in article['metadata']:
  45. f.write(f"- **主要修改**:\n")
  46. for change in article['metadata']['revision_summary'].get('major_changes', []):
  47. f.write(f" - {change}\n")
  48. print(f"▸ 已保存文章:{filepath}")
  49. # 导出统计报告
  50. report_path = os.path.join(output_dir, 'REPORT.md')
  51. ColumnExporter._export_report(column_data, report_path)
  52. print(f"▸ 已保存统计报告:{report_path}")
  53. print(f"\n{'='*70}")
  54. print(f"▸ 导出完成!输出目录:{output_dir}")
  55. print(f"{'='*70}\n")
  56. @staticmethod
  57. def _export_report(column_data: Dict[str, Any], filepath: str):
  58. """导出统计报告"""
  59. with open(filepath, 'w', encoding='utf-8') as f:
  60. f.write(f"# {column_data['column_info']['title']}\n\n")
  61. f.write(f"## 专栏信息\n\n")
  62. f.write(f"- **简介**: {column_data['column_info']['description']}\n")
  63. f.write(f"- **目标读者**: {column_data['column_info']['target_audience']}\n")
  64. f.write(f"- **文章数量**: {column_data['column_info']['topic_count']}\n\n")
  65. f.write(f"## 内容统计\n\n")
  66. stats = column_data['statistics']
  67. f.write(f"- **总字数**: {stats['total_words']:,}\n")
  68. f.write(f"- **平均每篇**: {stats['avg_words_per_article']:,} 字\n")
  69. f.write(f"- **内容节点**: {stats['total_nodes']}\n")
  70. # 适配旧版字段(如果存在)
  71. if 'approval_rate' in stats:
  72. f.write(f"- **直接通过**: {stats.get('approved_nodes', 0)} ({stats['approval_rate']})\n")
  73. f.write(f"- **修改优化**: {stats.get('revised_nodes', 0)} ({stats['revision_rate']})\n")
  74. # 质量报告(如果有)
  75. if 'quality_report' in column_data:
  76. f.write(f"\n## 质量报告\n\n")
  77. quality = column_data['quality_report']
  78. f.write(f"- **平均分数**: {quality['average_score']:.1f}/100\n")
  79. f.write(f"- **分数范围**: {quality['min_score']}-{quality['max_score']}\n")
  80. f.write(f"- **评估节点数**: {quality['total_evaluated']}\n\n")
  81. f.write(f"### 评级分布\n\n")
  82. for grade, count in quality['grade_distribution'].items():
  83. if count > 0:
  84. percentage = count / quality['total_evaluated'] * 100 if quality['total_evaluated'] > 0 else 0
  85. f.write(f"- **{grade}**: {count} 个 ({percentage:.1f}%)\n")
  86. # Agent 模式信息(新版)
  87. if 'agent_modes' in column_data:
  88. f.write(f"\n## Agent 模式\n\n")
  89. modes = column_data['agent_modes']
  90. f.write(f"- **Planner**: {modes.get('planner', 'N/A')}\n")
  91. f.write(f"- **Writer**: {modes.get('writer', 'N/A')}\n")
  92. # 创作统计
  93. if 'creation_stats' in column_data:
  94. creation = column_data['creation_stats']
  95. if creation.get('start_time') and creation.get('end_time'):
  96. # 处理可能是字符串或datetime对象的情况
  97. start_time = creation['start_time']
  98. end_time = creation['end_time']
  99. if isinstance(start_time, str):
  100. try:
  101. start_time = datetime.fromisoformat(start_time)
  102. end_time = datetime.fromisoformat(end_time)
  103. except:
  104. pass
  105. if isinstance(start_time, datetime) and isinstance(end_time, datetime):
  106. duration = (end_time - start_time).total_seconds()
  107. f.write(f"\n## 创作统计\n\n")
  108. f.write(f"- **开始时间**: {start_time.strftime('%Y-%m-%d %H:%M:%S')}\n")
  109. f.write(f"- **结束时间**: {end_time.strftime('%Y-%m-%d %H:%M:%S')}\n")
  110. f.write(f"- **总耗时**: {duration:.1f} 秒 ({duration/60:.1f} 分钟)\n")
  111. f.write(f"- **生成调用**: {creation.get('total_generations', 0)}\n")
  112. if creation.get('total_reviews') > 0:
  113. f.write(f"- **评审次数**: {creation.get('total_reviews')}\n")
  114. if creation.get('total_revisions') > 0:
  115. f.write(f"- **修改次数**: {creation.get('total_revisions')}\n")
  116. f.write(f"\n## 文章列表\n\n")
  117. for idx, article in enumerate(column_data['articles'], 1):
  118. f.write(f"{idx}. **{article['title']}** ({article['word_count']} 字)\n")
  119. # 显示 Agent 模式生成的元数据
  120. meta = article.get('metadata', {})
  121. if 'agent_mode' in meta:
  122. f.write(f" - 模式: {meta['agent_mode']}\n")
  123. if 'review_score' in meta:
  124. f.write(f" - 评分: {meta['review_score']}/100\n")
  125. f.write("\n")