# analyze_dimensions.py (12 KB)

"""
维度分析主脚本 - 从报告中提取维度并修正themes
整合报告加载、维度提取、分析和themes修正建议
"""
  5. import sys
  6. import json
  7. import yaml
  8. import argparse
  9. from pathlib import Path
  10. from datetime import datetime
  11. from typing import Dict, List
  12. # 设置控制台编码为UTF-8(Windows)
  13. if sys.platform == 'win32':
  14. import io
  15. sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
  16. sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
  17. import dimension_analysis as da
  18. import extract_dimensions as ed
  19. import manage_themes as mt
  20. def load_themes(themes_file: Path) -> List[str]:
  21. """加载themes"""
  22. return mt.load_themes(themes_file)
  23. def save_themes(themes_file: Path, themes: List[str]):
  24. """保存themes"""
  25. return mt.save_themes(themes_file, themes)
  26. def apply_theme_suggestions(suggestions: Dict[str, List[Dict]], themes: List[str], themes_file: Path, selected_indices: Dict[str, List[int]]) -> List[str]:
  27. """应用用户选择的themes建议
  28. Args:
  29. suggestions: 建议字典
  30. themes: 当前themes列表
  31. themes_file: themes文件路径
  32. selected_indices: 用户选择的序号字典,格式:{'add': [1, 3], 'remove': [2]}
  33. """
  34. updated_themes = themes.copy()
  35. # 处理添加建议(序号从1开始)
  36. add_suggestions = suggestions.get('add', [])
  37. for idx in selected_indices.get('add', []):
  38. if 1 <= idx <= len(add_suggestions):
  39. sug = add_suggestions[idx - 1] # 转换为0-based索引
  40. theme = sug.get('theme')
  41. if theme and theme not in updated_themes:
  42. updated_themes.append(theme)
  43. print(f"✅ 已添加theme: {theme}")
  44. # 处理删除建议(序号从1开始)
  45. remove_suggestions = suggestions.get('remove', [])
  46. for idx in selected_indices.get('remove', []):
  47. if 1 <= idx <= len(remove_suggestions):
  48. sug = remove_suggestions[idx - 1] # 转换为0-based索引
  49. theme = sug.get('theme')
  50. if theme and theme in updated_themes:
  51. updated_themes.remove(theme)
  52. print(f"✅ 已删除theme: {theme}")
  53. # 保存
  54. if updated_themes != themes:
  55. save_themes(themes_file, updated_themes)
  56. return updated_themes
  57. return themes
  58. def present_theme_suggestions(suggestions: Dict[str, List[Dict]]):
  59. """展示themes建议"""
  60. print("\n" + "=" * 70)
  61. print("📋 Themes修正建议")
  62. print("=" * 70)
  63. all_count = sum(len(v) for k, v in suggestions.items() if k != 'theme_match_analysis')
  64. if all_count == 0:
  65. print("✅ 暂无themes修正建议")
  66. return
  67. # 展示添加建议
  68. if suggestions.get('add'):
  69. print("\n【添加Theme建议】")
  70. for i, sug in enumerate(suggestions['add'], 1):
  71. print(f" {i}. {sug['theme']}")
  72. print(f" 原因: {sug['reason']}")
  73. print(f" 频率: {sug.get('frequency', 0)*100:.1f}%")
  74. # 展示删除建议
  75. if suggestions.get('remove'):
  76. print("\n【删除Theme建议】")
  77. for i, sug in enumerate(suggestions['remove'], 1):
  78. print(f" {i}. {sug['theme']}")
  79. print(f" 原因: {sug['reason']}")
  80. print(f" 匹配率: {sug.get('match_rate', 0)*100:.1f}%")
  81. print("\n" + "=" * 70)
  82. def get_batch_user_confirmation(add_suggestions: List[Dict], remove_suggestions: List[Dict]) -> Dict[str, List[int]]:
  83. """批量获取用户确认
  84. Args:
  85. add_suggestions: 添加建议列表
  86. remove_suggestions: 删除建议列表
  87. Returns:
  88. Dict包含 'add' 和 'remove' 两个列表,列表中是用户选择的序号(从1开始)
  89. """
  90. selected = {'add': [], 'remove': []}
  91. # 获取添加建议的确认
  92. if add_suggestions:
  93. print("\n" + "=" * 70)
  94. print("📥 添加Theme确认")
  95. print("=" * 70)
  96. print("请输入要添加的Theme序号(多个序号用逗号或空格分隔,如:1,3,5 或 1 3 5)")
  97. print("直接回车表示不添加任何Theme")
  98. while True:
  99. user_input = input("添加序号: ").strip()
  100. if not user_input:
  101. break
  102. # 解析输入(支持逗号或空格分隔)
  103. try:
  104. # 尝试用逗号分隔
  105. if ',' in user_input:
  106. numbers = [int(x.strip()) for x in user_input.split(',') if x.strip()]
  107. else:
  108. # 用空格分隔
  109. numbers = [int(x.strip()) for x in user_input.split() if x.strip()]
  110. # 验证序号范围
  111. valid_numbers = [n for n in numbers if 1 <= n <= len(add_suggestions)]
  112. if len(valid_numbers) != len(numbers):
  113. invalid = [n for n in numbers if n < 1 or n > len(add_suggestions)]
  114. print(f"⚠️ 序号 {invalid} 超出范围(1-{len(add_suggestions)}),已忽略")
  115. selected['add'] = valid_numbers
  116. break
  117. except ValueError:
  118. print("⚠️ 输入格式错误,请输入数字序号(用逗号或空格分隔)")
  119. # 获取删除建议的确认
  120. if remove_suggestions:
  121. print("\n" + "=" * 70)
  122. print("📤 删除Theme确认")
  123. print("=" * 70)
  124. print("请输入要删除的Theme序号(多个序号用逗号或空格分隔,如:1,2 或 1 2)")
  125. print("直接回车表示不删除任何Theme")
  126. while True:
  127. user_input = input("删除序号: ").strip()
  128. if not user_input:
  129. break
  130. # 解析输入(支持逗号或空格分隔)
  131. try:
  132. # 尝试用逗号分隔
  133. if ',' in user_input:
  134. numbers = [int(x.strip()) for x in user_input.split(',') if x.strip()]
  135. else:
  136. # 用空格分隔
  137. numbers = [int(x.strip()) for x in user_input.split() if x.strip()]
  138. # 验证序号范围
  139. valid_numbers = [n for n in numbers if 1 <= n <= len(remove_suggestions)]
  140. if len(valid_numbers) != len(numbers):
  141. invalid = [n for n in numbers if n < 1 or n > len(remove_suggestions)]
  142. print(f"⚠️ 序号 {invalid} 超出范围(1-{len(remove_suggestions)}),已忽略")
  143. selected['remove'] = valid_numbers
  144. break
  145. except ValueError:
  146. print("⚠️ 输入格式错误,请输入数字序号(用逗号或空格分隔)")
  147. return selected
  148. def main():
  149. """主函数"""
  150. parser = argparse.ArgumentParser(description="维度分析工具 - 从报告中提取维度并修正themes")
  151. parser.add_argument(
  152. "--extract",
  153. action="store_true",
  154. help="重新提取维度(从报告文件中)"
  155. )
  156. parser.add_argument(
  157. "--interactive",
  158. action="store_true",
  159. help="交互模式:展示建议并获取用户确认"
  160. )
  161. parser.add_argument(
  162. "--base-dir",
  163. type=str,
  164. default=None,
  165. help="基础目录路径(默认为脚本所在目录)"
  166. )
  167. args = parser.parse_args()
  168. # 确定基础目录
  169. if args.base_dir:
  170. base_dir = Path(args.base_dir)
  171. else:
  172. base_dir = Path(__file__).parent
  173. print("=" * 70)
  174. print("维度分析工具 - 从报告中提取维度并修正themes")
  175. print("=" * 70)
  176. # 1. 加载或提取维度
  177. print("\n📊 正在处理维度提取结果...")
  178. extraction_results = []
  179. if args.extract:
  180. # 重新提取维度
  181. print("🔄 从报告文件中提取维度...")
  182. llm = ed.init_llm()
  183. if not llm:
  184. print("❌ LLM未初始化,无法提取维度")
  185. return
  186. # 加载themes作为参考
  187. themes_file = base_dir / "themes.yaml"
  188. existing_themes = mt.load_themes(themes_file)
  189. extraction_results = ed.batch_extract_dimensions(base_dir, report_type=None, llm=llm, existing_themes=existing_themes)
  190. print(f"✅ 从报告中提取了 {len(extraction_results)} 个维度的提取结果")
  191. else:
  192. # 加载已有的提取结果
  193. extraction_results = ed.load_extraction_results(base_dir)
  194. print(f"✅ 加载了 {len(extraction_results)} 个提取结果")
  195. if len(extraction_results) == 0:
  196. print("⚠️ 未找到提取结果,使用 --extract 参数可以重新提取")
  197. print("💡 提示: 运行 'python extract_dimensions.py' 来提取维度")
  198. if len(extraction_results) == 0:
  199. print("❌ 没有维度提取结果,无法进行分析")
  200. return
  201. # 2. 加载themes
  202. themes_file = base_dir / "themes.yaml"
  203. themes = load_themes(themes_file)
  204. if not themes:
  205. print("⚠️ 当前没有themes,请先设置themes")
  206. print("💡 提示: 运行 'python manage_themes.py' 来管理themes")
  207. # 使用空列表继续,以便生成添加建议
  208. print(f"📋 当前themes: {themes}")
  209. # 3. 统计维度
  210. dim_stats = da.count_dimension_frequency_from_extractions(extraction_results)
  211. print(f"\n📈 维度统计: 发现 {len(dim_stats)} 个不同维度")
  212. if dim_stats:
  213. print(" 维度频率(Top 5):")
  214. sorted_dims = sorted(dim_stats.items(), key=lambda x: x[1]['frequency'], reverse=True)[:5]
  215. for dim, stats in sorted_dims:
  216. print(f" - {dim}: {stats['frequency']}次 ({stats['frequency_rate']*100:.1f}%)")
  217. # 4. 生成themes修正建议
  218. print("\n💡 正在生成themes修正建议...")
  219. suggestions = da.generate_theme_suggestions(extraction_results, themes)
  220. total_suggestions = len(suggestions.get('add', [])) + len(suggestions.get('remove', []))
  221. print(f"✅ 生成 {total_suggestions} 条themes修正建议")
  222. # 5. 生成分析报告
  223. today = datetime.now().strftime("%Y-%m-%d")
  224. analysis_report = {
  225. "analysis_date": today,
  226. "total_extractions": len(extraction_results),
  227. "dimension_statistics": dim_stats,
  228. "current_themes": themes,
  229. "theme_suggestions": {
  230. "add": suggestions.get('add', []),
  231. "remove": suggestions.get('remove', [])
  232. },
  233. "theme_match_analysis": suggestions.get('theme_match_analysis', {})
  234. }
  235. # 6. 保存分析报告
  236. analysis_dir = base_dir / "archive" / "dimension_analysis"
  237. analysis_dir.mkdir(parents=True, exist_ok=True)
  238. analysis_file = analysis_dir / f"{today}_analysis.json"
  239. try:
  240. with open(analysis_file, 'w', encoding='utf-8') as f:
  241. json.dump(analysis_report, f, indent=2, ensure_ascii=False)
  242. print(f"\n💾 分析报告已保存到: {analysis_file}")
  243. except Exception as e:
  244. print(f"❌ 保存分析报告失败: {e}")
  245. # 7. 交互模式:展示建议并获取用户确认
  246. if args.interactive and total_suggestions > 0:
  247. present_theme_suggestions(suggestions)
  248. # 批量获取用户确认
  249. add_suggestions = suggestions.get('add', [])
  250. remove_suggestions = suggestions.get('remove', [])
  251. selected_indices = get_batch_user_confirmation(add_suggestions, remove_suggestions)
  252. # 应用用户选择的建议
  253. updated_themes = apply_theme_suggestions(suggestions, themes, themes_file, selected_indices)
  254. if updated_themes != themes:
  255. print(f"\n✅ Themes已更新: {updated_themes}")
  256. else:
  257. print("\n✅ 未应用任何更改")
  258. elif total_suggestions > 0:
  259. # 非交互模式,只展示建议
  260. present_theme_suggestions(suggestions)
  261. print("\n💡 提示: 使用 --interactive 参数可以查看并处理建议")
  262. print("\n✅ 分析完成!")
# Run the analysis only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()