Bladeren bron

chore:add 00-CoCreateProject in readme

jjyaoao 5 maanden geleden
bovenliggende
commit
f1ed65acb4
22 gewijzigde bestanden met toevoegingen van 3636 en 3632 verwijderingen
  1. 0 0
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/.gitignore
  2. 29 29
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/README.md
  3. 327 327
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/analyze_dimensions.py
  4. 11 11
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/channels.yaml.example
  5. 256 256
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/daily_reminder.py
  6. 631 631
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/dimension_analysis.py
  7. 16 16
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/env.example
  8. 382 382
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/extract_dimensions.py
  9. 155 155
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/manage_themes.py
  10. 0 0
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/person.png
  11. 20 20
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/requirements.txt
  12. 18 18
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/requirements_simplified.txt
  13. 740 740
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/search_youtube_mcp_videos.py
  14. 14 14
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/themes.yaml.example
  15. 254 254
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/write_report.py
  16. 246 246
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/完整使用流程说明.md
  17. 377 377
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/桌面提醒设置说明.md
  18. 156 156
      Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/维度分析系统使用说明.md
  19. 1 0
      README.md
  20. 1 0
      README_EN.md
  21. 1 0
      docs/README.md
  22. 1 0
      docs/README_EN.md

+ 0 - 0
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/.gitignore → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/.gitignore


+ 29 - 29
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/README.md → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/README.md

@@ -1,29 +1,29 @@
-# 提醒图片说明
-
-## 图片要求
-
-请将人物图片放在此目录下,命名为以下任一名称:
-
-- `person.png` (推荐,支持透明背景)
-- `person.jpg`
-- `person.jpeg`
-- `reminder.png`
-- `reminder.jpg`
-
-## 图片规格建议
-
-- **格式**: PNG(推荐,支持透明背景)或 JPG
-- **大小**: 200x200 到 400x400 像素
-- **背景**: 透明(PNG)或纯色背景
-- **内容**: 人物照片或卡通形象
-
-## 使用说明
-
-1. 将图片文件重命名为上述名称之一
-2. 放在 `assets/` 目录下
-3. 运行 `daily_reminder.py` 即可显示
-
-## 眨眼动画
-
-系统会自动在图片的眼睛位置添加眨眼动画效果,无需准备额外的图片。
-
+# 提醒图片说明
+
+## 图片要求
+
+请将人物图片放在此目录下,命名为以下任一名称:
+
+- `person.png` (推荐,支持透明背景)
+- `person.jpg`
+- `person.jpeg`
+- `reminder.png`
+- `reminder.jpg`
+
+## 图片规格建议
+
+- **格式**: PNG(推荐,支持透明背景)或 JPG
+- **大小**: 200x200 到 400x400 像素
+- **背景**: 透明(PNG)或纯色背景
+- **内容**: 人物照片或卡通形象
+
+## 使用说明
+
+1. 将图片文件重命名为上述名称之一
+2. 放在 `assets/` 目录下
+3. 运行 `daily_reminder.py` 即可显示
+
+## 眨眼动画
+
+系统会自动在图片的眼睛位置添加眨眼动画效果,无需准备额外的图片。
+

+ 327 - 327
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/analyze_dimensions.py → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/analyze_dimensions.py

@@ -1,327 +1,327 @@
-"""
-维度分析主脚本 - 从报告中提取维度并修正themes
-整合报告加载、维度提取、分析和themes修正建议
-"""
-
-import sys
-import json
-import yaml
-import argparse
-from pathlib import Path
-from datetime import datetime
-from typing import Dict, List
-
-# 设置控制台编码为UTF-8(Windows)
-if sys.platform == 'win32':
-    import io
-    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
-    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
-
-import dimension_analysis as da
-import extract_dimensions as ed
-import manage_themes as mt
-
-
-def load_themes(themes_file: Path) -> List[str]:
-    """加载themes"""
-    return mt.load_themes(themes_file)
-
-
-def save_themes(themes_file: Path, themes: List[str]):
-    """保存themes"""
-    return mt.save_themes(themes_file, themes)
-
-
-def apply_theme_suggestions(suggestions: Dict[str, List[Dict]], themes: List[str], themes_file: Path, selected_indices: Dict[str, List[int]]) -> List[str]:
-    """应用用户选择的themes建议
-    
-    Args:
-        suggestions: 建议字典
-        themes: 当前themes列表
-        themes_file: themes文件路径
-        selected_indices: 用户选择的序号字典,格式:{'add': [1, 3], 'remove': [2]}
-    """
-    updated_themes = themes.copy()
-    
-    # 处理添加建议(序号从1开始)
-    add_suggestions = suggestions.get('add', [])
-    for idx in selected_indices.get('add', []):
-        if 1 <= idx <= len(add_suggestions):
-            sug = add_suggestions[idx - 1]  # 转换为0-based索引
-            theme = sug.get('theme')
-            if theme and theme not in updated_themes:
-                updated_themes.append(theme)
-                print(f"✅ 已添加theme: {theme}")
-    
-    # 处理删除建议(序号从1开始)
-    remove_suggestions = suggestions.get('remove', [])
-    for idx in selected_indices.get('remove', []):
-        if 1 <= idx <= len(remove_suggestions):
-            sug = remove_suggestions[idx - 1]  # 转换为0-based索引
-            theme = sug.get('theme')
-            if theme and theme in updated_themes:
-                updated_themes.remove(theme)
-                print(f"✅ 已删除theme: {theme}")
-    
-    # 保存
-    if updated_themes != themes:
-        save_themes(themes_file, updated_themes)
-        return updated_themes
-    
-    return themes
-
-
-def present_theme_suggestions(suggestions: Dict[str, List[Dict]]):
-    """展示themes建议"""
-    print("\n" + "=" * 70)
-    print("📋 Themes修正建议")
-    print("=" * 70)
-    
-    all_count = sum(len(v) for k, v in suggestions.items() if k != 'theme_match_analysis')
-    if all_count == 0:
-        print("✅ 暂无themes修正建议")
-        return
-    
-    # 展示添加建议
-    if suggestions.get('add'):
-        print("\n【添加Theme建议】")
-        for i, sug in enumerate(suggestions['add'], 1):
-            print(f"  {i}. {sug['theme']}")
-            print(f"     原因: {sug['reason']}")
-            print(f"     频率: {sug.get('frequency', 0)*100:.1f}%")
-    
-    # 展示删除建议
-    if suggestions.get('remove'):
-        print("\n【删除Theme建议】")
-        for i, sug in enumerate(suggestions['remove'], 1):
-            print(f"  {i}. {sug['theme']}")
-            print(f"     原因: {sug['reason']}")
-            print(f"     匹配率: {sug.get('match_rate', 0)*100:.1f}%")
-    
-    print("\n" + "=" * 70)
-
-
-def get_batch_user_confirmation(add_suggestions: List[Dict], remove_suggestions: List[Dict]) -> Dict[str, List[int]]:
-    """批量获取用户确认
-    
-    Args:
-        add_suggestions: 添加建议列表
-        remove_suggestions: 删除建议列表
-    
-    Returns:
-        Dict包含 'add' 和 'remove' 两个列表,列表中是用户选择的序号(从1开始)
-    """
-    selected = {'add': [], 'remove': []}
-    
-    # 获取添加建议的确认
-    if add_suggestions:
-        print("\n" + "=" * 70)
-        print("📥 添加Theme确认")
-        print("=" * 70)
-        print("请输入要添加的Theme序号(多个序号用逗号或空格分隔,如:1,3,5 或 1 3 5)")
-        print("直接回车表示不添加任何Theme")
-        
-        while True:
-            user_input = input("添加序号: ").strip()
-            if not user_input:
-                break
-            
-            # 解析输入(支持逗号或空格分隔)
-            try:
-                # 尝试用逗号分隔
-                if ',' in user_input:
-                    numbers = [int(x.strip()) for x in user_input.split(',') if x.strip()]
-                else:
-                    # 用空格分隔
-                    numbers = [int(x.strip()) for x in user_input.split() if x.strip()]
-                
-                # 验证序号范围
-                valid_numbers = [n for n in numbers if 1 <= n <= len(add_suggestions)]
-                if len(valid_numbers) != len(numbers):
-                    invalid = [n for n in numbers if n < 1 or n > len(add_suggestions)]
-                    print(f"⚠️  序号 {invalid} 超出范围(1-{len(add_suggestions)}),已忽略")
-                
-                selected['add'] = valid_numbers
-                break
-            except ValueError:
-                print("⚠️  输入格式错误,请输入数字序号(用逗号或空格分隔)")
-    
-    # 获取删除建议的确认
-    if remove_suggestions:
-        print("\n" + "=" * 70)
-        print("📤 删除Theme确认")
-        print("=" * 70)
-        print("请输入要删除的Theme序号(多个序号用逗号或空格分隔,如:1,2 或 1 2)")
-        print("直接回车表示不删除任何Theme")
-        
-        while True:
-            user_input = input("删除序号: ").strip()
-            if not user_input:
-                break
-            
-            # 解析输入(支持逗号或空格分隔)
-            try:
-                # 尝试用逗号分隔
-                if ',' in user_input:
-                    numbers = [int(x.strip()) for x in user_input.split(',') if x.strip()]
-                else:
-                    # 用空格分隔
-                    numbers = [int(x.strip()) for x in user_input.split() if x.strip()]
-                
-                # 验证序号范围
-                valid_numbers = [n for n in numbers if 1 <= n <= len(remove_suggestions)]
-                if len(valid_numbers) != len(numbers):
-                    invalid = [n for n in numbers if n < 1 or n > len(remove_suggestions)]
-                    print(f"⚠️  序号 {invalid} 超出范围(1-{len(remove_suggestions)}),已忽略")
-                
-                selected['remove'] = valid_numbers
-                break
-            except ValueError:
-                print("⚠️  输入格式错误,请输入数字序号(用逗号或空格分隔)")
-    
-    return selected
-
-
-def main():
-    """主函数"""
-    parser = argparse.ArgumentParser(description="维度分析工具 - 从报告中提取维度并修正themes")
-    parser.add_argument(
-        "--extract",
-        action="store_true",
-        help="重新提取维度(从报告文件中)"
-    )
-    parser.add_argument(
-        "--interactive",
-        action="store_true",
-        help="交互模式:展示建议并获取用户确认"
-    )
-    parser.add_argument(
-        "--base-dir",
-        type=str,
-        default=None,
-        help="基础目录路径(默认为脚本所在目录)"
-    )
-    args = parser.parse_args()
-    
-    # 确定基础目录
-    if args.base_dir:
-        base_dir = Path(args.base_dir)
-    else:
-        base_dir = Path(__file__).parent
-    
-    print("=" * 70)
-    print("维度分析工具 - 从报告中提取维度并修正themes")
-    print("=" * 70)
-    
-    # 1. 加载或提取维度
-    print("\n📊 正在处理维度提取结果...")
-    
-    extraction_results = []
-    
-    if args.extract:
-        # 重新提取维度
-        print("🔄 从报告文件中提取维度...")
-        llm = ed.init_llm()
-        if not llm:
-            print("❌ LLM未初始化,无法提取维度")
-            return
-        
-        # 加载themes作为参考
-        themes_file = base_dir / "themes.yaml"
-        existing_themes = mt.load_themes(themes_file)
-        
-        extraction_results = ed.batch_extract_dimensions(base_dir, report_type=None, llm=llm, existing_themes=existing_themes)
-        print(f"✅ 从报告中提取了 {len(extraction_results)} 个维度的提取结果")
-    else:
-        # 加载已有的提取结果
-        extraction_results = ed.load_extraction_results(base_dir)
-        print(f"✅ 加载了 {len(extraction_results)} 个提取结果")
-        
-        if len(extraction_results) == 0:
-            print("⚠️  未找到提取结果,使用 --extract 参数可以重新提取")
-            print("💡 提示: 运行 'python extract_dimensions.py' 来提取维度")
-    
-    if len(extraction_results) == 0:
-        print("❌ 没有维度提取结果,无法进行分析")
-        return
-    
-    # 2. 加载themes
-    themes_file = base_dir / "themes.yaml"
-    themes = load_themes(themes_file)
-    
-    if not themes:
-        print("⚠️  当前没有themes,请先设置themes")
-        print("💡 提示: 运行 'python manage_themes.py' 来管理themes")
-        # 使用空列表继续,以便生成添加建议
-    
-    print(f"📋 当前themes: {themes}")
-    
-    # 3. 统计维度
-    dim_stats = da.count_dimension_frequency_from_extractions(extraction_results)
-    print(f"\n📈 维度统计: 发现 {len(dim_stats)} 个不同维度")
-    if dim_stats:
-        print("   维度频率(Top 5):")
-        sorted_dims = sorted(dim_stats.items(), key=lambda x: x[1]['frequency'], reverse=True)[:5]
-        for dim, stats in sorted_dims:
-            print(f"   - {dim}: {stats['frequency']}次 ({stats['frequency_rate']*100:.1f}%)")
-    
-    # 4. 生成themes修正建议
-    print("\n💡 正在生成themes修正建议...")
-    suggestions = da.generate_theme_suggestions(extraction_results, themes)
-    
-    total_suggestions = len(suggestions.get('add', [])) + len(suggestions.get('remove', []))
-    print(f"✅ 生成 {total_suggestions} 条themes修正建议")
-    
-    # 5. 生成分析报告
-    today = datetime.now().strftime("%Y-%m-%d")
-    
-    analysis_report = {
-        "analysis_date": today,
-        "total_extractions": len(extraction_results),
-        "dimension_statistics": dim_stats,
-        "current_themes": themes,
-        "theme_suggestions": {
-            "add": suggestions.get('add', []),
-            "remove": suggestions.get('remove', [])
-        },
-        "theme_match_analysis": suggestions.get('theme_match_analysis', {})
-    }
-    
-    # 6. 保存分析报告
-    analysis_dir = base_dir / "archive" / "dimension_analysis"
-    analysis_dir.mkdir(parents=True, exist_ok=True)
-    analysis_file = analysis_dir / f"{today}_analysis.json"
-    
-    try:
-        with open(analysis_file, 'w', encoding='utf-8') as f:
-            json.dump(analysis_report, f, indent=2, ensure_ascii=False)
-        print(f"\n💾 分析报告已保存到: {analysis_file}")
-    except Exception as e:
-        print(f"❌ 保存分析报告失败: {e}")
-    
-    # 7. 交互模式:展示建议并获取用户确认
-    if args.interactive and total_suggestions > 0:
-        present_theme_suggestions(suggestions)
-        
-        # 批量获取用户确认
-        add_suggestions = suggestions.get('add', [])
-        remove_suggestions = suggestions.get('remove', [])
-        selected_indices = get_batch_user_confirmation(add_suggestions, remove_suggestions)
-        
-        # 应用用户选择的建议
-        updated_themes = apply_theme_suggestions(suggestions, themes, themes_file, selected_indices)
-        
-        if updated_themes != themes:
-            print(f"\n✅ Themes已更新: {updated_themes}")
-        else:
-            print("\n✅ 未应用任何更改")
-    elif total_suggestions > 0:
-        # 非交互模式,只展示建议
-        present_theme_suggestions(suggestions)
-        print("\n💡 提示: 使用 --interactive 参数可以查看并处理建议")
-    
-    print("\n✅ 分析完成!")
-
-
-if __name__ == "__main__":
-    main()
+"""
+维度分析主脚本 - 从报告中提取维度并修正themes
+整合报告加载、维度提取、分析和themes修正建议
+"""
+
+import sys
+import json
+import yaml
+import argparse
+from pathlib import Path
+from datetime import datetime
+from typing import Dict, List
+
+# 设置控制台编码为UTF-8(Windows)
+if sys.platform == 'win32':
+    import io
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
+
+import dimension_analysis as da
+import extract_dimensions as ed
+import manage_themes as mt
+
+
+def load_themes(themes_file: Path) -> List[str]:
+    """加载themes"""
+    return mt.load_themes(themes_file)
+
+
+def save_themes(themes_file: Path, themes: List[str]):
+    """保存themes"""
+    return mt.save_themes(themes_file, themes)
+
+
+def apply_theme_suggestions(suggestions: Dict[str, List[Dict]], themes: List[str], themes_file: Path, selected_indices: Dict[str, List[int]]) -> List[str]:
+    """应用用户选择的themes建议
+    
+    Args:
+        suggestions: 建议字典
+        themes: 当前themes列表
+        themes_file: themes文件路径
+        selected_indices: 用户选择的序号字典,格式:{'add': [1, 3], 'remove': [2]}
+    """
+    updated_themes = themes.copy()
+    
+    # 处理添加建议(序号从1开始)
+    add_suggestions = suggestions.get('add', [])
+    for idx in selected_indices.get('add', []):
+        if 1 <= idx <= len(add_suggestions):
+            sug = add_suggestions[idx - 1]  # 转换为0-based索引
+            theme = sug.get('theme')
+            if theme and theme not in updated_themes:
+                updated_themes.append(theme)
+                print(f"✅ 已添加theme: {theme}")
+    
+    # 处理删除建议(序号从1开始)
+    remove_suggestions = suggestions.get('remove', [])
+    for idx in selected_indices.get('remove', []):
+        if 1 <= idx <= len(remove_suggestions):
+            sug = remove_suggestions[idx - 1]  # 转换为0-based索引
+            theme = sug.get('theme')
+            if theme and theme in updated_themes:
+                updated_themes.remove(theme)
+                print(f"✅ 已删除theme: {theme}")
+    
+    # 保存
+    if updated_themes != themes:
+        save_themes(themes_file, updated_themes)
+        return updated_themes
+    
+    return themes
+
+
+def present_theme_suggestions(suggestions: Dict[str, List[Dict]]):
+    """展示themes建议"""
+    print("\n" + "=" * 70)
+    print("📋 Themes修正建议")
+    print("=" * 70)
+    
+    all_count = sum(len(v) for k, v in suggestions.items() if k != 'theme_match_analysis')
+    if all_count == 0:
+        print("✅ 暂无themes修正建议")
+        return
+    
+    # 展示添加建议
+    if suggestions.get('add'):
+        print("\n【添加Theme建议】")
+        for i, sug in enumerate(suggestions['add'], 1):
+            print(f"  {i}. {sug['theme']}")
+            print(f"     原因: {sug['reason']}")
+            print(f"     频率: {sug.get('frequency', 0)*100:.1f}%")
+    
+    # 展示删除建议
+    if suggestions.get('remove'):
+        print("\n【删除Theme建议】")
+        for i, sug in enumerate(suggestions['remove'], 1):
+            print(f"  {i}. {sug['theme']}")
+            print(f"     原因: {sug['reason']}")
+            print(f"     匹配率: {sug.get('match_rate', 0)*100:.1f}%")
+    
+    print("\n" + "=" * 70)
+
+
+def get_batch_user_confirmation(add_suggestions: List[Dict], remove_suggestions: List[Dict]) -> Dict[str, List[int]]:
+    """批量获取用户确认
+    
+    Args:
+        add_suggestions: 添加建议列表
+        remove_suggestions: 删除建议列表
+    
+    Returns:
+        Dict包含 'add' 和 'remove' 两个列表,列表中是用户选择的序号(从1开始)
+    """
+    selected = {'add': [], 'remove': []}
+    
+    # 获取添加建议的确认
+    if add_suggestions:
+        print("\n" + "=" * 70)
+        print("📥 添加Theme确认")
+        print("=" * 70)
+        print("请输入要添加的Theme序号(多个序号用逗号或空格分隔,如:1,3,5 或 1 3 5)")
+        print("直接回车表示不添加任何Theme")
+        
+        while True:
+            user_input = input("添加序号: ").strip()
+            if not user_input:
+                break
+            
+            # 解析输入(支持逗号或空格分隔)
+            try:
+                # 尝试用逗号分隔
+                if ',' in user_input:
+                    numbers = [int(x.strip()) for x in user_input.split(',') if x.strip()]
+                else:
+                    # 用空格分隔
+                    numbers = [int(x.strip()) for x in user_input.split() if x.strip()]
+                
+                # 验证序号范围
+                valid_numbers = [n for n in numbers if 1 <= n <= len(add_suggestions)]
+                if len(valid_numbers) != len(numbers):
+                    invalid = [n for n in numbers if n < 1 or n > len(add_suggestions)]
+                    print(f"⚠️  序号 {invalid} 超出范围(1-{len(add_suggestions)}),已忽略")
+                
+                selected['add'] = valid_numbers
+                break
+            except ValueError:
+                print("⚠️  输入格式错误,请输入数字序号(用逗号或空格分隔)")
+    
+    # 获取删除建议的确认
+    if remove_suggestions:
+        print("\n" + "=" * 70)
+        print("📤 删除Theme确认")
+        print("=" * 70)
+        print("请输入要删除的Theme序号(多个序号用逗号或空格分隔,如:1,2 或 1 2)")
+        print("直接回车表示不删除任何Theme")
+        
+        while True:
+            user_input = input("删除序号: ").strip()
+            if not user_input:
+                break
+            
+            # 解析输入(支持逗号或空格分隔)
+            try:
+                # 尝试用逗号分隔
+                if ',' in user_input:
+                    numbers = [int(x.strip()) for x in user_input.split(',') if x.strip()]
+                else:
+                    # 用空格分隔
+                    numbers = [int(x.strip()) for x in user_input.split() if x.strip()]
+                
+                # 验证序号范围
+                valid_numbers = [n for n in numbers if 1 <= n <= len(remove_suggestions)]
+                if len(valid_numbers) != len(numbers):
+                    invalid = [n for n in numbers if n < 1 or n > len(remove_suggestions)]
+                    print(f"⚠️  序号 {invalid} 超出范围(1-{len(remove_suggestions)}),已忽略")
+                
+                selected['remove'] = valid_numbers
+                break
+            except ValueError:
+                print("⚠️  输入格式错误,请输入数字序号(用逗号或空格分隔)")
+    
+    return selected
+
+
+def main():
+    """主函数"""
+    parser = argparse.ArgumentParser(description="维度分析工具 - 从报告中提取维度并修正themes")
+    parser.add_argument(
+        "--extract",
+        action="store_true",
+        help="重新提取维度(从报告文件中)"
+    )
+    parser.add_argument(
+        "--interactive",
+        action="store_true",
+        help="交互模式:展示建议并获取用户确认"
+    )
+    parser.add_argument(
+        "--base-dir",
+        type=str,
+        default=None,
+        help="基础目录路径(默认为脚本所在目录)"
+    )
+    args = parser.parse_args()
+    
+    # 确定基础目录
+    if args.base_dir:
+        base_dir = Path(args.base_dir)
+    else:
+        base_dir = Path(__file__).parent
+    
+    print("=" * 70)
+    print("维度分析工具 - 从报告中提取维度并修正themes")
+    print("=" * 70)
+    
+    # 1. 加载或提取维度
+    print("\n📊 正在处理维度提取结果...")
+    
+    extraction_results = []
+    
+    if args.extract:
+        # 重新提取维度
+        print("🔄 从报告文件中提取维度...")
+        llm = ed.init_llm()
+        if not llm:
+            print("❌ LLM未初始化,无法提取维度")
+            return
+        
+        # 加载themes作为参考
+        themes_file = base_dir / "themes.yaml"
+        existing_themes = mt.load_themes(themes_file)
+        
+        extraction_results = ed.batch_extract_dimensions(base_dir, report_type=None, llm=llm, existing_themes=existing_themes)
+        print(f"✅ 从报告中提取了 {len(extraction_results)} 个维度的提取结果")
+    else:
+        # 加载已有的提取结果
+        extraction_results = ed.load_extraction_results(base_dir)
+        print(f"✅ 加载了 {len(extraction_results)} 个提取结果")
+        
+        if len(extraction_results) == 0:
+            print("⚠️  未找到提取结果,使用 --extract 参数可以重新提取")
+            print("💡 提示: 运行 'python extract_dimensions.py' 来提取维度")
+    
+    if len(extraction_results) == 0:
+        print("❌ 没有维度提取结果,无法进行分析")
+        return
+    
+    # 2. 加载themes
+    themes_file = base_dir / "themes.yaml"
+    themes = load_themes(themes_file)
+    
+    if not themes:
+        print("⚠️  当前没有themes,请先设置themes")
+        print("💡 提示: 运行 'python manage_themes.py' 来管理themes")
+        # 使用空列表继续,以便生成添加建议
+    
+    print(f"📋 当前themes: {themes}")
+    
+    # 3. 统计维度
+    dim_stats = da.count_dimension_frequency_from_extractions(extraction_results)
+    print(f"\n📈 维度统计: 发现 {len(dim_stats)} 个不同维度")
+    if dim_stats:
+        print("   维度频率(Top 5):")
+        sorted_dims = sorted(dim_stats.items(), key=lambda x: x[1]['frequency'], reverse=True)[:5]
+        for dim, stats in sorted_dims:
+            print(f"   - {dim}: {stats['frequency']}次 ({stats['frequency_rate']*100:.1f}%)")
+    
+    # 4. 生成themes修正建议
+    print("\n💡 正在生成themes修正建议...")
+    suggestions = da.generate_theme_suggestions(extraction_results, themes)
+    
+    total_suggestions = len(suggestions.get('add', [])) + len(suggestions.get('remove', []))
+    print(f"✅ 生成 {total_suggestions} 条themes修正建议")
+    
+    # 5. 生成分析报告
+    today = datetime.now().strftime("%Y-%m-%d")
+    
+    analysis_report = {
+        "analysis_date": today,
+        "total_extractions": len(extraction_results),
+        "dimension_statistics": dim_stats,
+        "current_themes": themes,
+        "theme_suggestions": {
+            "add": suggestions.get('add', []),
+            "remove": suggestions.get('remove', [])
+        },
+        "theme_match_analysis": suggestions.get('theme_match_analysis', {})
+    }
+    
+    # 6. 保存分析报告
+    analysis_dir = base_dir / "archive" / "dimension_analysis"
+    analysis_dir.mkdir(parents=True, exist_ok=True)
+    analysis_file = analysis_dir / f"{today}_analysis.json"
+    
+    try:
+        with open(analysis_file, 'w', encoding='utf-8') as f:
+            json.dump(analysis_report, f, indent=2, ensure_ascii=False)
+        print(f"\n💾 分析报告已保存到: {analysis_file}")
+    except Exception as e:
+        print(f"❌ 保存分析报告失败: {e}")
+    
+    # 7. 交互模式:展示建议并获取用户确认
+    if args.interactive and total_suggestions > 0:
+        present_theme_suggestions(suggestions)
+        
+        # 批量获取用户确认
+        add_suggestions = suggestions.get('add', [])
+        remove_suggestions = suggestions.get('remove', [])
+        selected_indices = get_batch_user_confirmation(add_suggestions, remove_suggestions)
+        
+        # 应用用户选择的建议
+        updated_themes = apply_theme_suggestions(suggestions, themes, themes_file, selected_indices)
+        
+        if updated_themes != themes:
+            print(f"\n✅ Themes已更新: {updated_themes}")
+        else:
+            print("\n✅ 未应用任何更改")
+    elif total_suggestions > 0:
+        # 非交互模式,只展示建议
+        present_theme_suggestions(suggestions)
+        print("\n💡 提示: 使用 --interactive 参数可以查看并处理建议")
+    
+    print("\n✅ 分析完成!")
+
+
+if __name__ == "__main__":
+    main()

+ 11 - 11
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/channels.yaml.example → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/channels.yaml.example

@@ -1,11 +1,11 @@
-# YouTube频道白名单配置示例
-# 复制此文件为 channels.yaml 并修改为你信任的频道
-
-whitelist_channels:
-  - Anthropic          # Anthropic官方频道
-  - OpenAI             # OpenAI官方频道
-  - The Diary Of A CEO # 商业/创业频道
-  # 你可以添加更多信任的频道,例如:
-  # - 你喜欢的频道名称1
-  # - 你喜欢的频道名称2
-
+# YouTube频道白名单配置示例
+# 复制此文件为 channels.yaml 并修改为你信任的频道
+
+whitelist_channels:
+  - Anthropic          # Anthropic官方频道
+  - OpenAI             # OpenAI官方频道
+  - The Diary Of A CEO # 商业/创业频道
+  # 你可以添加更多信任的频道,例如:
+  # - 你喜欢的频道名称1
+  # - 你喜欢的频道名称2
+

+ 256 - 256
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/daily_reminder.py → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/daily_reminder.py

@@ -1,256 +1,256 @@
-"""
-每日提醒工具 - 晚上11:30弹出人物提醒写日报
-显示美化窗口
-"""
-
-import sys
-import os
-import subprocess
-from pathlib import Path
-from datetime import datetime
-
-# 设置控制台编码为UTF-8(Windows)
-if sys.platform == 'win32':
-    import io
-    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
-    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
-
-try:
-    import tkinter as tk
-    from tkinter import messagebox
-    TKINTER_AVAILABLE = True
-except ImportError:
-    TKINTER_AVAILABLE = False
-    print("❌ 错误: tkinter 未安装(Python应该自带tkinter)")
-
-try:
-    from PIL import Image, ImageTk
-    PIL_AVAILABLE = True
-except ImportError:
-    PIL_AVAILABLE = False
-    print("⚠️  警告: Pillow 未安装,无法显示图片")
-    print("💡 请运行: pip install Pillow")
-
-
-class DailyReminder:
-    def __init__(self, base_dir=None):
-        self.base_dir = base_dir or Path(__file__).parent
-        self.window = None
-        self.canvas = None
-        self.photo = None
-        self.original_image = None
-        
-        # 窗口设置
-        self.window_width = 160
-        self.window_height = 160
-        self.image_size = (150, 150)  # 图片大小适配窗口
-        
-    def load_image(self):
-        """加载人物图片"""
-        # 尝试多种可能的图片路径和格式
-        image_paths = [
-            self.base_dir / "assets" / "person.png",
-            self.base_dir / "assets" / "person.jpg",
-            self.base_dir / "assets" / "person.jpeg",
-            self.base_dir / "assets" / "reminder.png",
-            self.base_dir / "assets" / "reminder.jpg",
-        ]
-        
-        for img_path in image_paths:
-            if img_path.exists():
-                try:
-                    img = Image.open(img_path)
-                    # 转换为RGBA模式以支持透明背景
-                    if img.mode != 'RGBA':
-                        img = img.convert('RGBA')
-                    # 调整图片大小
-                    img = img.resize(self.image_size, Image.Resampling.LANCZOS)
-                    self.original_image = img
-                    return True
-                except Exception as e:
-                    print(f"⚠️  加载图片失败 {img_path}: {e}")
-                    continue
-        
-        print("❌ 未找到人物图片文件")
-        print("💡 请将图片放在 assets/ 目录下,命名为 person.png 或 person.jpg")
-        return False
-    
-    def show_reminder(self):
-        """显示提醒窗口"""
-        if not TKINTER_AVAILABLE:
-            self.show_system_notification()
-            return
-        
-        if not PIL_AVAILABLE:
-            try:
-                messagebox.showerror("错误", "Pillow 未安装,无法显示图片\n请运行: pip install Pillow")
-            except:
-                print("❌ 错误: Pillow 未安装,无法显示图片\n💡 请运行: pip install Pillow")
-            return
-        
-        if not self.load_image():
-            try:
-                messagebox.showerror("错误", "未找到人物图片文件\n请将图片放在 assets/ 目录下\n支持的名称: person.png, person.jpg, reminder.png")
-            except:
-                print("❌ 错误: 未找到人物图片文件\n💡 请将图片放在 assets/ 目录下")
-            return
-        
-        # 创建主窗口
-        self.window = tk.Toplevel()
-        self.window.title("📝 写日报提醒")
-        
-        # 设置窗口属性
-        self.window.attributes('-topmost', True)  # 置顶
-        self.window.attributes('-alpha', 0.95)   # 半透明
-        
-        # 移除窗口边框(可选,创建无边框窗口)
-        # self.window.overrideredirect(True)
-        
-        # 计算窗口位置(屏幕右下角)
-        screen_width = self.window.winfo_screenwidth()
-        screen_height = self.window.winfo_screenheight()
-        x = screen_width - self.window_width - 20  # 距离右边20像素
-        y = screen_height - self.window_height - 60  # 距离底部60像素(留出任务栏空间)
-        
-        self.window.geometry(f"{self.window_width}x{self.window_height}+{x}+{y}")
-        
-        # 设置窗口背景色
-        self.window.configure(bg='#f0f0f0')
-        
-        # 创建画布
-        self.canvas = tk.Canvas(
-            self.window,
-            width=self.window_width,
-            height=self.window_height,
-            bg='#f0f0f0',
-            highlightthickness=0
-        )
-        self.canvas.pack(fill=tk.BOTH, expand=True)
-        
-        # 显示图片
-        self.update_image()
-        
-        # 绑定点击事件
-        self.canvas.bind('<Button-1>', self.on_click)
-        self.window.bind('<Button-1>', self.on_click)
-        
-        # 淡入动画
-        self.fade_in()
-        
-        # 窗口关闭事件
-        self.window.protocol("WM_DELETE_WINDOW", self.on_close)
-    
-    def update_image(self):
-        """更新显示的图片"""
-        if not self.original_image:
-            return
-        
-        # 转换为PhotoImage
-        self.photo = ImageTk.PhotoImage(self.original_image)
-        
-        # 清除画布并重新绘制
-        self.canvas.delete("image")
-        x = (self.window_width - self.image_size[0]) // 2
-        y = (self.window_height - self.image_size[1]) // 2  # 居中显示
-        self.canvas.create_image(x, y, anchor=tk.NW, image=self.photo, tags="image")
-    
-    def fade_in(self):
-        """淡入动画"""
-        if not self.window:
-            return
-        
-        alpha = 0.0
-        step = 0.05
-        
-        def fade():
-            nonlocal alpha
-            if alpha < 0.95:
-                alpha += step
-                self.window.attributes('-alpha', alpha)
-                self.window.after(20, fade)
-        
-        fade()
-    
-    def on_click(self, event=None):
-        """点击事件处理"""
-        self.on_close()
-        self.start_write_report()
-    
-    def on_close(self):
-        """关闭窗口"""
-        # 淡出动画
-        if self.window:
-            alpha = 0.95
-            def fade_out():
-                nonlocal alpha
-                if alpha > 0:
-                    alpha -= 0.1
-                    try:
-                        self.window.attributes('-alpha', alpha)
-                        self.window.after(30, fade_out)
-                    except:
-                        pass
-                else:
-                    if self.window:
-                        self.window.destroy()
-            fade_out()
-    
-    def start_write_report(self):
-        """启动写日报"""
-        try:
-            write_report_script = self.base_dir / "write_report.py"
-            if not write_report_script.exists():
-                error_msg = f"未找到 write_report.py\n路径: {write_report_script}"
-                try:
-                    messagebox.showerror("错误", error_msg)
-                except:
-                    print(f"❌ {error_msg}")
-                return
-            
-            # 使用subprocess启动写日报脚本,并传递--daily参数
-            python_exe = sys.executable
-            subprocess.Popen(
-                [python_exe, str(write_report_script), "--daily"],
-                cwd=str(self.base_dir),
-                creationflags=subprocess.CREATE_NEW_CONSOLE if sys.platform == 'win32' else 0
-            )
-        except Exception as e:
-            error_msg = f"启动写日报失败: {e}"
-            try:
-                messagebox.showerror("错误", error_msg)
-            except:
-                print(f"❌ {error_msg}")
-    
-    def show_system_notification(self):
-        """显示系统通知(备选方案)"""
-        try:
-            from plyer import notification
-            notification.notify(
-                title="📝 写日报提醒",
-                message="该写日报啦!点击通知打开写日报。",
-                timeout=10
-            )
-        except:
-            print("📝 写日报提醒:该写日报啦!")
-
-
-def main():
-    """主函数"""
-    base_dir = Path(__file__).parent
-    
-    # 检查今天是否已经提醒过(可选功能)
-    # 这里可以添加检查逻辑,避免重复提醒
-    
-    reminder = DailyReminder(base_dir)
-    reminder.show_reminder()
-    
-    # 运行tkinter主循环
-    if TKINTER_AVAILABLE:
-        root = tk.Tk()
-        root.withdraw()  # 隐藏主窗口
-        root.mainloop()
-
-
-if __name__ == "__main__":
-    main()
-
+"""
+每日提醒工具 - 晚上11:30弹出人物提醒写日报
+显示美化窗口
+"""
+
+import sys
+import os
+import subprocess
+from pathlib import Path
+from datetime import datetime
+
+# 设置控制台编码为UTF-8(Windows)
+if sys.platform == 'win32':
+    import io
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
+
+try:
+    import tkinter as tk
+    from tkinter import messagebox
+    TKINTER_AVAILABLE = True
+except ImportError:
+    TKINTER_AVAILABLE = False
+    print("❌ 错误: tkinter 未安装(Python应该自带tkinter)")
+
+try:
+    from PIL import Image, ImageTk
+    PIL_AVAILABLE = True
+except ImportError:
+    PIL_AVAILABLE = False
+    print("⚠️  警告: Pillow 未安装,无法显示图片")
+    print("💡 请运行: pip install Pillow")
+
+
+class DailyReminder:
+    def __init__(self, base_dir=None):
+        self.base_dir = base_dir or Path(__file__).parent
+        self.window = None
+        self.canvas = None
+        self.photo = None
+        self.original_image = None
+        
+        # 窗口设置
+        self.window_width = 160
+        self.window_height = 160
+        self.image_size = (150, 150)  # 图片大小适配窗口
+        
+    def load_image(self):
+        """加载人物图片"""
+        # 尝试多种可能的图片路径和格式
+        image_paths = [
+            self.base_dir / "assets" / "person.png",
+            self.base_dir / "assets" / "person.jpg",
+            self.base_dir / "assets" / "person.jpeg",
+            self.base_dir / "assets" / "reminder.png",
+            self.base_dir / "assets" / "reminder.jpg",
+        ]
+        
+        for img_path in image_paths:
+            if img_path.exists():
+                try:
+                    img = Image.open(img_path)
+                    # 转换为RGBA模式以支持透明背景
+                    if img.mode != 'RGBA':
+                        img = img.convert('RGBA')
+                    # 调整图片大小
+                    img = img.resize(self.image_size, Image.Resampling.LANCZOS)
+                    self.original_image = img
+                    return True
+                except Exception as e:
+                    print(f"⚠️  加载图片失败 {img_path}: {e}")
+                    continue
+        
+        print("❌ 未找到人物图片文件")
+        print("💡 请将图片放在 assets/ 目录下,命名为 person.png 或 person.jpg")
+        return False
+    
+    def show_reminder(self):
+        """显示提醒窗口"""
+        if not TKINTER_AVAILABLE:
+            self.show_system_notification()
+            return
+        
+        if not PIL_AVAILABLE:
+            try:
+                messagebox.showerror("错误", "Pillow 未安装,无法显示图片\n请运行: pip install Pillow")
+            except:
+                print("❌ 错误: Pillow 未安装,无法显示图片\n💡 请运行: pip install Pillow")
+            return
+        
+        if not self.load_image():
+            try:
+                messagebox.showerror("错误", "未找到人物图片文件\n请将图片放在 assets/ 目录下\n支持的名称: person.png, person.jpg, reminder.png")
+            except:
+                print("❌ 错误: 未找到人物图片文件\n💡 请将图片放在 assets/ 目录下")
+            return
+        
+        # 创建主窗口
+        self.window = tk.Toplevel()
+        self.window.title("📝 写日报提醒")
+        
+        # 设置窗口属性
+        self.window.attributes('-topmost', True)  # 置顶
+        self.window.attributes('-alpha', 0.95)   # 半透明
+        
+        # 移除窗口边框(可选,创建无边框窗口)
+        # self.window.overrideredirect(True)
+        
+        # 计算窗口位置(屏幕右下角)
+        screen_width = self.window.winfo_screenwidth()
+        screen_height = self.window.winfo_screenheight()
+        x = screen_width - self.window_width - 20  # 距离右边20像素
+        y = screen_height - self.window_height - 60  # 距离底部60像素(留出任务栏空间)
+        
+        self.window.geometry(f"{self.window_width}x{self.window_height}+{x}+{y}")
+        
+        # 设置窗口背景色
+        self.window.configure(bg='#f0f0f0')
+        
+        # 创建画布
+        self.canvas = tk.Canvas(
+            self.window,
+            width=self.window_width,
+            height=self.window_height,
+            bg='#f0f0f0',
+            highlightthickness=0
+        )
+        self.canvas.pack(fill=tk.BOTH, expand=True)
+        
+        # 显示图片
+        self.update_image()
+        
+        # 绑定点击事件
+        self.canvas.bind('<Button-1>', self.on_click)
+        self.window.bind('<Button-1>', self.on_click)
+        
+        # 淡入动画
+        self.fade_in()
+        
+        # 窗口关闭事件
+        self.window.protocol("WM_DELETE_WINDOW", self.on_close)
+    
+    def update_image(self):
+        """更新显示的图片"""
+        if not self.original_image:
+            return
+        
+        # 转换为PhotoImage
+        self.photo = ImageTk.PhotoImage(self.original_image)
+        
+        # 清除画布并重新绘制
+        self.canvas.delete("image")
+        x = (self.window_width - self.image_size[0]) // 2
+        y = (self.window_height - self.image_size[1]) // 2  # 居中显示
+        self.canvas.create_image(x, y, anchor=tk.NW, image=self.photo, tags="image")
+    
+    def fade_in(self):
+        """淡入动画"""
+        if not self.window:
+            return
+        
+        alpha = 0.0
+        step = 0.05
+        
+        def fade():
+            nonlocal alpha
+            if alpha < 0.95:
+                alpha += step
+                self.window.attributes('-alpha', alpha)
+                self.window.after(20, fade)
+        
+        fade()
+    
+    def on_click(self, event=None):
+        """点击事件处理"""
+        self.on_close()
+        self.start_write_report()
+    
+    def on_close(self):
+        """关闭窗口"""
+        # 淡出动画
+        if self.window:
+            alpha = 0.95
+            def fade_out():
+                nonlocal alpha
+                if alpha > 0:
+                    alpha -= 0.1
+                    try:
+                        self.window.attributes('-alpha', alpha)
+                        self.window.after(30, fade_out)
+                    except:
+                        pass
+                else:
+                    if self.window:
+                        self.window.destroy()
+            fade_out()
+    
+    def start_write_report(self):
+        """启动写日报"""
+        try:
+            write_report_script = self.base_dir / "write_report.py"
+            if not write_report_script.exists():
+                error_msg = f"未找到 write_report.py\n路径: {write_report_script}"
+                try:
+                    messagebox.showerror("错误", error_msg)
+                except:
+                    print(f"❌ {error_msg}")
+                return
+            
+            # 使用subprocess启动写日报脚本,并传递--daily参数
+            python_exe = sys.executable
+            subprocess.Popen(
+                [python_exe, str(write_report_script), "--daily"],
+                cwd=str(self.base_dir),
+                creationflags=subprocess.CREATE_NEW_CONSOLE if sys.platform == 'win32' else 0
+            )
+        except Exception as e:
+            error_msg = f"启动写日报失败: {e}"
+            try:
+                messagebox.showerror("错误", error_msg)
+            except:
+                print(f"❌ {error_msg}")
+    
+    def show_system_notification(self):
+        """显示系统通知(备选方案)"""
+        try:
+            from plyer import notification
+            notification.notify(
+                title="📝 写日报提醒",
+                message="该写日报啦!点击通知打开写日报。",
+                timeout=10
+            )
+        except:
+            print("📝 写日报提醒:该写日报啦!")
+
+
+def main():
+    """主函数"""
+    base_dir = Path(__file__).parent
+    
+    # 检查今天是否已经提醒过(可选功能)
+    # 这里可以添加检查逻辑,避免重复提醒
+    
+    reminder = DailyReminder(base_dir)
+    reminder.show_reminder()
+    
+    # 运行tkinter主循环
+    if TKINTER_AVAILABLE:
+        root = tk.Tk()
+        root.withdraw()  # 隐藏主窗口
+        root.mainloop()
+
+
+if __name__ == "__main__":
+    main()
+

+ 631 - 631
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/dimension_analysis.py → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/dimension_analysis.py

@@ -1,631 +1,631 @@
-"""
-维度分析模块 - V1 简化版
-提供维度数据的收集、分析、建议生成和用户交互功能
-"""
-
-import json
-import os
-import sys
-from pathlib import Path
-from datetime import datetime, timedelta
-from typing import List, Dict, Optional, Any
-from collections import defaultdict
-
-# 设置控制台编码为UTF-8(Windows)
-# 注意:只在作为主脚本运行时重定向,避免在被导入时冲突
-if sys.platform == 'win32' and __name__ == "__main__":
-    import io
-    if not isinstance(sys.stdout, io.TextIOWrapper):
-        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
-    if not isinstance(sys.stderr, io.TextIOWrapper):
-        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
-
-
-# ==================== 数据收集功能 ====================
-
-def collect_daily_records(archive_dir: Path) -> List[Dict]:
-    """从 archive/youtube/ 目录读取所有日报 JSON"""
-    records = []
-    if not archive_dir.exists():
-        return records
-    
-    for json_file in archive_dir.glob("*.json"):
-        # 跳过 research 报告
-        if json_file.name.endswith("_research.json"):
-            continue
-        
-        try:
-            with open(json_file, 'r', encoding='utf-8') as f:
-                data = json.load(f)
-                # 确保有 dimensions 字段(向后兼容)
-                if 'dimensions' not in data:
-                    data['dimensions'] = []
-                records.append(data)
-        except Exception as e:
-            print(f"⚠️  读取文件失败 {json_file.name}: {e}")
-    
-    return records
-
-
-def collect_weekly_records(weekly_dir: Path) -> List[Dict]:
-    """从指定目录读取周报 JSON"""
-    records = []
-    if not weekly_dir.exists():
-        return records
-    
-    for json_file in weekly_dir.glob("*.json"):
-        try:
-            with open(json_file, 'r', encoding='utf-8') as f:
-                data = json.load(f)
-                if 'dimensions' not in data:
-                    data['dimensions'] = []
-                records.append(data)
-        except Exception as e:
-            print(f"⚠️  读取周报文件失败 {json_file.name}: {e}")
-    
-    return records
-
-
-def collect_monthly_records(monthly_dir: Path) -> List[Dict]:
-    """从指定目录读取月报 JSON"""
-    records = []
-    if not monthly_dir.exists():
-        return records
-    
-    for json_file in monthly_dir.glob("*.json"):
-        try:
-            with open(json_file, 'r', encoding='utf-8') as f:
-                data = json.load(f)
-                if 'dimensions' not in data:
-                    data['dimensions'] = []
-                records.append(data)
-        except Exception as e:
-            print(f"⚠️  读取月报文件失败 {json_file.name}: {e}")
-    
-    return records
-
-
-def load_all_records(base_dir: Path) -> Dict[str, List[Dict]]:
-    """统一加载所有类型的记录"""
-    archive_dir = base_dir / "archive" / "youtube"
-    weekly_dir = base_dir / "archive" / "weekly"
-    monthly_dir = base_dir / "archive" / "monthly"
-    
-    return {
-        "daily": collect_daily_records(archive_dir),
-        "weekly": collect_weekly_records(weekly_dir),
-        "monthly": collect_monthly_records(monthly_dir)
-    }
-
-
-# ==================== 维度分析功能 ====================
-
-def parse_date(date_str: str) -> Optional[datetime]:
-    """解析日期字符串为 datetime 对象"""
-    try:
-        # 支持多种日期格式
-        for fmt in ["%Y-%m-%d", "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S"]:
-            try:
-                return datetime.strptime(date_str, fmt)
-            except ValueError:
-                continue
-        return None
-    except Exception:
-        return None
-
-
-def count_dimension_frequency(records: List[Dict]) -> Dict[str, Dict]:
-    """统计每个维度的出现频率"""
-    dimension_stats = defaultdict(lambda: {
-        'frequency': 0,
-        'dates': [],
-        'first_seen': None,
-        'last_seen': None
-    })
-    
-    total_records = len(records)
-    
-    for record in records:
-        date_str = record.get('date', '')
-        dimensions = record.get('dimensions', [])
-        
-        if not dimensions:
-            continue
-        
-        record_date = parse_date(date_str)
-        
-        for dim in dimensions:
-            if dim:  # 跳过空字符串
-                dimension_stats[dim]['frequency'] += 1
-                if record_date:
-                    dimension_stats[dim]['dates'].append(record_date)
-                    if dimension_stats[dim]['first_seen'] is None or record_date < dimension_stats[dim]['first_seen']:
-                        dimension_stats[dim]['first_seen'] = record_date
-                    if dimension_stats[dim]['last_seen'] is None or record_date > dimension_stats[dim]['last_seen']:
-                        dimension_stats[dim]['last_seen'] = record_date
-    
-    # 计算频率率和格式化日期
-    result = {}
-    for dim, stats in dimension_stats.items():
-        result[dim] = {
-            'frequency': stats['frequency'],
-            'frequency_rate': stats['frequency'] / total_records if total_records > 0 else 0.0,
-            'first_seen': stats['first_seen'].strftime("%Y-%m-%d") if stats['first_seen'] else None,
-            'last_seen': stats['last_seen'].strftime("%Y-%m-%d") if stats['last_seen'] else None,
-            'dates': [d.strftime("%Y-%m-%d") for d in stats['dates']]
-        }
-    
-    return result
-
-
-def find_missing_dimensions(records: List[Dict], candidate_dimensions: List[str], days_threshold: int = 30) -> List[str]:
-    """查找缺失的维度(超过N天未出现)"""
-    now = datetime.now()
-    missing = []
-    
-    for dim in candidate_dimensions:
-        # 查找该维度最后一次出现的时间
-        last_seen = None
-        for record in records:
-            dimensions = record.get('dimensions', [])
-            if dim in dimensions:
-                date_str = record.get('date', '')
-                record_date = parse_date(date_str)
-                if record_date and (last_seen is None or record_date > last_seen):
-                    last_seen = record_date
-        
-        # 如果从未出现或超过阈值天数,加入缺失列表
-        if last_seen is None:
-            missing.append(dim)
-        else:
-            days_diff = (now - last_seen).days
-            if days_diff > days_threshold:
-                missing.append(dim)
-    
-    return missing
-
-
-# ==================== 优先级计算功能 ====================
-
-def calculate_dimension_priority(records: List[Dict]) -> Dict[str, float]:
-    """计算维度优先级分数(仅基于出现频率)"""
-    stats = count_dimension_frequency(records)
-    priorities = {}
-    
-    for dim, dim_stats in stats.items():
-        priorities[dim] = dim_stats['frequency_rate']
-    
-    return priorities
-
-
-# ==================== 建议生成功能 ====================
-
-def suggest_add_dimensions(records: List[Dict], candidate_dimensions: List[str], threshold_days: int = 30) -> List[Dict]:
-    """建议添加缺失但重要的维度"""
-    missing = find_missing_dimensions(records, candidate_dimensions, threshold_days)
-    suggestions = []
-    
-    for dim in missing:
-        # 计算建议的优先级(如果该维度曾经出现过,使用历史频率;否则使用默认值 0.5)
-        stats = count_dimension_frequency(records)
-        suggested_priority = stats.get(dim, {}).get('frequency_rate', 0.5)
-        
-        suggestions.append({
-            "suggestion_id": f"add_{dim}_{datetime.now().strftime('%Y%m%d')}",
-            "type": "add",
-            "dimension": dim,
-            "reason": f"已有{threshold_days}天未在记录中出现,但候选维度列表中",
-            "recommendation": "建议添加该维度",
-            "suggested_priority": round(suggested_priority, 2)
-        })
-    
-    return suggestions
-
-
-def suggest_remove_dimensions(records: List[Dict], active_dimensions: List[str], threshold_days: int = 60) -> List[Dict]:
-    """建议删除长期未出现的维度"""
-    stats = count_dimension_frequency(records)
-    suggestions = []
-    
-    now = datetime.now()
-    
-    for dim in active_dimensions:
-        dim_stat = stats.get(dim, {})
-        last_seen_str = dim_stat.get('last_seen')
-        
-        if not last_seen_str:
-            # 从未出现过
-            suggestions.append({
-                "suggestion_id": f"remove_{dim}_{datetime.now().strftime('%Y%m%d')}",
-                "type": "remove",
-                "dimension": dim,
-                "reason": "从未在记录中出现",
-                "recommendation": "建议删除该维度"
-            })
-        else:
-            last_seen = parse_date(last_seen_str)
-            if last_seen:
-                days_diff = (now - last_seen).days
-                if days_diff > threshold_days:
-                    suggestions.append({
-                        "suggestion_id": f"remove_{dim}_{datetime.now().strftime('%Y%m%d')}",
-                        "type": "remove",
-                        "dimension": dim,
-                        "reason": f"已有{days_diff}天未在记录中出现",
-                        "recommendation": "建议删除该维度"
-                    })
-    
-    return suggestions
-
-
-def suggest_priority_adjustment(records: List[Dict], dimension_config: Dict) -> List[Dict]:
-    """建议调整频繁出现维度的优先级"""
-    stats = count_dimension_frequency(records)
-    priorities = calculate_dimension_priority(records)
-    suggestions = []
-    
-    active_dimensions = dimension_config.get('active_dimensions', [])
-    
-    for dim_info in active_dimensions:
-        dim_name = dim_info.get('name')
-        current_priority = dim_info.get('priority', 0.0)
-        dim_stat = stats.get(dim_name, {})
-        frequency_rate = dim_stat.get('frequency_rate', 0.0)
-        
-        # 如果频率 > 70% 且当前优先级 < 频率,建议提升
-        if frequency_rate > 0.7 and current_priority < frequency_rate:
-            suggestions.append({
-                "suggestion_id": f"priority_{dim_name}_{datetime.now().strftime('%Y%m%d')}",
-                "type": "priority_adjustment",
-                "dimension": dim_name,
-                "reason": f"最近出现频率达{frequency_rate*100:.1f}%,但当前优先级仅为{current_priority:.2f}",
-                "current_priority": current_priority,
-                "suggested_priority": round(frequency_rate, 2),
-                "recommendation": "建议提高该维度的优先级"
-            })
-    
-    return suggestions
-
-
-def generate_all_suggestions(records: List[Dict], dimension_config: Dict) -> Dict[str, List[Dict]]:
-    """生成所有建议的综合报告"""
-    all_records = records
-    
-    # 获取当前配置
-    active_dimensions = [d['name'] for d in dimension_config.get('active_dimensions', [])]
-    candidate_dimensions = dimension_config.get('candidate_dimensions', [])
-    
-    # 生成各类建议
-    add_suggestions = suggest_add_dimensions(all_records, candidate_dimensions, threshold_days=30)
-    remove_suggestions = suggest_remove_dimensions(all_records, active_dimensions, threshold_days=60)
-    priority_suggestions = suggest_priority_adjustment(all_records, dimension_config)
-    
-    return {
-        "add": add_suggestions,
-        "remove": remove_suggestions,
-        "priority_adjustment": priority_suggestions
-    }
-
-
-# ==================== 维度与Themes对比功能 ====================
-
-def count_dimension_frequency_from_extractions(extraction_results: List[Dict]) -> Dict[str, Dict]:
-    """从提取结果中统计维度频率"""
-    dimension_stats = defaultdict(lambda: {
-        'frequency': 0,
-        'dates': [],
-        'first_seen': None,
-        'last_seen': None
-    })
-    
-    total_extractions = len(extraction_results)
-    
-    for result in extraction_results:
-        dimensions = result.get('dimensions', [])
-        extraction_date_str = result.get('extraction_date', result.get('report_date', ''))
-        extraction_date = parse_date(extraction_date_str.split('T')[0])  # 只取日期部分
-        
-        for dim in dimensions:
-            if dim:
-                dimension_stats[dim]['frequency'] += 1
-                if extraction_date:
-                    dimension_stats[dim]['dates'].append(extraction_date)
-                    if dimension_stats[dim]['first_seen'] is None or extraction_date < dimension_stats[dim]['first_seen']:
-                        dimension_stats[dim]['first_seen'] = extraction_date
-                    if dimension_stats[dim]['last_seen'] is None or extraction_date > dimension_stats[dim]['last_seen']:
-                        dimension_stats[dim]['last_seen'] = extraction_date
-    
-    # 计算频率率和格式化日期
-    result = {}
-    for dim, stats in dimension_stats.items():
-        result[dim] = {
-            'frequency': stats['frequency'],
-            'frequency_rate': stats['frequency'] / total_extractions if total_extractions > 0 else 0.0,
-            'first_seen': stats['first_seen'].strftime("%Y-%m-%d") if stats['first_seen'] else None,
-            'last_seen': stats['last_seen'].strftime("%Y-%m-%d") if stats['last_seen'] else None,
-        }
-    
-    return result
-
-
-def analyze_theme_dimension_match(themes: List[str], extraction_results: List[Dict], days_window: int = 30) -> Dict[str, Dict]:
-    """分析themes与维度的匹配度"""
-    now = datetime.now()
-    
-    # 统计维度频率
-    dim_stats = count_dimension_frequency_from_extractions(extraction_results)
-    
-    # 过滤最近N天的提取结果
-    recent_results = []
-    for result in extraction_results:
-        extraction_date_str = result.get('extraction_date', result.get('report_date', ''))
-        extraction_date = parse_date(extraction_date_str.split('T')[0])
-        if extraction_date:
-            days_diff = (now - extraction_date).days
-            if days_diff <= days_window:
-                recent_results.append(result)
-    
-    # 统计最近N天内的维度
-    recent_dim_stats = count_dimension_frequency_from_extractions(recent_results)
-    
-    theme_match = {}
-    
-    for theme in themes:
-        # 计算theme在提取维度中的匹配情况
-        match_count = 0
-        total_count = len(recent_results)
-        
-        for result in recent_results:
-            dimensions = result.get('dimensions', [])
-            # 简单匹配:theme是否在维度列表中(可以考虑更复杂的相似度匹配)
-            if theme in dimensions:
-                match_count += 1
-        
-        match_rate = match_count / total_count if total_count > 0 else 0.0
-        
-        # 计算最近一次匹配的时间
-        last_match_date = None
-        for result in recent_results:
-            dimensions = result.get('dimensions', [])
-            if theme in dimensions:
-                extraction_date_str = result.get('extraction_date', result.get('report_date', ''))
-                extraction_date = parse_date(extraction_date_str.split('T')[0])
-                if extraction_date:
-                    if last_match_date is None or extraction_date > last_match_date:
-                        last_match_date = extraction_date
-        
-        theme_match[theme] = {
-            'match_rate': match_rate,
-            'match_count': match_count,
-            'total_count': total_count,
-            'last_match_date': last_match_date.strftime("%Y-%m-%d") if last_match_date else None,
-            'days_without_match': (now - last_match_date).days if last_match_date else days_window
-        }
-    
-    return theme_match
-
-
-def suggest_add_themes(dim_stats: Dict[str, Dict], themes: List[str], threshold_frequency: float = 0.5, min_recent_count: int = 3, days_window: int = 30) -> List[Dict]:
-    """建议添加新themes(维度中出现但themes中没有的)"""
-    suggestions = []
-    now = datetime.now()
-    
-    for dim, stats in dim_stats.items():
-        # 如果维度不在themes中
-        if dim not in themes:
-            frequency_rate = stats.get('frequency_rate', 0.0)
-            last_seen_str = stats.get('last_seen')
-            
-            # 检查最近出现次数
-            recent_count = 0
-            if last_seen_str:
-                last_seen = parse_date(last_seen_str)
-                if last_seen:
-                    days_diff = (now - last_seen).days
-                    if days_diff <= days_window:
-                        # 估算最近出现次数(简化:假设频率一致)
-                        recent_count = int(frequency_rate * (days_window / 7))  # 粗略估算
-            
-            # 如果频率达到阈值且最近有出现
-            if frequency_rate >= threshold_frequency and recent_count >= min_recent_count:
-                suggestions.append({
-                    "suggestion_id": f"add_theme_{dim}_{datetime.now().strftime('%Y%m%d')}",
-                    "type": "add_theme",
-                    "theme": dim,
-                    "reason": f"从报告中提取的维度'{dim}'出现频率{frequency_rate*100:.1f}%,最近{days_window}天出现{recent_count}次",
-                    "source_dimensions": [dim],
-                    "frequency": frequency_rate,
-                    "recent_count": recent_count
-                })
-    
-    return suggestions
-
-
-def suggest_remove_themes(theme_match: Dict[str, Dict], threshold_frequency: float = 0.1, min_days: int = 60) -> List[Dict]:
-    """建议删除themes(长期与维度不匹配的)"""
-    suggestions = []
-    
-    for theme, match_info in theme_match.items():
-        match_rate = match_info.get('match_rate', 0.0)
-        days_without_match = match_info.get('days_without_match', 0)
-        
-        # 如果匹配率低于阈值且持续时间超过阈值
-        if match_rate < threshold_frequency and days_without_match >= min_days:
-            suggestions.append({
-                "suggestion_id": f"remove_theme_{theme}_{datetime.now().strftime('%Y%m%d')}",
-                "type": "remove_theme",
-                "theme": theme,
-                "reason": f"过去{min_days}天内,'{theme}'在提取维度中的匹配率仅{match_rate*100:.1f}%,且已有{days_without_match}天未匹配",
-                "match_rate": match_rate,
-                "days_without_match": days_without_match
-            })
-    
-    return suggestions
-
-
-def generate_theme_suggestions(extraction_results: List[Dict], themes: List[str]) -> Dict[str, List[Dict]]:
-    """生成themes修正建议"""
-    # 统计维度频率
-    dim_stats = count_dimension_frequency_from_extractions(extraction_results)
-    
-    # 分析themes匹配度(使用30天窗口)
-    theme_match = analyze_theme_dimension_match(themes, extraction_results, days_window=30)
-    
-    # 生成建议
-    add_suggestions = suggest_add_themes(dim_stats, themes, threshold_frequency=0.5, min_recent_count=3, days_window=30)
-    remove_suggestions = suggest_remove_themes(theme_match, threshold_frequency=0.1, min_days=60)
-    
-    return {
-        "add": add_suggestions,
-        "remove": remove_suggestions,
-        "theme_match_analysis": theme_match
-    }
-
-
-# ==================== 配置文件管理 ====================
-
-def load_dimension_config(config_file: Path) -> Dict:
-    """加载维度配置文件"""
-    if config_file.exists():
-        try:
-            with open(config_file, 'r', encoding='utf-8') as f:
-                return json.load(f)
-        except Exception as e:
-            print(f"⚠️  加载配置文件失败: {e}")
-    
-    # 返回默认配置
-    return {
-        "active_dimensions": [],
-        "candidate_dimensions": [],
-        "removed_dimensions": []
-    }
-
-
-def save_dimension_config(config_file: Path, config: Dict):
-    """保存维度配置文件"""
-    config_file.parent.mkdir(parents=True, exist_ok=True)
-    with open(config_file, 'w', encoding='utf-8') as f:
-        json.dump(config, f, indent=2, ensure_ascii=False)
-
-
-def load_dimension_history(history_file: Path) -> List[Dict]:
-    """加载维度历史记录"""
-    if history_file.exists():
-        try:
-            with open(history_file, 'r', encoding='utf-8') as f:
-                data = json.load(f)
-                return data.get('history', [])
-        except Exception as e:
-            print(f"⚠️  加载历史记录失败: {e}")
-    
-    return []
-
-
-def save_dimension_history(history_file: Path, history: List[Dict]):
-    """保存维度历史记录"""
-    history_file.parent.mkdir(parents=True, exist_ok=True)
-    data = {"history": history}
-    with open(history_file, 'w', encoding='utf-8') as f:
-        json.dump(data, f, indent=2, ensure_ascii=False)
-
-
-def record_dimension_event(event_type: str, dimension: str, timestamp: str = None, metadata: Dict = None) -> Dict:
-    """记录维度事件(ADD/REMOVE/PRIORITY_CHANGE)"""
-    if timestamp is None:
-        timestamp = datetime.now().strftime("%Y-%m-%d")
-    
-    event = {
-        "date": timestamp,
-        "event": event_type,
-        "dimension": dimension
-    }
-    
-    if metadata:
-        event.update(metadata)
-    
-    return event
-
-
-# ==================== 用户交互功能 ====================
-
-def present_suggestions(suggestions: Dict[str, List[Dict]]) -> None:
-    """展示系统建议给用户(简单文本)"""
-    print("\n" + "=" * 70)
-    print("📋 维度调整建议")
-    print("=" * 70)
-    
-    all_count = sum(len(v) for v in suggestions.values())
-    if all_count == 0:
-        print("✅ 暂无建议")
-        return
-    
-    # 展示新增建议
-    if suggestions.get('add'):
-        print("\n【新增维度建议】")
-        for i, sug in enumerate(suggestions['add'], 1):
-            print(f"  {i}. {sug['dimension']}")
-            print(f"     原因: {sug['reason']}")
-            print(f"     建议优先级: {sug['suggested_priority']}")
-    
-    # 展示删除建议
-    if suggestions.get('remove'):
-        print("\n【删除维度建议】")
-        for i, sug in enumerate(suggestions['remove'], 1):
-            print(f"  {i}. {sug['dimension']}")
-            print(f"     原因: {sug['reason']}")
-    
-    # 展示优先级调整建议
-    if suggestions.get('priority_adjustment'):
-        print("\n【优先级调整建议】")
-        for i, sug in enumerate(suggestions['priority_adjustment'], 1):
-            print(f"  {i}. {sug['dimension']}")
-            print(f"     原因: {sug['reason']}")
-            print(f"     当前优先级: {sug['current_priority']:.2f} → 建议: {sug['suggested_priority']:.2f}")
-    
-    print("\n" + "=" * 70)
-
-
-def get_user_confirmation(suggestion: Dict) -> str:
-    """获取用户确认(接受/拒绝,简单输入)"""
-    print(f"\n建议: {suggestion['recommendation']}")
-    print(f"维度: {suggestion['dimension']}")
-    print(f"原因: {suggestion['reason']}")
-    
-    while True:
-        user_input = input("接受 (y) / 拒绝 (n): ").strip().lower()
-        if user_input in ['y', 'yes', '是', '接受']:
-            return 'accepted'
-        elif user_input in ['n', 'no', '否', '拒绝']:
-            return 'rejected'
-        else:
-            print("⚠️  请输入 y 或 n")
-
-
-def format_history_text(history: List[Dict]) -> str:
-    """格式化历史记录为简单文本"""
-    if not history:
-        return "暂无历史记录"
-    
-    lines = ["维度演化历史:"]
-    lines.append("-" * 70)
-    
-    for event in history:
-        date = event.get('date', '')
-        event_type = event.get('event', '')
-        dimension = event.get('dimension', '')
-        
-        if event_type == "ADD":
-            info = f"新增维度"
-        elif event_type == "REMOVE":
-            info = f"删除维度"
-        elif event_type == "PRIORITY_CHANGE":
-            old_priority = event.get('old_priority', '')
-            new_priority = event.get('new_priority', '')
-            info = f"优先级调整: {old_priority} → {new_priority}"
-        else:
-            info = event_type
-        
-        lines.append(f"{date} | {event_type} | {dimension} | {info}")
-    
-    return "\n".join(lines)
-
+"""
+维度分析模块 - V1 简化版
+提供维度数据的收集、分析、建议生成和用户交互功能
+"""
+
+import json
+import os
+import sys
+from pathlib import Path
+from datetime import datetime, timedelta
+from typing import List, Dict, Optional, Any
+from collections import defaultdict
+
# Force UTF-8 output on Windows so the emoji / CJK text printed by this module
# does not raise UnicodeEncodeError when the stream uses a legacy code page.
# Only done when run as the main script, to avoid clashing with an importing
# program that manages its own streams.
if sys.platform == 'win32' and __name__ == "__main__":
    import io
    for _stream_name in ('stdout', 'stderr'):
        _stream = getattr(sys, _stream_name)
        if hasattr(_stream, 'reconfigure'):
            # Regular text streams are already TextIOWrapper instances, so
            # they must be re-encoded in place rather than skipped.
            _stream.reconfigure(encoding='utf-8')
        elif hasattr(_stream, 'buffer'):
            # Other text streams exposing a binary buffer get wrapped instead.
            setattr(sys, _stream_name, io.TextIOWrapper(_stream.buffer, encoding='utf-8'))
+
+
+# ==================== 数据收集功能 ====================
+
def collect_daily_records(archive_dir: Path) -> List[Dict]:
    """Load every daily-report JSON file found directly inside ``archive_dir``.

    Files whose name ends with ``_research.json`` are skipped. Each loaded
    object gets an empty ``dimensions`` list when that key is absent. A file
    that cannot be read or parsed is reported on stdout and left out.

    Args:
        archive_dir: Directory holding the daily ``*.json`` reports.

    Returns:
        The loaded records; an empty list when the directory does not exist.
    """
    loaded = []
    if archive_dir.exists():
        for report_path in archive_dir.glob("*.json"):
            if report_path.name.endswith("_research.json"):
                continue  # research reports are not daily records
            try:
                with open(report_path, 'r', encoding='utf-8') as handle:
                    payload = json.load(handle)
                # Backward compatibility: older reports have no dimensions.
                if 'dimensions' not in payload:
                    payload['dimensions'] = []
            except Exception as e:
                print(f"⚠️  读取文件失败 {report_path.name}: {e}")
            else:
                loaded.append(payload)
    return loaded
+
+
def collect_weekly_records(weekly_dir: Path) -> List[Dict]:
    """Load every weekly-report JSON file found directly inside ``weekly_dir``.

    Each loaded object gets an empty ``dimensions`` list when that key is
    absent. Unreadable or unparsable files are reported on stdout and skipped.

    Args:
        weekly_dir: Directory holding the weekly ``*.json`` reports.

    Returns:
        The loaded records; an empty list when the directory does not exist.
    """
    if not weekly_dir.exists():
        return []

    collected = []
    for path in weekly_dir.glob("*.json"):
        try:
            with open(path, 'r', encoding='utf-8') as fh:
                report = json.load(fh)
            if 'dimensions' not in report:
                report['dimensions'] = []
        except Exception as e:
            print(f"⚠️  读取周报文件失败 {path.name}: {e}")
        else:
            collected.append(report)
    return collected
+
+
def collect_monthly_records(monthly_dir: Path) -> List[Dict]:
    """Load every monthly-report JSON file found directly inside ``monthly_dir``.

    Each loaded object gets an empty ``dimensions`` list when that key is
    absent. Unreadable or unparsable files are reported on stdout and skipped.

    Args:
        monthly_dir: Directory holding the monthly ``*.json`` reports.

    Returns:
        The loaded records; an empty list when the directory does not exist.
    """
    monthly_reports = []
    if not monthly_dir.exists():
        return monthly_reports

    for source in monthly_dir.glob("*.json"):
        try:
            with open(source, 'r', encoding='utf-8') as stream:
                content = json.load(stream)
            if 'dimensions' not in content:
                content['dimensions'] = []
        except Exception as e:
            print(f"⚠️  读取月报文件失败 {source.name}: {e}")
            continue
        monthly_reports.append(content)

    return monthly_reports
+
+
def load_all_records(base_dir: Path) -> Dict[str, List[Dict]]:
    """Load daily, weekly and monthly records from the archive under ``base_dir``.

    Args:
        base_dir: Project root containing the ``archive`` directory.

    Returns:
        A dict with the keys ``daily`` (from ``archive/youtube``), ``weekly``
        (from ``archive/weekly``) and ``monthly`` (from ``archive/monthly``).
    """
    archive_root = base_dir / "archive"
    daily = collect_daily_records(archive_root / "youtube")
    weekly = collect_weekly_records(archive_root / "weekly")
    monthly = collect_monthly_records(archive_root / "monthly")
    return {"daily": daily, "weekly": weekly, "monthly": monthly}
+
+
+# ==================== 维度分析功能 ====================
+
def parse_date(date_str: str) -> Optional[datetime]:
    """Parse a date or timestamp string into a naive ``datetime``.

    The explicit formats ``YYYY-MM-DD``, ``YYYY-MM-DDTHH:MM:SSZ`` and
    ``YYYY-MM-DDTHH:MM:SS`` are tried first. Anything else falls back to
    ``datetime.fromisoformat`` so timestamps carrying fractional seconds or a
    UTC offset are accepted as well. Any time-zone information is dropped (the
    wall-clock value is kept) so the result can always be subtracted from
    ``datetime.now()``.

    Args:
        date_str: The text to parse. Surrounding whitespace is ignored.

    Returns:
        The parsed ``datetime``, or ``None`` when ``date_str`` is not a
        string, is blank, or matches no supported format.
    """
    if not isinstance(date_str, str):
        return None

    text = date_str.strip()
    if not text:
        return None

    for fmt in ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%SZ", "%Y-%m-%dT%H:%M:%S"):
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue

    # fromisoformat() rejects a trailing "Z" on older Python versions, so
    # spell it as an explicit UTC offset first.
    iso_text = text[:-1] + "+00:00" if text.endswith("Z") else text
    try:
        parsed = datetime.fromisoformat(iso_text)
    except ValueError:
        return None
    return parsed.replace(tzinfo=None)
+
+
def count_dimension_frequency(records: List[Dict]) -> Dict[str, Dict]:
    """Count in how many records each dimension appears.

    A dimension is counted at most once per record, so ``frequency_rate``
    (occurrences divided by the total number of records, including records
    that list no dimensions) always stays within ``[0, 1]``.

    Args:
        records: Report dicts; the optional keys ``date`` and ``dimensions``
            are read from each one.

    Returns:
        A mapping from dimension name to a dict with the keys ``frequency``,
        ``frequency_rate``, ``first_seen``, ``last_seen`` and ``dates``. Dates
        are ``YYYY-MM-DD`` strings; ``first_seen`` / ``last_seen`` are ``None``
        when no record containing the dimension had a parseable date.
    """
    total_records = len(records)
    occurrences = defaultdict(int)
    seen_dates = defaultdict(list)

    for record in records:
        dimensions = record.get('dimensions', [])
        if not dimensions:
            continue

        record_date = parse_date(record.get('date', ''))

        # dict.fromkeys() removes repeats inside one record while keeping the
        # original order, so a repeated entry cannot inflate the count.
        for dim in dict.fromkeys(dimensions):
            if not dim:  # skip empty strings
                continue
            occurrences[dim] += 1
            if record_date:
                seen_dates[dim].append(record_date)

    result = {}
    for dim, count in occurrences.items():
        dates = seen_dates.get(dim, [])
        first_seen = min(dates) if dates else None
        last_seen = max(dates) if dates else None
        result[dim] = {
            'frequency': count,
            'frequency_rate': count / total_records if total_records > 0 else 0.0,
            'first_seen': first_seen.strftime("%Y-%m-%d") if first_seen else None,
            'last_seen': last_seen.strftime("%Y-%m-%d") if last_seen else None,
            'dates': [d.strftime("%Y-%m-%d") for d in dates]
        }

    return result
+
+
def find_missing_dimensions(records: List[Dict], candidate_dimensions: List[str], days_threshold: int = 30) -> List[str]:
    """Return the candidate dimensions that have not appeared recently.

    A candidate is "missing" when no dated record contains it, or when the
    most recent record containing it is more than ``days_threshold`` days old.

    Args:
        records: Report dicts; the optional keys ``date`` and ``dimensions``
            are read. A ``dimensions`` value of ``None`` is treated as empty.
        candidate_dimensions: Dimension names to check.
        days_threshold: Maximum age in days of the last appearance.

    Returns:
        The missing candidates, in the order they were given.
    """
    now = datetime.now()

    # Parse every record date once instead of once per candidate.
    dated_records = []
    for record in records:
        dimensions = record.get('dimensions') or []
        if not dimensions:
            continue
        record_date = parse_date(record.get('date', ''))
        if record_date:
            dated_records.append((dimensions, record_date))

    missing = []
    for dim in candidate_dimensions:
        last_seen = max(
            (seen for dims, seen in dated_records if dim in dims),
            default=None,
        )
        if last_seen is None or (now - last_seen).days > days_threshold:
            missing.append(dim)

    return missing
+
+
+# ==================== 优先级计算功能 ====================
+
def calculate_dimension_priority(records: List[Dict]) -> Dict[str, float]:
    """Score each dimension by its frequency rate across ``records``.

    Args:
        records: Report dicts passed on to ``count_dimension_frequency``.

    Returns:
        A mapping from dimension name to its ``frequency_rate``.
    """
    return {
        name: info['frequency_rate']
        for name, info in count_dimension_frequency(records).items()
    }
+
+
+# ==================== 建议生成功能 ====================
+
def suggest_add_dimensions(records: List[Dict], candidate_dimensions: List[str], threshold_days: int = 30) -> List[Dict]:
    """Suggest adding candidate dimensions that are missing from recent records.

    Args:
        records: Report dicts to analyse.
        candidate_dimensions: Dimension names that may be suggested.
        threshold_days: A candidate is suggested when it has not appeared for
            more than this many days (or never appeared).

    Returns:
        One suggestion dict per missing candidate with the keys
        ``suggestion_id``, ``type``, ``dimension``, ``reason``,
        ``recommendation`` and ``suggested_priority``.
    """
    missing = find_missing_dimensions(records, candidate_dimensions, threshold_days)

    # The statistics do not depend on the candidate, so compute them once
    # instead of once per missing dimension.
    stats = count_dimension_frequency(records)
    suggestions = []

    for dim in missing:
        # Use the historical frequency when the dimension appeared before,
        # otherwise fall back to the default priority 0.5.
        suggested_priority = stats.get(dim, {}).get('frequency_rate', 0.5)

        suggestions.append({
            "suggestion_id": f"add_{dim}_{datetime.now().strftime('%Y%m%d')}",
            "type": "add",
            "dimension": dim,
            "reason": f"已有{threshold_days}天未在记录中出现,但候选维度列表中",
            "recommendation": "建议添加该维度",
            "suggested_priority": round(suggested_priority, 2)
        })

    return suggestions
+
+
def suggest_remove_dimensions(records: List[Dict], active_dimensions: List[str], threshold_days: int = 60) -> List[Dict]:
    """Suggest removing active dimensions that never or no longer appear.

    Args:
        records: Report dicts to analyse.
        active_dimensions: Names of the dimensions currently in use.
        threshold_days: A dimension is suggested for removal when its last
            appearance is more than this many days ago.

    Returns:
        Suggestion dicts with the keys ``suggestion_id``, ``type``,
        ``dimension``, ``reason`` and ``recommendation``.
    """
    frequency_by_dim = count_dimension_frequency(records)
    now = datetime.now()
    suggestions = []

    for dim in active_dimensions:
        last_seen_text = frequency_by_dim.get(dim, {}).get('last_seen')

        if last_seen_text:
            last_seen = parse_date(last_seen_text)
            if not last_seen:
                continue
            idle_days = (now - last_seen).days
            if not idle_days > threshold_days:
                continue
            reason = f"已有{idle_days}天未在记录中出现"
        else:
            # No dated appearance at all.
            reason = "从未在记录中出现"

        suggestions.append({
            "suggestion_id": f"remove_{dim}_{datetime.now().strftime('%Y%m%d')}",
            "type": "remove",
            "dimension": dim,
            "reason": reason,
            "recommendation": "建议删除该维度"
        })

    return suggestions
+
+
def suggest_priority_adjustment(records: List[Dict], dimension_config: Dict) -> List[Dict]:
    """Suggest raising the priority of frequently appearing active dimensions.

    A suggestion is produced for an active dimension whose frequency rate is
    above 0.7 and higher than its configured priority.

    Args:
        records: Report dicts to analyse.
        dimension_config: Configuration dict; ``active_dimensions`` is read as
            a list of dicts with the keys ``name`` and ``priority``.

    Returns:
        Suggestion dicts with the keys ``suggestion_id``, ``type``,
        ``dimension``, ``reason``, ``current_priority``,
        ``suggested_priority`` and ``recommendation``.
    """
    # The frequency rate doubles as the priority score, so one statistics
    # pass is all that is needed.
    stats = count_dimension_frequency(records)
    suggestions = []

    for dim_info in dimension_config.get('active_dimensions', []):
        dim_name = dim_info.get('name')
        current_priority = dim_info.get('priority', 0.0)
        frequency_rate = stats.get(dim_name, {}).get('frequency_rate', 0.0)

        if frequency_rate > 0.7 and current_priority < frequency_rate:
            suggestions.append({
                "suggestion_id": f"priority_{dim_name}_{datetime.now().strftime('%Y%m%d')}",
                "type": "priority_adjustment",
                "dimension": dim_name,
                "reason": f"最近出现频率达{frequency_rate*100:.1f}%,但当前优先级仅为{current_priority:.2f}",
                "current_priority": current_priority,
                "suggested_priority": round(frequency_rate, 2),
                "recommendation": "建议提高该维度的优先级"
            })

    return suggestions
+
+
def generate_all_suggestions(records: List[Dict], dimension_config: Dict) -> Dict[str, List[Dict]]:
    """Build the combined add / remove / priority suggestion report.

    Args:
        records: Report dicts to analyse.
        dimension_config: Configuration dict; ``active_dimensions`` (a list of
            dicts that each have a ``name``) and ``candidate_dimensions`` are
            read from it.

    Returns:
        A dict with the keys ``add``, ``remove`` and ``priority_adjustment``,
        each holding a list of suggestion dicts.
    """
    active_names = [entry['name'] for entry in dimension_config.get('active_dimensions', [])]
    candidates = dimension_config.get('candidate_dimensions', [])

    # Additions look back 30 days, removals 60 days.
    return {
        "add": suggest_add_dimensions(records, candidates, threshold_days=30),
        "remove": suggest_remove_dimensions(records, active_names, threshold_days=60),
        "priority_adjustment": suggest_priority_adjustment(records, dimension_config)
    }
+
+
+# ==================== 维度与Themes对比功能 ====================
+
def count_dimension_frequency_from_extractions(extraction_results: List[Dict]) -> Dict[str, Dict]:
    """Count in how many extraction results each dimension appears.

    A dimension is counted at most once per extraction result, so
    ``frequency_rate`` always stays within ``[0, 1]``. The date of a result is
    taken from ``extraction_date``, falling back to ``report_date`` when that
    is missing or empty; only the part before ``T`` is used.

    Args:
        extraction_results: Extraction dicts; the optional keys
            ``dimensions``, ``extraction_date`` and ``report_date`` are read.
            Null values are treated as absent.

    Returns:
        A mapping from dimension name to a dict with the keys ``frequency``,
        ``frequency_rate``, ``first_seen`` and ``last_seen`` (``YYYY-MM-DD``
        strings, or ``None`` when no date could be parsed).
    """
    total_extractions = len(extraction_results)
    counts = defaultdict(int)
    first_seen = {}
    last_seen = {}

    for extraction in extraction_results:
        raw_date = extraction.get('extraction_date') or extraction.get('report_date') or ''
        # Keep only the date part of an ISO timestamp.
        extraction_date = parse_date(str(raw_date).split('T')[0])

        # dict.fromkeys() drops repeats inside one result, keeping the order.
        for dim in dict.fromkeys(extraction.get('dimensions') or []):
            if not dim:
                continue
            counts[dim] += 1
            if extraction_date:
                if dim not in first_seen or extraction_date < first_seen[dim]:
                    first_seen[dim] = extraction_date
                if dim not in last_seen or extraction_date > last_seen[dim]:
                    last_seen[dim] = extraction_date

    summary = {}
    for dim, count in counts.items():
        summary[dim] = {
            'frequency': count,
            'frequency_rate': count / total_extractions if total_extractions > 0 else 0.0,
            'first_seen': first_seen[dim].strftime("%Y-%m-%d") if dim in first_seen else None,
            'last_seen': last_seen[dim].strftime("%Y-%m-%d") if dim in last_seen else None,
        }

    return summary
+
+
def analyze_theme_dimension_match(themes: List[str], extraction_results: List[Dict], days_window: int = 30) -> Dict[str, Dict]:
    """Measure how often each theme appears among recently extracted dimensions.

    Only extraction results dated within the last ``days_window`` days are
    considered. A theme matches a result when it is literally contained in
    that result's ``dimensions`` list (exact match, no similarity scoring).

    Args:
        themes: Theme names to check.
        extraction_results: Extraction dicts; the optional keys
            ``dimensions``, ``extraction_date`` and ``report_date`` are read.
            Null values are treated as absent.
        days_window: Size of the look-back window in days.

    Returns:
        A mapping from theme to a dict with the keys ``match_rate``,
        ``match_count``, ``total_count``, ``last_match_date`` (``YYYY-MM-DD``
        or ``None``) and ``days_without_match`` (``days_window`` when the
        theme never matched inside the window).
    """
    now = datetime.now()

    # Keep (dimensions, date) for the results that fall inside the window.
    recent = []
    for extraction in extraction_results:
        raw_date = extraction.get('extraction_date') or extraction.get('report_date') or ''
        extraction_date = parse_date(str(raw_date).split('T')[0])
        if extraction_date and (now - extraction_date).days <= days_window:
            recent.append((extraction.get('dimensions') or [], extraction_date))

    total_count = len(recent)
    theme_match = {}

    for theme in themes:
        match_dates = [seen for dims, seen in recent if theme in dims]
        match_count = len(match_dates)
        last_match_date = max(match_dates) if match_dates else None

        theme_match[theme] = {
            'match_rate': match_count / total_count if total_count > 0 else 0.0,
            'match_count': match_count,
            'total_count': total_count,
            'last_match_date': last_match_date.strftime("%Y-%m-%d") if last_match_date else None,
            'days_without_match': (now - last_match_date).days if last_match_date else days_window
        }

    return theme_match
+
+
def suggest_add_themes(dim_stats: Dict[str, Dict], themes: List[str], threshold_frequency: float = 0.5, min_recent_count: int = 3, days_window: int = 30) -> List[Dict]:
    """Suggest new themes for frequent dimensions that are not themes yet.

    The recent occurrence count is only an estimate: when the dimension was
    last seen inside the window it is ``int(frequency_rate * days_window / 7)``,
    otherwise 0.

    Args:
        dim_stats: Per-dimension statistics; ``frequency_rate`` and
            ``last_seen`` are read from each entry.
        themes: Existing theme names.
        threshold_frequency: Minimum frequency rate for a suggestion.
        min_recent_count: Minimum estimated recent occurrence count.
        days_window: Size of the "recent" window in days.

    Returns:
        Suggestion dicts with the keys ``suggestion_id``, ``type``, ``theme``,
        ``reason``, ``source_dimensions``, ``frequency`` and ``recent_count``.
    """
    now = datetime.now()
    suggestions = []

    for dim, stats in dim_stats.items():
        if dim in themes:
            continue  # already a theme

        frequency_rate = stats.get('frequency_rate', 0.0)
        last_seen_text = stats.get('last_seen')
        last_seen = parse_date(last_seen_text) if last_seen_text else None

        recent_count = 0
        if last_seen and (now - last_seen).days <= days_window:
            # Rough estimate that assumes a constant frequency.
            recent_count = int(frequency_rate * (days_window / 7))

        if not (frequency_rate >= threshold_frequency and recent_count >= min_recent_count):
            continue

        suggestions.append({
            "suggestion_id": f"add_theme_{dim}_{datetime.now().strftime('%Y%m%d')}",
            "type": "add_theme",
            "theme": dim,
            "reason": f"从报告中提取的维度'{dim}'出现频率{frequency_rate*100:.1f}%,最近{days_window}天出现{recent_count}次",
            "source_dimensions": [dim],
            "frequency": frequency_rate,
            "recent_count": recent_count
        })

    return suggestions
+
+
def suggest_remove_themes(theme_match: Dict[str, Dict], threshold_frequency: float = 0.1, min_days: int = 60) -> List[Dict]:
    """Suggest removing themes that have not matched extracted dimensions.

    A theme is suggested when its ``match_rate`` is below
    ``threshold_frequency`` and its ``days_without_match`` is at least
    ``min_days``.

    Args:
        theme_match: Per-theme match information; ``match_rate`` and
            ``days_without_match`` are read from each entry.
        threshold_frequency: Match rates below this value qualify.
        min_days: Minimum number of days without a match.

    Returns:
        Suggestion dicts with the keys ``suggestion_id``, ``type``, ``theme``,
        ``reason``, ``match_rate`` and ``days_without_match``.
    """
    flagged = []

    for theme in theme_match:
        info = theme_match[theme]
        match_rate = info.get('match_rate', 0.0)
        days_without_match = info.get('days_without_match', 0)

        if not (match_rate < threshold_frequency and days_without_match >= min_days):
            continue

        flagged.append({
            "suggestion_id": f"remove_theme_{theme}_{datetime.now().strftime('%Y%m%d')}",
            "type": "remove_theme",
            "theme": theme,
            "reason": f"过去{min_days}天内,'{theme}'在提取维度中的匹配率仅{match_rate*100:.1f}%,且已有{days_without_match}天未匹配",
            "match_rate": match_rate,
            "days_without_match": days_without_match
        })

    return flagged
+
+
def generate_theme_suggestions(extraction_results: List[Dict], themes: List[str]) -> Dict[str, List[Dict]]:
    """Generate add / remove suggestions for the configured themes.

    Args:
        extraction_results: Extraction dicts holding the extracted dimensions
            and their dates.
        themes: Existing theme names.

    Returns:
        A dict with the keys ``add`` and ``remove`` (lists of suggestion
        dicts) and ``theme_match_analysis`` (the per-theme match information
        the removal suggestions are based on).
    """
    add_window_days = 30
    remove_min_days = 60

    dim_stats = count_dimension_frequency_from_extractions(extraction_results)

    # The match analysis must look back at least as far as the removal
    # threshold: its ``days_without_match`` can never exceed the window, so a
    # shorter window would make the removal rule impossible to satisfy.
    theme_match = analyze_theme_dimension_match(themes, extraction_results, days_window=remove_min_days)

    add_suggestions = suggest_add_themes(
        dim_stats, themes, threshold_frequency=0.5, min_recent_count=3, days_window=add_window_days
    )
    remove_suggestions = suggest_remove_themes(theme_match, threshold_frequency=0.1, min_days=remove_min_days)

    return {
        "add": add_suggestions,
        "remove": remove_suggestions,
        "theme_match_analysis": theme_match
    }
+
+
+# ==================== 配置文件管理 ====================
+
def load_dimension_config(config_file: Path) -> Dict:
    """Read the dimension configuration from a JSON file.

    Args:
        config_file: Path of the configuration file.

    Returns:
        The parsed JSON content. When the file does not exist, or cannot be
        read or parsed (a warning is printed in that case), a default
        configuration with empty ``active_dimensions``,
        ``candidate_dimensions`` and ``removed_dimensions`` lists is returned.
    """
    default_config = {
        "active_dimensions": [],
        "candidate_dimensions": [],
        "removed_dimensions": []
    }

    if not config_file.exists():
        return default_config

    try:
        with open(config_file, 'r', encoding='utf-8') as handle:
            loaded = json.load(handle)
    except Exception as e:
        print(f"⚠️  加载配置文件失败: {e}")
        return default_config

    return loaded
+
+
def save_dimension_config(config_file: Path, config: Dict):
    """Write the dimension configuration to ``config_file`` as JSON.

    Missing parent directories are created. The file is written as UTF-8 with
    a two-space indent and non-ASCII characters left unescaped.

    Args:
        config_file: Destination path.
        config: Configuration to serialise.
    """
    target_dir = config_file.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    with open(config_file, 'w', encoding='utf-8') as out:
        json.dump(config, out, indent=2, ensure_ascii=False)
+
+
def load_dimension_history(history_file: Path) -> List[Dict]:
    """Read the dimension history events from a JSON file.

    Args:
        history_file: Path of the history file, expected to hold an object
            with a ``history`` list.

    Returns:
        The value stored under ``history`` (an empty list when the key is
        absent). An empty list is also returned when the file does not exist
        or cannot be loaded; a warning is printed in the latter case.
    """
    if not history_file.exists():
        return []

    try:
        with open(history_file, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)
            events = payload.get('history', [])
    except Exception as e:
        print(f"⚠️  加载历史记录失败: {e}")
        return []

    return events
+
+
def save_dimension_history(history_file: Path, history: List[Dict]):
    """Write the dimension history events to ``history_file`` as JSON.

    The events are stored under the top-level key ``history``. Missing parent
    directories are created. The file is written as UTF-8 with a two-space
    indent and non-ASCII characters left unescaped.

    Args:
        history_file: Destination path.
        history: Event dicts to store.
    """
    history_file.parent.mkdir(parents=True, exist_ok=True)
    with open(history_file, 'w', encoding='utf-8') as out:
        json.dump({"history": history}, out, indent=2, ensure_ascii=False)
+
+
def record_dimension_event(event_type: str, dimension: str, timestamp: str = None, metadata: Dict = None) -> Dict:
    """Build a history event dict for a dimension change.

    Args:
        event_type: Kind of event, e.g. ``ADD``, ``REMOVE`` or
            ``PRIORITY_CHANGE``.
        dimension: Name of the affected dimension.
        timestamp: Event date; defaults to today's date as ``YYYY-MM-DD``.
        metadata: Extra key/value pairs merged into the event. Keys that
            collide with ``date``, ``event`` or ``dimension`` overwrite them.

    Returns:
        The event dict. Nothing is persisted by this function.
    """
    event_date = datetime.now().strftime("%Y-%m-%d") if timestamp is None else timestamp
    event = dict(date=event_date, event=event_type, dimension=dimension)
    if metadata:
        event.update(metadata)
    return event
+
+
+# ==================== 用户交互功能 ====================
+
def present_suggestions(suggestions: Dict[str, List[Dict]]) -> None:
    """Print the suggestion report as plain text.

    Both dimension suggestions (keyed by ``dimension``) and theme suggestions
    (keyed by ``theme``, without ``suggested_priority``) are accepted.
    Non-list entries such as ``theme_match_analysis`` are ignored.

    Args:
        suggestions: Mapping with the optional keys ``add``, ``remove`` and
            ``priority_adjustment``, each holding a list of suggestion dicts.
    """
    print("\n" + "=" * 70)
    print("📋 维度调整建议")
    print("=" * 70)

    # Only list-like entries are suggestion groups; counting an analysis dict
    # would hide the "no suggestions" message.
    all_count = sum(len(v) for v in suggestions.values() if isinstance(v, (list, tuple)))
    if all_count == 0:
        print("✅ 暂无建议")
        return

    # Additions
    if suggestions.get('add'):
        print("\n【新增维度建议】")
        for i, sug in enumerate(suggestions['add'], 1):
            print(f"  {i}. {sug.get('dimension', sug.get('theme', ''))}")
            print(f"     原因: {sug['reason']}")
            # Theme suggestions carry no suggested priority.
            if 'suggested_priority' in sug:
                print(f"     建议优先级: {sug['suggested_priority']}")

    # Removals
    if suggestions.get('remove'):
        print("\n【删除维度建议】")
        for i, sug in enumerate(suggestions['remove'], 1):
            print(f"  {i}. {sug.get('dimension', sug.get('theme', ''))}")
            print(f"     原因: {sug['reason']}")

    # Priority adjustments
    if suggestions.get('priority_adjustment'):
        print("\n【优先级调整建议】")
        for i, sug in enumerate(suggestions['priority_adjustment'], 1):
            print(f"  {i}. {sug['dimension']}")
            print(f"     原因: {sug['reason']}")
            print(f"     当前优先级: {sug['current_priority']:.2f} → 建议: {sug['suggested_priority']:.2f}")

    print("\n" + "=" * 70)
+
+
def get_user_confirmation(suggestion: Dict) -> str:
    """Show one suggestion and ask the user to accept or reject it.

    The prompt is repeated until a recognised answer is entered; answers are
    stripped and lower-cased before comparison.

    Args:
        suggestion: Suggestion dict; the keys ``recommendation``,
            ``dimension`` and ``reason`` are required.

    Returns:
        ``'accepted'`` or ``'rejected'``.
    """
    for label, key in (("\n建议", 'recommendation'), ("维度", 'dimension'), ("原因", 'reason')):
        print(f"{label}: {suggestion[key]}")

    accept_words = ('y', 'yes', '是', '接受')
    reject_words = ('n', 'no', '否', '拒绝')

    verdict = None
    while verdict is None:
        answer = input("接受 (y) / 拒绝 (n): ").strip().lower()
        if answer in accept_words:
            verdict = 'accepted'
        elif answer in reject_words:
            verdict = 'rejected'
        else:
            print("⚠️  请输入 y 或 n")
    return verdict
+
+
def format_history_text(history: List[Dict]) -> str:
    """Render the dimension history as plain text, one line per event.

    Args:
        history: Event dicts; the optional keys ``date``, ``event``,
            ``dimension``, ``old_priority`` and ``new_priority`` are read.

    Returns:
        A placeholder message when ``history`` is empty, otherwise a title, a
        separator and one ``date | event | dimension | info`` line per event,
        joined with newlines.
    """
    if not history:
        return "暂无历史记录"

    fixed_labels = (("ADD", "新增维度"), ("REMOVE", "删除维度"))
    rows = ["维度演化历史:", "-" * 70]

    for entry in history:
        kind = entry.get('event', '')
        if kind == "PRIORITY_CHANGE":
            detail = f"优先级调整: {entry.get('old_priority', '')} → {entry.get('new_priority', '')}"
        else:
            # Unknown event types are echoed as their own description.
            detail = next((label for name, label in fixed_labels if name == kind), kind)
        rows.append(f"{entry.get('date', '')} | {kind} | {entry.get('dimension', '')} | {detail}")

    return "\n".join(rows)
+

+ 16 - 16
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/env.example → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/env.example

@@ -1,16 +1,16 @@
-# 环境变量配置模板
-# 复制此文件为 .env 并填入你的实际配置值
-
-# LLM配置(用于维度提取和研究报告生成)
-LLM_API_KEY=your_llm_api_key_here
-LLM_MODEL_ID=qwen-plus
-LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
-LLM_TIMEOUT=60
-
-# YouTube API配置(用于视频搜索)
-YOUTUBE_API_KEY=your_youtube_api_key_here
-
-# 其他可选配置
-DASHSCOPE_API_KEY=your_dashscope_api_key_here
-MODELSCOPE_API_KEY=your_modelscope_api_key_here
-
+# 环境变量配置模板
+# 复制此文件为 .env 并填入你的实际配置值
+
+# LLM配置(用于维度提取和研究报告生成)
+LLM_API_KEY=your_llm_api_key_here
+LLM_MODEL_ID=qwen-plus
+LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
+LLM_TIMEOUT=60
+
+# YouTube API配置(用于视频搜索)
+YOUTUBE_API_KEY=your_youtube_api_key_here
+
+# 其他可选配置
+DASHSCOPE_API_KEY=your_dashscope_api_key_here
+MODELSCOPE_API_KEY=your_modelscope_api_key_here
+

+ 382 - 382
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/extract_dimensions.py → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/extract_dimensions.py

@@ -1,382 +1,382 @@
-"""
-维度提取模块 - 使用LLM从报告中提取维度
-"""
-
-import sys
-import json
-import os
-from pathlib import Path
-from datetime import datetime
-from typing import List, Dict, Optional
-
-# 设置控制台编码为UTF-8(Windows)
-# 注意:只在作为主脚本运行时重定向,避免在被导入时冲突
-if sys.platform == 'win32' and __name__ == "__main__":
-    import io
-    if not isinstance(sys.stdout, io.TextIOWrapper):
-        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
-    if not isinstance(sys.stderr, io.TextIOWrapper):
-        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
-
-# 加载 .env 文件(如果存在)
-try:
-    from dotenv import load_dotenv
-    load_dotenv()
-except ImportError:
-    pass
-
-# 导入LLM
-try:
-    from hello_agents.core.llm import HelloAgentsLLM
-    LLM_AVAILABLE = True
-except ImportError:
-    LLM_AVAILABLE = False
-    print("⚠️  警告: hello_agents 模块未安装,无法使用LLM提取维度")
-
-
-def init_llm():
-    """初始化LLM"""
-    if not LLM_AVAILABLE:
-        return None
-    
-    # 从环境变量读取LLM配置
-    llm_model = (
-        os.getenv("LLM_MODEL") or
-        os.getenv("LLM_MODEL_ID") or
-        "qwen-plus"
-    )
-    llm_api_key = (
-        os.getenv("LLM_API_KEY") or
-        os.getenv("MODELSCOPE_API_KEY") or
-        os.getenv("MODELSCOPE_API_TOKEN")
-    )
-    llm_base_url = (
-        os.getenv("LLM_BASE_URL") or
-        "https://api-inference.modelscope.cn/v1/"
-    )
-    llm_provider = os.getenv("LLM_PROVIDER", "modelscope")
-    
-    if not llm_api_key:
-        print("⚠️  警告: 未找到LLM API Key")
-        return None
-    
-    try:
-        llm = HelloAgentsLLM(
-            model=llm_model,
-            api_key=llm_api_key,
-            base_url=llm_base_url,
-            provider=llm_provider
-        )
-        return llm
-    except Exception as e:
-        print(f"⚠️  初始化LLM失败: {e}")
-        return None
-
-
-def extract_json_from_text(text: str) -> Optional[Dict]:
-    """从文本中提取JSON内容"""
-    import re
-    
-    # 尝试直接解析
-    try:
-        return json.loads(text.strip())
-    except json.JSONDecodeError:
-        pass
-    
-    # 尝试提取JSON代码块
-    json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
-    if json_match:
-        try:
-            return json.loads(json_match.group(1))
-        except json.JSONDecodeError:
-            pass
-    
-    # 尝试提取第一个完整的JSON对象
-    json_match = re.search(r'\{.*\}', text, re.DOTALL)
-    if json_match:
-        try:
-            return json.loads(json_match.group(0))
-        except json.JSONDecodeError:
-            pass
-    
-    return None
-
-
-def extract_dimensions_from_text(text: str, llm, existing_themes: List[str] = None) -> Dict:
-    """从报告文本中提取维度
-    
-    Args:
-        text: 报告文本内容
-        llm: LLM实例
-        existing_themes: 现有的themes列表,用于参考抽象级别
-    """
-    if not llm:
-        return {"dimensions": [], "confidence": 0.0, "error": "LLM未初始化"}
-    
-    themes_hint = ""
-    if existing_themes:
-        themes_hint = f"\n参考现有themes的风格(这些是用户已经定义的兴趣主题):{existing_themes}\n提取的维度应该与这些themes在抽象级别上保持一致。"
-    
-    prompt = f"""请从以下用户报告中提取3-8个维度(dimensions)。维度应该是用户关注的**高级别的主题、领域或兴趣点**,而不是简单的名词拆分。
-
-报告内容:
-{text}
-{themes_hint}
-
-**提取原则**:
-1. **保持概念完整性**:如果报告中提到"信息信号系统"这样的完整概念,应该提取为"信息信号系统"或"系统",而不要拆成"信息"、"信号"、"系统"三个词
-2. **提取主题级别**:维度应该是主题级别的概念(如"AI"、"健康"、"工作"),而不是具体细节(如"更新"、"今天"、"高兴")
-3. **过滤无关词**:
-   - 过滤掉动作词(如:更新、创建、删除)
-   - 过滤掉时间词(如:今天、昨天、本周)
-   - 过滤掉情绪词(如:高兴、难过),除非情绪本身是报告的主题
-   - 过滤掉过于通用的词(如:事情、内容、问题)
-4. **理解语义上下文**:理解整个句子的含义,提取其背后关注的主题
-5. **抽象层次**:维度应该是足够抽象的主题,可以作为YouTube搜索关键词或兴趣标签使用
-
-**示例**:
-- 报告:"今天很高兴,我们的信息信号系统再次迎来了更新"
-- ❌ 错误提取:["信息", "信号", "系统", "更新", "今天"]
-- ✅ 正确提取:["信息信号系统"] 或 ["系统"] 或 ["技术系统"]
-
-请以JSON格式返回维度列表:
-{{
-  "dimensions": ["维度1", "维度2", "维度3"],
-  "confidence": 0.85,
-  "reasoning": "简要说明提取理由"
-}}
-
-要求:
-- 维度数量:3-8个(根据报告内容的重要性决定)
-- 维度格式:简洁的主题词(2-8个字),保持概念的完整性
-- confidence:提取的置信度(0-1之间)
-- reasoning:简要说明为什么提取这些维度
-
-请直接返回JSON,不要包含其他文字。"""
-
-    try:
-        messages = [
-            {"role": "system", "content": "你是一个专业的文本分析助手,擅长从文本中提取高级别的主题和兴趣维度。你会理解语义上下文,保持概念的完整性,不会简单地进行分词。"},
-            {"role": "user", "content": prompt}
-        ]
-        
-        response = llm.invoke(messages)
-        
-        # 提取JSON
-        result = extract_json_from_text(response)
-        
-        if result and "dimensions" in result:
-            return {
-                "dimensions": result["dimensions"],
-                "confidence": result.get("confidence", 0.8),
-                "reasoning": result.get("reasoning", "")
-            }
-        else:
-            print(f"⚠️  LLM返回格式不正确: {response[:200]}")
-            return {"dimensions": [], "confidence": 0.0, "error": "格式解析失败"}
-    
-    except Exception as e:
-        print(f"⚠️  提取维度失败: {e}")
-        return {"dimensions": [], "confidence": 0.0, "error": str(e)}
-
-
-def extract_dimensions_from_report(report_file: Path, llm, existing_themes: List[str] = None) -> Optional[Dict]:
-    """从Markdown文件中提取维度
-    
-    Args:
-        report_file: 报告文件路径
-        llm: LLM实例
-        existing_themes: 现有的themes列表,用于参考抽象级别
-    """
-    if not report_file.exists():
-        print(f"❌ 报告文件不存在: {report_file}")
-        return None
-    
-    try:
-        with open(report_file, 'r', encoding='utf-8') as f:
-            content = f.read()
-        
-        # 移除Markdown标题(如果存在)
-        lines = content.split('\n')
-        # 跳过开头的#标题行
-        content_lines = []
-        for line in lines:
-            if line.strip().startswith('#') and not content_lines:
-                continue
-            content_lines.append(line)
-        text = '\n'.join(content_lines).strip()
-        
-        if not text:
-            print(f"⚠️  报告内容为空: {report_file}")
-            return None
-        
-        # 提取维度(传入existing_themes)
-        result = extract_dimensions_from_text(text, llm, existing_themes=existing_themes)
-        
-        # 添加报告信息
-        result["report_file"] = str(report_file)
-        result["report_date"] = report_file.stem
-        result["extraction_date"] = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
-        
-        return result
-    
-    except Exception as e:
-        print(f"❌ 读取报告失败 {report_file}: {e}")
-        return None
-
-
-def save_extraction_result(base_dir: Path, result: Dict, report_type: str):
-    """保存提取结果"""
-    dimensions_dir = base_dir / "archive" / "dimensions"
-    dimensions_dir.mkdir(parents=True, exist_ok=True)
-    
-    # 根据报告日期生成文件名
-    report_date = result.get("report_date", datetime.now().strftime("%Y-%m-%d"))
-    output_file = dimensions_dir / f"{report_date}_{report_type}_dimensions.json"
-    
-    try:
-        with open(output_file, 'w', encoding='utf-8') as f:
-            json.dump(result, f, indent=2, ensure_ascii=False)
-        print(f"✅ 维度提取结果已保存: {output_file}")
-        return output_file
-    except Exception as e:
-        print(f"❌ 保存失败: {e}")
-        return None
-
-
-def batch_extract_dimensions(base_dir: Path, report_type: str = None, llm=None, existing_themes: List[str] = None) -> List[Dict]:
-    """批量提取维度
-    
-    Args:
-        base_dir: 基础目录路径
-        report_type: 报告类型(daily/weekly/monthly),None表示处理所有类型
-        llm: LLM实例
-        existing_themes: 现有的themes列表,如果为None则自动从themes.yaml加载
-    """
-    if not llm:
-        llm = init_llm()
-        if not llm:
-            print("❌ LLM未初始化,无法提取维度")
-            return []
-    
-    # 如果没有传入existing_themes,尝试从themes.yaml加载
-    if existing_themes is None:
-        try:
-            # 避免循环导入,直接在这里读取yaml
-            import yaml
-            themes_file = base_dir / "themes.yaml"
-            if themes_file.exists():
-                with open(themes_file, 'r', encoding='utf-8') as f:
-                    data = yaml.safe_load(f)
-                    if data and isinstance(data, dict):
-                        existing_themes = data.get('themes', [])
-                    else:
-                        existing_themes = []
-                if existing_themes:
-                    print(f"📌 已加载 {len(existing_themes)} 个现有themes作为参考: {existing_themes}")
-        except Exception as e:
-            print(f"⚠️  加载themes.yaml失败,将不参考现有themes: {e}")
-            existing_themes = []
-    
-    reports_dir = base_dir / "archive" / "reports"
-    results = []
-    
-    # 确定要处理的报告类型
-    types_to_process = [report_type] if report_type else ["daily", "weekly", "monthly"]
-    
-    for rtype in types_to_process:
-        type_dir = reports_dir / rtype
-        if not type_dir.exists():
-            continue
-        
-        print(f"\n📂 处理{rtype}报告...")
-        report_files = sorted(type_dir.glob("*.md"))
-        
-        for report_file in report_files:
-            print(f"  处理: {report_file.name}")
-            result = extract_dimensions_from_report(report_file, llm, existing_themes=existing_themes)
-            
-            if result and result.get("dimensions"):
-                # 添加报告类型
-                result["report_type"] = rtype
-                
-                # 保存提取结果
-                save_extraction_result(base_dir, result, rtype)
-                
-                results.append(result)
-                print(f"    ✅ 提取到 {len(result['dimensions'])} 个维度: {', '.join(result['dimensions'][:5])}")
-                # 如果有reasoning,也显示出来(用于调试)
-                if result.get("reasoning"):
-                    print(f"       推理: {result['reasoning'][:100]}...")
-            else:
-                print(f"    ⚠️  未提取到维度")
-    
-    return results
-
-
-def load_extraction_results(base_dir: Path) -> List[Dict]:
-    """加载所有提取结果"""
-    dimensions_dir = base_dir / "archive" / "dimensions"
-    
-    if not dimensions_dir.exists():
-        return []
-    
-    results = []
-    for json_file in dimensions_dir.glob("*_dimensions.json"):
-        try:
-            with open(json_file, 'r', encoding='utf-8') as f:
-                result = json.load(f)
-                results.append(result)
-        except Exception as e:
-            print(f"⚠️  读取提取结果失败 {json_file.name}: {e}")
-    
-    return results
-
-
-if __name__ == "__main__":
-    # 命令行工具
-    import argparse
-    
-    parser = argparse.ArgumentParser(description="从报告中提取维度")
-    parser.add_argument("--report-type", choices=["daily", "weekly", "monthly"], 
-                       help="指定报告类型(不指定则处理所有类型)")
-    parser.add_argument("--report-file", type=str,
-                       help="指定单个报告文件路径")
-    parser.add_argument("--base-dir", type=str,
-                       help="基础目录路径(默认为脚本所在目录)")
-    
-    args = parser.parse_args()
-    
-    base_dir = Path(args.base_dir) if args.base_dir else Path(__file__).parent
-    
-    llm = init_llm()
-    if not llm:
-        print("❌ 无法初始化LLM,退出")
-        sys.exit(1)
-    
-    # 加载existing_themes(如果存在)
-    existing_themes = None
-    try:
-        import yaml
-        themes_file = base_dir / "themes.yaml"
-        if themes_file.exists():
-            with open(themes_file, 'r', encoding='utf-8') as f:
-                data = yaml.safe_load(f)
-                if data and isinstance(data, dict):
-                    existing_themes = data.get('themes', [])
-    except Exception:
-        pass
-    
-    if args.report_file:
-        # 处理单个文件
-        report_file = Path(args.report_file)
-        result = extract_dimensions_from_report(report_file, llm, existing_themes=existing_themes)
-        if result:
-            report_type = result.get("report_type", "daily")
-            save_extraction_result(base_dir, result, report_type)
-            print(f"\n提取的维度: {result.get('dimensions', [])}")
-    else:
-        # 批量处理
-        results = batch_extract_dimensions(base_dir, args.report_type, llm, existing_themes=existing_themes)
-        print(f"\n✅ 共处理 {len(results)} 个报告")
-
+"""
+维度提取模块 - 使用LLM从报告中提取维度
+"""
+
+import sys
+import json
+import os
+from pathlib import Path
+from datetime import datetime
+from typing import List, Dict, Optional
+
+# Force UTF-8 console output on Windows.
+# NOTE: the streams are only re-wrapped when this file runs as the main script,
+# so importing it as a module never replaces the importer's stdout/stderr.
+if sys.platform == 'win32' and __name__ == "__main__":
+    import io
+    if not isinstance(sys.stdout, io.TextIOWrapper):
+        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+    if not isinstance(sys.stderr, io.TextIOWrapper):
+        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
+
+# Load a .env file when python-dotenv is installed; otherwise carry on without it.
+try:
+    from dotenv import load_dotenv
+    load_dotenv()
+except ImportError:
+    pass
+
+# Import the LLM client; LLM_AVAILABLE records whether the import succeeded.
+try:
+    from hello_agents.core.llm import HelloAgentsLLM
+    LLM_AVAILABLE = True
+except ImportError:
+    LLM_AVAILABLE = False
+    print("⚠️  警告: hello_agents 模块未安装,无法使用LLM提取维度")
+
+
+def init_llm():
+    """Build a HelloAgentsLLM client from environment variables.
+
+    Settings are read as follows (first non-empty value wins):
+      * model:    LLM_MODEL, LLM_MODEL_ID, else "qwen-plus"
+      * API key:  LLM_API_KEY, MODELSCOPE_API_KEY, MODELSCOPE_API_TOKEN
+      * base URL: LLM_BASE_URL, else the ModelScope inference endpoint
+      * provider: LLM_PROVIDER, else "modelscope"
+
+    Returns:
+        The client instance, or None when the hello_agents import failed, no
+        API key is configured, or the client constructor raises.
+    """
+    if not LLM_AVAILABLE:
+        return None
+
+    def first_set(*names):
+        # First non-empty value among the named environment variables, else None.
+        return next((value for value in map(os.getenv, names) if value), None)
+
+    api_key = first_set("LLM_API_KEY", "MODELSCOPE_API_KEY", "MODELSCOPE_API_TOKEN")
+    if not api_key:
+        print("⚠️  警告: 未找到LLM API Key")
+        return None
+
+    settings = {
+        "model": first_set("LLM_MODEL", "LLM_MODEL_ID") or "qwen-plus",
+        "api_key": api_key,
+        "base_url": first_set("LLM_BASE_URL") or "https://api-inference.modelscope.cn/v1/",
+        "provider": os.getenv("LLM_PROVIDER", "modelscope"),
+    }
+
+    try:
+        return HelloAgentsLLM(**settings)
+    except Exception as e:
+        print(f"⚠️  初始化LLM失败: {e}")
+        return None
+
+
+def extract_json_from_text(text: str) -> Optional[Dict]:
+    """Extract a JSON object from free-form text (typically an LLM reply).
+
+    Strategies, tried in order:
+      1. parse the whole stripped text as JSON;
+      2. parse the object inside a Markdown code fence;
+      3. scan for the first ``{`` from which a complete JSON object decodes.
+
+    Args:
+        text: Raw text that may contain a JSON object.
+
+    Returns:
+        The decoded object as a dict, or None when ``text`` is not a string or
+        no JSON object can be decoded. Non-object JSON values (lists, numbers,
+        strings) are never returned, so callers can rely on dict methods.
+    """
+    import re
+
+    if not isinstance(text, str):
+        return None
+
+    def as_object(candidate):
+        # Decode candidate and keep the result only when it is a JSON object.
+        try:
+            value = json.loads(candidate)
+        except json.JSONDecodeError:
+            return None
+        return value if isinstance(value, dict) else None
+
+    # 1. The whole reply is JSON.
+    parsed = as_object(text.strip())
+    if parsed is not None:
+        return parsed
+
+    # 2. JSON wrapped in a Markdown code fence.
+    fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
+    if fenced:
+        parsed = as_object(fenced.group(1))
+        if parsed is not None:
+            return parsed
+
+    # 3. First complete object embedded anywhere in the text. raw_decode stops
+    #    at the end of the object, so trailing text (even text containing more
+    #    braces) no longer breaks the parse the way a greedy regex did.
+    decoder = json.JSONDecoder()
+    start = text.find('{')
+    while start != -1:
+        try:
+            value, _ = decoder.raw_decode(text, start)
+        except json.JSONDecodeError:
+            pass
+        else:
+            if isinstance(value, dict):
+                return value
+        start = text.find('{', start + 1)
+
+    return None
+
+
+def extract_dimensions_from_text(text: str, llm, existing_themes: List[str] = None) -> Dict:
+    """Ask the LLM to extract dimensions (high-level topics) from report text.
+
+    Builds a prompt from ``text`` (plus an optional hint listing
+    ``existing_themes``), sends it through ``llm.invoke`` and parses the reply
+    with ``extract_json_from_text``.
+
+    Args:
+        text: Report text to analyse.
+        llm: LLM instance; ``invoke(messages)`` is called on it. A falsy value
+            short-circuits to an error result.
+        existing_themes: Existing themes, quoted in the prompt as a reference
+            for the desired level of abstraction.
+
+    Returns:
+        On success a dict with ``dimensions``, ``confidence`` (0.8 when the
+        reply omits it) and ``reasoning`` ("" when omitted). On any failure a
+        dict with an empty ``dimensions`` list, ``confidence`` 0.0 and an
+        ``error`` message; exceptions are caught here rather than propagated.
+    """
+    if not llm:
+        return {"dimensions": [], "confidence": 0.0, "error": "LLM未初始化"}
+    
+    themes_hint = ""
+    if existing_themes:
+        themes_hint = f"\n参考现有themes的风格(这些是用户已经定义的兴趣主题):{existing_themes}\n提取的维度应该与这些themes在抽象级别上保持一致。"
+    
+    prompt = f"""请从以下用户报告中提取3-8个维度(dimensions)。维度应该是用户关注的**高级别的主题、领域或兴趣点**,而不是简单的名词拆分。
+
+报告内容:
+{text}
+{themes_hint}
+
+**提取原则**:
+1. **保持概念完整性**:如果报告中提到"信息信号系统"这样的完整概念,应该提取为"信息信号系统"或"系统",而不要拆成"信息"、"信号"、"系统"三个词
+2. **提取主题级别**:维度应该是主题级别的概念(如"AI"、"健康"、"工作"),而不是具体细节(如"更新"、"今天"、"高兴")
+3. **过滤无关词**:
+   - 过滤掉动作词(如:更新、创建、删除)
+   - 过滤掉时间词(如:今天、昨天、本周)
+   - 过滤掉情绪词(如:高兴、难过),除非情绪本身是报告的主题
+   - 过滤掉过于通用的词(如:事情、内容、问题)
+4. **理解语义上下文**:理解整个句子的含义,提取其背后关注的主题
+5. **抽象层次**:维度应该是足够抽象的主题,可以作为YouTube搜索关键词或兴趣标签使用
+
+**示例**:
+- 报告:"今天很高兴,我们的信息信号系统再次迎来了更新"
+- ❌ 错误提取:["信息", "信号", "系统", "更新", "今天"]
+- ✅ 正确提取:["信息信号系统"] 或 ["系统"] 或 ["技术系统"]
+
+请以JSON格式返回维度列表:
+{{
+  "dimensions": ["维度1", "维度2", "维度3"],
+  "confidence": 0.85,
+  "reasoning": "简要说明提取理由"
+}}
+
+要求:
+- 维度数量:3-8个(根据报告内容的重要性决定)
+- 维度格式:简洁的主题词(2-8个字),保持概念的完整性
+- confidence:提取的置信度(0-1之间)
+- reasoning:简要说明为什么提取这些维度
+
+请直接返回JSON,不要包含其他文字。"""
+
+    try:
+        messages = [
+            {"role": "system", "content": "你是一个专业的文本分析助手,擅长从文本中提取高级别的主题和兴趣维度。你会理解语义上下文,保持概念的完整性,不会简单地进行分词。"},
+            {"role": "user", "content": prompt}
+        ]
+        
+        response = llm.invoke(messages)
+        
+        # Parse the JSON payload out of the reply.
+        result = extract_json_from_text(response)
+        
+        if result and "dimensions" in result:
+            return {
+                "dimensions": result["dimensions"],
+                "confidence": result.get("confidence", 0.8),
+                "reasoning": result.get("reasoning", "")
+            }
+        else:
+            print(f"⚠️  LLM返回格式不正确: {response[:200]}")
+            return {"dimensions": [], "confidence": 0.0, "error": "格式解析失败"}
+    
+    except Exception as e:
+        print(f"⚠️  提取维度失败: {e}")
+        return {"dimensions": [], "confidence": 0.0, "error": str(e)}
+
+
+def extract_dimensions_from_report(report_file: Path, llm, existing_themes: List[str] = None) -> Optional[Dict]:
+    """Extract dimensions from a single Markdown report file.
+
+    Leading ``#`` heading lines are dropped, the remaining text is passed to
+    ``extract_dimensions_from_text`` and the result is annotated with the
+    report path, the report date (the file stem) and the extraction time.
+
+    Args:
+        report_file: Path of the Markdown report.
+        llm: LLM instance forwarded to ``extract_dimensions_from_text``.
+        existing_themes: Existing themes used as an abstraction-level reference.
+
+    Returns:
+        The annotated extraction result, or None when the file is missing,
+        has no content after the heading, or cannot be read.
+    """
+    # Local import: the module header only imports the ``datetime`` class.
+    from datetime import timezone
+
+    if not report_file.exists():
+        print(f"❌ 报告文件不存在: {report_file}")
+        return None
+
+    try:
+        with open(report_file, 'r', encoding='utf-8') as f:
+            content = f.read()
+
+        # Skip heading lines at the very top; once the first non-heading line
+        # has been kept, later "#" lines are part of the body and are kept too.
+        content_lines = []
+        for line in content.split('\n'):
+            if line.strip().startswith('#') and not content_lines:
+                continue
+            content_lines.append(line)
+        text = '\n'.join(content_lines).strip()
+
+        if not text:
+            print(f"⚠️  报告内容为空: {report_file}")
+            return None
+
+        result = extract_dimensions_from_text(text, llm, existing_themes=existing_themes)
+
+        # Attach report metadata.
+        result["report_file"] = str(report_file)
+        result["report_date"] = report_file.stem
+        # The trailing "Z" designates UTC, so the clock must be read in UTC;
+        # a naive local datetime.now() would mislabel the timestamp.
+        result["extraction_date"] = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+        return result
+
+    except Exception as e:
+        print(f"❌ 读取报告失败 {report_file}: {e}")
+        return None
+
+
+def save_extraction_result(base_dir: Path, result: Dict, report_type: str):
+    """Write an extraction result to ``<base_dir>/archive/dimensions`` as JSON.
+
+    The file is named ``<report_date>_<report_type>_dimensions.json``; today's
+    date is used when the result carries no ``report_date``.
+
+    Args:
+        base_dir: Project base directory.
+        result: Extraction result to serialise.
+        report_type: Report type used in the file name.
+
+    Returns:
+        The path of the written file, or None when writing failed.
+    """
+    target_dir = base_dir / "archive" / "dimensions"
+    target_dir.mkdir(parents=True, exist_ok=True)
+
+    # The file name is derived from the report date.
+    today = datetime.now().strftime("%Y-%m-%d")
+    date_label = result["report_date"] if "report_date" in result else today
+    output_file = target_dir / f"{date_label}_{report_type}_dimensions.json"
+
+    try:
+        with output_file.open('w', encoding='utf-8') as stream:
+            json.dump(result, stream, indent=2, ensure_ascii=False)
+        print(f"✅ 维度提取结果已保存: {output_file}")
+        return output_file
+    except Exception as e:
+        print(f"❌ 保存失败: {e}")
+        return None
+
+
+def batch_extract_dimensions(base_dir: Path, report_type: str = None, llm=None, existing_themes: List[str] = None) -> List[Dict]:
+    """Extract and save dimensions for every archived report.
+
+    Reports are read from ``<base_dir>/archive/reports/<type>/*.md`` in sorted
+    order. Each result that contains dimensions is tagged with its report
+    type, saved via ``save_extraction_result`` and collected.
+
+    Args:
+        base_dir: Project base directory.
+        report_type: daily / weekly / monthly; None processes all three.
+        llm: LLM instance; created with ``init_llm`` when falsy.
+        existing_themes: Reference themes; when None they are loaded from
+            ``themes.yaml`` in ``base_dir`` if that file exists.
+
+    Returns:
+        The results that yielded at least one dimension (empty when no LLM
+        could be initialised).
+    """
+    llm = llm or init_llm()
+    if not llm:
+        print("❌ LLM未初始化,无法提取维度")
+        return []
+
+    # No themes supplied: try themes.yaml. It is read here directly rather
+    # than through another module, to avoid a circular import.
+    if existing_themes is None:
+        try:
+            import yaml
+            themes_file = base_dir / "themes.yaml"
+            if themes_file.exists():
+                with open(themes_file, 'r', encoding='utf-8') as f:
+                    data = yaml.safe_load(f)
+                existing_themes = data.get('themes', []) if data and isinstance(data, dict) else []
+                if existing_themes:
+                    print(f"📌 已加载 {len(existing_themes)} 个现有themes作为参考: {existing_themes}")
+        except Exception as e:
+            print(f"⚠️  加载themes.yaml失败,将不参考现有themes: {e}")
+            existing_themes = []
+
+    kinds = (report_type,) if report_type else ("daily", "weekly", "monthly")
+    collected = []
+
+    for kind in kinds:
+        folder = base_dir / "archive" / "reports" / kind
+        if not folder.exists():
+            continue
+
+        print(f"\n📂 处理{kind}报告...")
+        for report_file in sorted(folder.glob("*.md")):
+            print(f"  处理: {report_file.name}")
+            result = extract_dimensions_from_report(report_file, llm, existing_themes=existing_themes)
+
+            if not (result and result.get("dimensions")):
+                print(f"    ⚠️  未提取到维度")
+                continue
+
+            # Tag with the report type, persist, then collect.
+            result["report_type"] = kind
+            save_extraction_result(base_dir, result, kind)
+            collected.append(result)
+
+            dims = result["dimensions"]
+            print(f"    ✅ 提取到 {len(dims)} 个维度: {', '.join(dims[:5])}")
+            # Show the model's reasoning as well when present (debugging aid).
+            reasoning = result.get("reasoning")
+            if reasoning:
+                print(f"       推理: {reasoning[:100]}...")
+
+    return collected
+
+
+def load_extraction_results(base_dir: Path) -> List[Dict]:
+    """Load every saved extraction result under ``<base_dir>/archive/dimensions``.
+
+    Files matching ``*_dimensions.json`` are read in sorted file-name order, so
+    the returned list is deterministic (file names start with the report
+    date). Unreadable or malformed files are reported and skipped.
+
+    Args:
+        base_dir: Project base directory.
+
+    Returns:
+        The decoded results; empty when the directory does not exist.
+    """
+    dimensions_dir = base_dir / "archive" / "dimensions"
+
+    if not dimensions_dir.is_dir():
+        return []
+
+    results = []
+    # sorted(): a bare glob yields entries in filesystem order, which differs
+    # between platforms and runs.
+    for json_file in sorted(dimensions_dir.glob("*_dimensions.json")):
+        try:
+            with open(json_file, 'r', encoding='utf-8') as f:
+                results.append(json.load(f))
+        except (OSError, ValueError) as e:
+            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+            print(f"⚠️  读取提取结果失败 {json_file.name}: {e}")
+
+    return results
+
+
+if __name__ == "__main__":
+    # Command-line entry point: extract dimensions from one report file, or
+    # from every archived report under the base directory.
+    import argparse
+
+    parser = argparse.ArgumentParser(description="从报告中提取维度")
+    parser.add_argument("--report-type", choices=["daily", "weekly", "monthly"],
+                       help="指定报告类型(不指定则处理所有类型)")
+    parser.add_argument("--report-file", type=str,
+                       help="指定单个报告文件路径")
+    parser.add_argument("--base-dir", type=str,
+                       help="基础目录路径(默认为脚本所在目录)")
+
+    args = parser.parse_args()
+
+    base_dir = Path(args.base_dir) if args.base_dir else Path(__file__).parent
+
+    llm = init_llm()
+    if not llm:
+        print("❌ 无法初始化LLM,退出")
+        sys.exit(1)
+
+    # Load existing themes when themes.yaml is present.
+    existing_themes = None
+    try:
+        import yaml
+        themes_file = base_dir / "themes.yaml"
+        if themes_file.exists():
+            with open(themes_file, 'r', encoding='utf-8') as f:
+                data = yaml.safe_load(f)
+                if data and isinstance(data, dict):
+                    existing_themes = data.get('themes', [])
+    except Exception as e:
+        # Themes are only a prompt hint, so a broken themes.yaml is reported
+        # but not fatal. The empty list stops batch mode from retrying the
+        # same load and printing the warning twice.
+        print(f"⚠️  加载themes.yaml失败,将不参考现有themes: {e}")
+        existing_themes = []
+
+    if args.report_file:
+        # Single-file mode.
+        report_file = Path(args.report_file)
+        result = extract_dimensions_from_report(report_file, llm, existing_themes=existing_themes)
+        if result and result.get("dimensions"):
+            # extract_dimensions_from_report never sets "report_type", so take
+            # it from --report-type, then from the archive folder name
+            # (reports/<type>/<file>.md), and only then default to "daily".
+            parent_name = report_file.parent.name
+            report_type = args.report_type or (
+                parent_name if parent_name in ("daily", "weekly", "monthly") else "daily"
+            )
+            result["report_type"] = report_type
+            save_extraction_result(base_dir, result, report_type)
+            print(f"\n提取的维度: {result.get('dimensions', [])}")
+        elif result:
+            # As in batch mode, an empty or failed extraction is not archived.
+            print(f"    ⚠️  未提取到维度")
+    else:
+        # Batch mode.
+        results = batch_extract_dimensions(base_dir, args.report_type, llm, existing_themes=existing_themes)
+        print(f"\n✅ 共处理 {len(results)} 个报告")
+

+ 155 - 155
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/manage_themes.py → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/manage_themes.py

@@ -1,155 +1,155 @@
-"""
-主题管理工具 - 管理themes.yaml文件
-"""
-
-import sys
-import yaml
-from pathlib import Path
-from typing import List
-
-# 设置控制台编码为UTF-8(Windows)
-# 注意:只在作为主脚本运行时重定向,避免在被导入时冲突
-if sys.platform == 'win32' and __name__ == "__main__":
-    import io
-    if not isinstance(sys.stdout, io.TextIOWrapper):
-        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
-    if not isinstance(sys.stderr, io.TextIOWrapper):
-        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
-
-
-def load_themes(themes_file: Path) -> List[str]:
-    """从themes.yaml加载themes"""
-    if not themes_file.exists():
-        return []
-    
-    try:
-        with open(themes_file, 'r', encoding='utf-8') as f:
-            data = yaml.safe_load(f)
-            if data is None:
-                return []
-            return data.get('themes', [])
-    except Exception as e:
-        print(f"⚠️  读取themes.yaml失败: {e}")
-        return []
-
-
-def save_themes(themes_file: Path, themes: List[str]):
-    """保存themes到themes.yaml"""
-    themes_file.parent.mkdir(parents=True, exist_ok=True)
-    
-    data = {'themes': themes}
-    
-    try:
-        with open(themes_file, 'w', encoding='utf-8') as f:
-            yaml.dump(data, f, allow_unicode=True, default_flow_style=False, sort_keys=False)
-        print(f"✅ themes已保存到: {themes_file}")
-        return True
-    except Exception as e:
-        print(f"❌ 保存失败: {e}")
-        return False
-
-
-def add_theme(themes_file: Path, theme: str) -> bool:
-    """添加theme"""
-    themes = load_themes(themes_file)
-    
-    if theme in themes:
-        print(f"⚠️  theme '{theme}' 已存在")
-        return False
-    
-    themes.append(theme)
-    return save_themes(themes_file, themes)
-
-
-def remove_theme(themes_file: Path, theme: str) -> bool:
-    """删除theme"""
-    themes = load_themes(themes_file)
-    
-    if theme not in themes:
-        print(f"⚠️  theme '{theme}' 不存在")
-        return False
-    
-    themes.remove(theme)
-    return save_themes(themes_file, themes)
-
-
-def list_themes(themes_file: Path):
-    """列出所有themes"""
-    themes = load_themes(themes_file)
-    
-    if not themes:
-        print("📋 当前没有themes")
-        return
-    
-    print(f"📋 当前themes ({len(themes)}个):")
-    print("-" * 70)
-    for i, theme in enumerate(themes, 1):
-        print(f"  {i}. {theme}")
-    print("-" * 70)
-
-
-def interactive_theme_management(base_dir: Path):
-    """交互式主题管理"""
-    themes_file = base_dir / "themes.yaml"
-    
-    while True:
-        print("\n" + "=" * 70)
-        print("主题管理")
-        print("=" * 70)
-        list_themes(themes_file)
-        
-        print("\n请选择操作:")
-        print("  1. 添加theme")
-        print("  2. 删除theme")
-        print("  3. 查看themes")
-        print("  0. 退出")
-        
-        choice = input("\n请选择 (0-3): ").strip()
-        
-        if choice == "0":
-            break
-        elif choice == "1":
-            theme = input("请输入要添加的theme: ").strip()
-            if theme:
-                if add_theme(themes_file, theme):
-                    print(f"✅ 已添加theme: {theme}")
-        elif choice == "2":
-            theme = input("请输入要删除的theme: ").strip()
-            if theme:
-                confirm = input(f"确认删除 '{theme}'? (y/n): ").strip().lower()
-                if confirm in ['y', 'yes', '是']:
-                    if remove_theme(themes_file, theme):
-                        print(f"✅ 已删除theme: {theme}")
-        elif choice == "3":
-            list_themes(themes_file)
-        else:
-            print("⚠️  无效选择,请重试")
-
-
-if __name__ == "__main__":
-    import argparse
-    
-    parser = argparse.ArgumentParser(description="主题管理工具")
-    parser.add_argument("--add", type=str, help="添加theme")
-    parser.add_argument("--remove", type=str, help="删除theme")
-    parser.add_argument("--list", action="store_true", help="列出所有themes")
-    parser.add_argument("--interactive", action="store_true", help="交互式管理")
-    parser.add_argument("--base-dir", type=str, help="基础目录路径(默认为脚本所在目录)")
-    
-    args = parser.parse_args()
-    
-    base_dir = Path(args.base_dir) if args.base_dir else Path(__file__).parent
-    themes_file = base_dir / "themes.yaml"
-    
-    if args.list:
-        list_themes(themes_file)
-    elif args.add:
-        add_theme(themes_file, args.add)
-    elif args.remove:
-        remove_theme(themes_file, args.remove)
-    elif args.interactive:
-        interactive_theme_management(base_dir)
-    else:
-        # 默认交互式
-        interactive_theme_management(base_dir)
-
+"""
+主题管理工具 - 管理themes.yaml文件
+"""
+
+import sys
+import yaml
+from pathlib import Path
+from typing import List
+
+# Force UTF-8 console output on Windows.
+# NOTE: the streams are only re-wrapped when this file runs as the main script,
+# so importing it as a module never replaces the importer's stdout/stderr.
+if sys.platform == 'win32' and __name__ == "__main__":
+    import io
+    if not isinstance(sys.stdout, io.TextIOWrapper):
+        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+    if not isinstance(sys.stderr, io.TextIOWrapper):
+        sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
+
+
+def load_themes(themes_file: Path) -> List[str]:
+    """Load the theme list from a themes.yaml file.
+
+    Args:
+        themes_file: Path of the YAML file; its top level is expected to be a
+            mapping with a ``themes`` list.
+
+    Returns:
+        The list stored under ``themes``. An empty list is returned when the
+        file is missing, empty, unreadable, not a mapping, or when ``themes``
+        is absent, null or not a list, so callers can always use list
+        operations on the result.
+    """
+    if not themes_file.exists():
+        return []
+
+    try:
+        with open(themes_file, 'r', encoding='utf-8') as f:
+            data = yaml.safe_load(f)
+    except Exception as e:
+        print(f"⚠️  读取themes.yaml失败: {e}")
+        return []
+
+    if data is None:
+        return []
+    if not isinstance(data, dict):
+        print(f"⚠️  读取themes.yaml失败: 顶层应为映射,实际为 {type(data).__name__}")
+        return []
+
+    # A bare "themes:" key parses to None; .get('themes', []) would hand that
+    # None to callers that do `theme in themes` / `themes.append(...)`.
+    themes = data.get('themes')
+    if themes is None:
+        return []
+    if not isinstance(themes, list):
+        print(f"⚠️  读取themes.yaml失败: themes 应为列表,实际为 {type(themes).__name__}")
+        return []
+    return themes
+
+
+def save_themes(themes_file: Path, themes: List[str]):
+    """Write the theme list to themes.yaml, creating parent directories.
+
+    Args:
+        themes_file: Destination YAML file.
+        themes: Themes to store under the ``themes`` key.
+
+    Returns:
+        True when the file was written, False when writing failed.
+    """
+    themes_file.parent.mkdir(parents=True, exist_ok=True)
+
+    # Block style, insertion order and unescaped non-ASCII text.
+    dump_options = dict(allow_unicode=True, default_flow_style=False, sort_keys=False)
+
+    try:
+        with themes_file.open('w', encoding='utf-8') as stream:
+            yaml.dump({'themes': themes}, stream, **dump_options)
+        print(f"✅ themes已保存到: {themes_file}")
+    except Exception as e:
+        print(f"❌ 保存失败: {e}")
+        return False
+    return True
+
+
+def add_theme(themes_file: Path, theme: str) -> bool:
+    """Append a theme to themes.yaml unless it is already listed.
+
+    Returns:
+        The result of ``save_themes`` when the theme was added, False when it
+        already exists.
+    """
+    current = load_themes(themes_file)
+
+    if theme not in current:
+        current.append(theme)
+        return save_themes(themes_file, current)
+
+    print(f"⚠️  theme '{theme}' 已存在")
+    return False
+
+
+def remove_theme(themes_file: Path, theme: str) -> bool:
+    """Remove a theme from themes.yaml if it is listed.
+
+    Returns:
+        The result of ``save_themes`` when the theme was removed, False when
+        it is not present.
+    """
+    current = load_themes(themes_file)
+
+    if theme in current:
+        current.remove(theme)
+        return save_themes(themes_file, current)
+
+    print(f"⚠️  theme '{theme}' 不存在")
+    return False
+
+
+def list_themes(themes_file: Path):
+    """Print the themes stored in themes.yaml as a numbered list."""
+    current = load_themes(themes_file)
+
+    if not current:
+        print("📋 当前没有themes")
+        return
+
+    rule = "-" * 70
+    numbered = [f"  {position}. {theme}" for position, theme in enumerate(current, 1)]
+    print("\n".join([f"📋 当前themes ({len(current)}个):", rule, *numbered, rule]))
+
+
+def interactive_theme_management(base_dir: Path):
+    """Run a console menu loop for adding, removing and listing themes.
+
+    Works on ``themes.yaml`` inside ``base_dir`` and returns when the user
+    chooses option 0.
+    """
+    themes_file = base_dir / "themes.yaml"
+    banner = "=" * 70
+    menu = ("  1. 添加theme", "  2. 删除theme", "  3. 查看themes", "  0. 退出")
+
+    while True:
+        print("\n" + banner)
+        print("主题管理")
+        print(banner)
+        list_themes(themes_file)
+
+        print("\n请选择操作:")
+        for entry in menu:
+            print(entry)
+
+        choice = input("\n请选择 (0-3): ").strip()
+
+        if choice == "0":
+            return
+
+        if choice == "1":
+            theme = input("请输入要添加的theme: ").strip()
+            if theme and add_theme(themes_file, theme):
+                print(f"✅ 已添加theme: {theme}")
+        elif choice == "2":
+            theme = input("请输入要删除的theme: ").strip()
+            if not theme:
+                continue
+            # Removal is destructive, so ask for confirmation first.
+            confirm = input(f"确认删除 '{theme}'? (y/n): ").strip().lower()
+            if confirm in ('y', 'yes', '是') and remove_theme(themes_file, theme):
+                print(f"✅ 已删除theme: {theme}")
+        elif choice == "3":
+            list_themes(themes_file)
+        else:
+            print("⚠️  无效选择,请重试")
+
+
+if __name__ == "__main__":
+    # Command-line entry point for theme management.
+    import argparse
+
+    parser = argparse.ArgumentParser(description="主题管理工具")
+    parser.add_argument("--add", type=str, help="添加theme")
+    parser.add_argument("--remove", type=str, help="删除theme")
+    parser.add_argument("--list", action="store_true", help="列出所有themes")
+    parser.add_argument("--interactive", action="store_true", help="交互式管理")
+    parser.add_argument("--base-dir", type=str, help="基础目录路径(默认为脚本所在目录)")
+
+    args = parser.parse_args()
+
+    base_dir = Path(__file__).parent if not args.base_dir else Path(args.base_dir)
+    themes_file = base_dir / "themes.yaml"
+
+    # Precedence: --list, then --add, then --remove. Interactive mode is both
+    # the explicit --interactive choice and the default with no option.
+    if args.list:
+        list_themes(themes_file)
+    elif args.add:
+        add_theme(themes_file, args.add)
+    elif args.remove:
+        remove_theme(themes_file, args.remove)
+    else:
+        interactive_theme_management(base_dir)
+

+ 0 - 0
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/person.png → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/person.png


+ 20 - 20
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/requirements.txt → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/requirements.txt

@@ -1,20 +1,20 @@
-# Personal Information Signaling System - 个人信息信号系统
-# 依赖包列表
-
-# YAML文件处理
-pyyaml>=6.0.0
-
-# 图片处理(桌面提醒功能)
-Pillow>=10.0.0
-
-# 环境变量管理
-python-dotenv>=1.0.0
-
-# HTTP客户端(YouTube API调用)
-httpx>=0.28.0
-
-# LLM调用(可选,用于维度提取和研究报告生成)
-# 注意:hello_agents 需要单独安装,请参考:
-# https://github.com/datawhalechina/hello-agents
-# 或者使用 pip install hello-agents(如果已发布到PyPI)
-
+# Personal Information Signaling System - 个人信息信号系统
+# 依赖包列表
+
+# YAML文件处理
+pyyaml>=6.0.0
+
+# 图片处理(桌面提醒功能)
+Pillow>=10.0.0
+
+# 环境变量管理
+python-dotenv>=1.0.0
+
+# HTTP客户端(YouTube API调用)
+httpx>=0.28.0
+
+# LLM调用(可选,用于维度提取和研究报告生成)
+# 注意:hello_agents 需要单独安装,请参考:
+# https://github.com/datawhalechina/hello-agents
+# 或者使用 pip install hello-agents(如果已发布到PyPI)
+

+ 18 - 18
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/requirements_simplified.txt → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/requirements_simplified.txt

@@ -1,18 +1,18 @@
-# 维度分析系统依赖(简化版)
-# 只包含实际使用的库
-
-# YAML文件处理
-pyyaml==6.0.2
-
-# 图片处理(桌面提醒功能)
-Pillow>=10.0.0
-
-# 环境变量管理
-python-dotenv==1.1.1
-
-# HTTP客户端(YouTube搜索脚本需要)
-httpx==0.28.1
-
-# LLM调用(如果使用hello_agents)
-# 注意:hello_agents可能需要额外安装,请根据实际情况调整
-
+# 维度分析系统依赖(简化版)
+# 只包含实际使用的库
+
+# YAML文件处理
+pyyaml==6.0.2
+
+# 图片处理(桌面提醒功能)
+Pillow>=10.0.0
+
+# 环境变量管理
+python-dotenv==1.1.1
+
+# HTTP客户端(YouTube搜索脚本需要)
+httpx==0.28.1
+
+# LLM调用(如果使用hello_agents)
+# 注意:hello_agents可能需要额外安装,请根据实际情况调整
+

+ 740 - 740
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/search_youtube_mcp_videos.py → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/search_youtube_mcp_videos.py

@@ -1,740 +1,740 @@
-"""
-YouTube 视频搜索脚本 - 按主题搜索、评分、生成日报
-从 themes.yaml 读取主题列表,对每个主题分别搜索 YouTube
-合并结果、评分、排序后生成日报报告
-"""
-
-import sys
-import os
-import json
-import argparse
-import re
-from pathlib import Path
-from datetime import datetime, timedelta, timezone
-
-# 设置控制台编码为UTF-8(Windows)
-if sys.platform == 'win32':
-    import io
-    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
-    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
-
-try:
-    import httpx
-except ImportError:
-    print("❌ 错误: 需要安装 httpx 库")
-    print("💡 运行: pip install httpx")
-    sys.exit(1)
-
-try:
-    import yaml
-except ImportError:
-    print("❌ 错误: 需要安装 PyYAML 库")
-    print("💡 运行: pip install pyyaml")
-    sys.exit(1)
-
-# 加载 .env 文件(如果存在)
-try:
-    from dotenv import load_dotenv
-    load_dotenv()
-except ImportError:
-    pass  # dotenv 可选,如果未安装则跳过
-
-# 可选:导入 LLM 相关模块(仅用于 research 模式)
-try:
-    from hello_agents.core.llm import HelloAgentsLLM
-    LLM_AVAILABLE = True
-except ImportError:
-    LLM_AVAILABLE = False
-
-# 配置常量
-DAYS_WINDOW = int(os.getenv("DAYS_WINDOW", "14"))  # 时间窗口:默认14天
-
-
-def load_youtube_api_key():
-    """从环境变量或配置文件中加载 YouTube API Key"""
-    # 首先尝试环境变量
-    api_key = os.getenv("YOUTUBE_API_KEY")
-    
-    if api_key:
-        return api_key
-    
-    # 尝试从配置文件中读取
-    config_file = Path(__file__).parent / "config"
-    if config_file.exists():
-        try:
-            with open(config_file, 'r', encoding='utf-8') as f:
-                for line in f:
-                    line = line.strip()
-                    if line.startswith("YOUTUBE_API_KEY=") and not line.startswith("#"):
-                        api_key = line.split("=", 1)[1].strip()
-                        if api_key:
-                            return api_key
-        except Exception as e:
-            print(f"⚠️  读取配置文件失败: {e}")
-    
-    return None
-
-
-def load_themes():
-    """从 themes.yaml 读取主题列表"""
-    themes_file = Path(__file__).parent / "themes.yaml"
-    if not themes_file.exists():
-        print(f"❌ 错误: 找不到 themes.yaml 文件: {themes_file}")
-        return []
-    
-    try:
-        with open(themes_file, 'r', encoding='utf-8') as f:
-            data = yaml.safe_load(f)
-            if data is None:
-                print(f"❌ 错误: themes.yaml 文件为空或格式错误")
-                return []
-            themes = data.get('themes', [])
-            if not themes:
-                print(f"⚠️  警告: themes.yaml 中未找到主题列表")
-                return []
-            print(f"✅ 加载了 {len(themes)} 个主题: {', '.join(themes)}")
-            return themes
-    except Exception as e:
-        print(f"❌ 读取 themes.yaml 失败: {e}")
-        import traceback
-        traceback.print_exc()
-        return []
-
-
-def load_whitelist_channels():
-    """从 channels.yaml 读取白名单频道"""
-    channels_file = Path(__file__).parent / "channels.yaml"
-    if not channels_file.exists():
-        print(f"⚠️  警告: 找不到 channels.yaml 文件: {channels_file}")
-        return []
-    
-    try:
-        with open(channels_file, 'r', encoding='utf-8') as f:
-            data = yaml.safe_load(f)
-            if data is None:
-                print(f"⚠️  警告: channels.yaml 文件为空或格式错误")
-                return []
-            channels = data.get('whitelist_channels', [])
-            print(f"✅ 加载了 {len(channels)} 个白名单频道")
-            return channels
-    except Exception as e:
-        print(f"⚠️  读取 channels.yaml 失败: {e}")
-        return []
-
-
-def search_youtube_videos(query: str, max_results: int = 10, api_key: str = None):
-    """搜索 YouTube 视频"""
-    if not api_key:
-        api_key = load_youtube_api_key()
-    
-    if not api_key:
-        print("❌ 错误: 未找到 YouTube API Key")
-        print("💡 请设置环境变量 YOUTUBE_API_KEY 或在 config 文件中配置")
-        return None
-    
-    try:
-        url = "https://www.googleapis.com/youtube/v3/search"
-        params = {
-            "key": api_key,
-            "q": query,
-            "part": "snippet",
-            "type": "video",
-            "maxResults": min(max_results, 50),  # API limit
-            "order": "relevance"
-        }
-        
-        response = httpx.get(url, params=params, timeout=10.0)
-        response.raise_for_status()
-        
-        data = response.json()
-        
-        if "items" not in data or not data["items"]:
-            return []
-        
-        videos = []
-        for item in data["items"]:
-            video_info = {
-                "video_id": item["id"]["videoId"],
-                "title": item["snippet"]["title"],
-                "description": item["snippet"]["description"],
-                "channel_title": item["snippet"]["channelTitle"],
-                "channel_id": item["snippet"]["channelId"],
-                "published_at": item["snippet"]["publishedAt"],
-                "thumbnail": item["snippet"]["thumbnails"].get("medium", {}).get("url", ""),
-                "url": f"https://www.youtube.com/watch?v={item['id']['videoId']}",
-                "query": query  # 记录搜索关键词
-            }
-            videos.append(video_info)
-        
-        return videos
-    
-    except httpx.HTTPStatusError as e:
-        if e.response.status_code == 403:
-            print(f"❌ 错误: API 密钥无效或配额已用完 (查询: {query})")
-        else:
-            print(f"❌ HTTP 错误: {e.response.status_code} (查询: {query})")
-        return None
-    except Exception as e:
-        print(f"❌ 搜索失败 (查询: {query}): {str(e)}")
-        return None
-
-
-def parse_published_time(published_at_str: str):
-    """解析发布时间字符串为 datetime 对象"""
-    try:
-        # YouTube API 返回 ISO 8601 格式: 2024-01-01T12:00:00Z
-        dt = datetime.fromisoformat(published_at_str.replace('Z', '+00:00'))
-        return dt
-    except Exception as e:
-        print(f"⚠️  解析发布时间失败: {published_at_str}, 错误: {e}")
-        return None
-
-
-def is_within_time_window(published_at_str: str, days_window: int = DAYS_WINDOW):
-    """检查视频是否在时间窗口内(默认14天)"""
-    published_time = parse_published_time(published_at_str)
-    if not published_time:
-        return False
-    
-    now = datetime.now(timezone.utc)
-    time_diff = now - published_time
-    
-    return time_diff <= timedelta(days=days_window)
-
-
-def calculate_time_score(published_at_str: str):
-    """计算时间评分:24小时内 +3,48小时内 +2"""
-    published_time = parse_published_time(published_at_str)
-    if not published_time:
-        return 0
-    
-    now = datetime.now(timezone.utc)
-    time_diff = now - published_time
-    
-    if time_diff <= timedelta(hours=24):
-        return 3
-    elif time_diff <= timedelta(hours=48):
-        return 2
-    else:
-        return 0
-
-
-def count_theme_keywords(text: str, themes: list):
-    """计算文本中命中的主题关键词数量(不区分大小写)"""
-    if not text:
-        return 0
-    
-    text_lower = text.lower()
-    count = 0
-    for theme in themes:
-        if theme.lower() in text_lower:
-            count += 1
-    return count
-
-
-def score_video(video: dict, themes: list, whitelist_channels: list):
-    """为视频计算评分"""
-    score = 0
-    
-    # 1. 白名单频道评分 +10
-    if video['channel_title'] in whitelist_channels:
-        score += 10
-    
-    # 2. 标题或描述中每命中1个主题关键词 +5
-    title_matches = count_theme_keywords(video['title'], themes)
-    desc_matches = count_theme_keywords(video['description'], themes)
-    keyword_score = (title_matches + desc_matches) * 5
-    score += keyword_score
-    
-    # 3. 发布时间评分
-    time_score = calculate_time_score(video['published_at'])
-    score += time_score
-    
-    return score
-
-
-def merge_and_deduplicate_videos(all_videos: list):
-    """合并视频列表并按 videoId 去重"""
-    video_dict = {}
-    
-    for video in all_videos:
-        video_id = video['video_id']
-        if video_id not in video_dict:
-            video_dict[video_id] = video
-        else:
-            # 如果已存在,合并查询关键词
-            existing_queries = video_dict[video_id].get('queries', [])
-            if isinstance(existing_queries, str):
-                existing_queries = [existing_queries]
-            if video['query'] not in existing_queries:
-                existing_queries.append(video['query'])
-            video_dict[video_id]['queries'] = existing_queries
-    
-    return list(video_dict.values())
-
-
-def generate_action(videos: list):
-    """生成 action 字段:从 Top1 生成1条可执行动作(≤15min)"""
-    if not videos:
-        return "暂无推荐视频"
-    
-    # 只使用 Top1
-    top1 = videos[0]
-    action = f"观看《{top1['title']}》({top1['channel_title']}),预计≤15分钟"
-    
-    return action
-
-
-def has_clickbait_words(title: str):
-    """检查标题中是否包含标题党词汇"""
-    clickbait_words = ['INSANE', 'HYPE', 'SHOCKING', 'UNBELIEVABLE', 'MIND-BLOWING', 
-                       'AMAZING', 'INCREDIBLE', 'YOU WON\'T BELIEVE', 'THIS WILL BLOW YOUR MIND']
-    title_upper = title.upper()
-    for word in clickbait_words:
-        if word in title_upper:
-            return True
-    return False
-
-
-def is_older_than_days(published_at_str: str, days: int = 30):
-    """检查视频是否超过指定天数"""
-    published_time = parse_published_time(published_at_str)
-    if not published_time:
-        return False
-    
-    now = datetime.now(timezone.utc)
-    time_diff = now - published_time
-    
-    return time_diff > timedelta(days=days)
-
-
-def generate_risk(videos: list, themes: list):
-    """生成 risk 字段:偏差检测"""
-    if not videos:
-        return "无风险"
-    
-    # 只检查 Top3
-    top3 = videos[:3]
-    warnings = []
-    
-    # 检查是否有超过30天的视频
-    old_videos = []
-    for video in top3:
-        if is_older_than_days(video['published_at'], days=30):
-            old_videos.append(video['title'])
-    
-    if old_videos:
-        warnings.append(f"Top3中存在超过30天的视频: {', '.join(old_videos[:2])}")
-    
-    # 检查是否有标题党词汇
-    clickbait_videos = []
-    for video in top3:
-        if has_clickbait_words(video['title']):
-            clickbait_videos.append(video['title'])
-    
-    if clickbait_videos:
-        warnings.append(f"检测到标题党词汇: {', '.join(clickbait_videos[:2])}")
-    
-    # 如果有警告,返回警告;否则返回正面评价
-    if warnings:
-        return "; ".join(warnings)
-    else:
-        return "今日信号较新且较可信"
-
-
-def init_research_llm():
-    """初始化用于研究模式的 LLM(使用通义千问/ModelScope配置)"""
-    if not LLM_AVAILABLE:
-        print("⚠️  警告: hello_agents 模块未安装,无法使用研究模式")
-        return None
-    
-    # 从环境变量读取 LLM 配置(优先级顺序,与 chapter9 保持一致)
-    # 优先使用 ModelScope 配置(通义千问)
-    llm_model = (
-        os.getenv("LLM_MODEL") or 
-        os.getenv("LLM_MODEL_ID") or
-        "Qwen/Qwen2.5-7B-Instruct"  # 默认通义千问模型
-    )
-    llm_api_key = (
-        os.getenv("LLM_API_KEY") or  # 优先使用 LLM_API_KEY(阿里云通义千问)
-        os.getenv("MODELSCOPE_API_KEY") or 
-        os.getenv("MODELSCOPE_API_TOKEN")
-    )
-    llm_base_url = (
-        os.getenv("LLM_BASE_URL") or 
-        "https://api-inference.modelscope.cn/v1/"  # ModelScope 默认地址
-    )
-    llm_provider = os.getenv("LLM_PROVIDER", "modelscope")
-    
-    if not llm_api_key:
-        print("⚠️  警告: 未找到 LLM API Key,研究模式需要配置 LLM")
-        print("💡 请设置环境变量(推荐在 .env 文件中配置):")
-        print("   MODELSCOPE_API_KEY=your-modelscope-token-here")
-        print("   LLM_MODEL=Qwen/Qwen2.5-7B-Instruct")
-        print("   LLM_BASE_URL=https://api-inference.modelscope.cn/v1/")
-        print("   LLM_PROVIDER=modelscope")
-        return None
-    
-    try:
-        llm = HelloAgentsLLM(
-            model=llm_model,
-            api_key=llm_api_key,
-            base_url=llm_base_url,
-            provider=llm_provider
-        )
-        print(f"✅ LLM 初始化成功: {llm_model} ({llm_provider})")
-        return llm
-    except Exception as e:
-        print(f"⚠️  初始化 LLM 失败: {e}")
-        return None
-
-
-def prepare_sources_data(top3_videos: list):
-    """从 Top3 视频中提取 sources 数据"""
-    sources = []
-    for video in top3_videos:
-        sources.append({
-            "title": video['title'],
-            "channel": video['channel_title'],
-            "url": video['url'],
-            "published_at": video['published_at'],
-            "score": video['score']
-        })
-    return sources
-
-
-def extract_json_from_text(text: str):
-    """从文本中提取 JSON 内容(处理 LLM 可能返回的格式化文本)"""
-    # 尝试直接解析
-    try:
-        return json.loads(text.strip())
-    except json.JSONDecodeError:
-        pass
-    
-    # 尝试提取 JSON 代码块
-    json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
-    if json_match:
-        try:
-            return json.loads(json_match.group(1))
-        except json.JSONDecodeError:
-            pass
-    
-    # 尝试提取第一个完整的 JSON 对象
-    json_match = re.search(r'\{.*\}', text, re.DOTALL)
-    if json_match:
-        try:
-            return json.loads(json_match.group(0))
-        except json.JSONDecodeError:
-            pass
-    
-    return None
-
-
-def generate_research_report(top3_videos: list, themes: list, llm):
-    """使用 LLM 生成研究报告"""
-    if not top3_videos:
-        return None
-    
-    # 构建视频信息文本
-    videos_info = []
-    for i, video in enumerate(top3_videos, 1):
-        videos_info.append(
-            f"{i}. 标题: {video['title']}\n"
-            f"   频道: {video['channel_title']}\n"
-            f"   发布时间: {video['published_at']}\n"
-            f"   评分: {video['score']}分\n"
-            f"   链接: {video['url']}"
-        )
-    
-    videos_text = "\n\n".join(videos_info)
-    themes_text = ", ".join(themes)
-    
-    # 构建 prompt
-    prompt = f"""基于以下 Top3 YouTube 视频信息,生成一份结构化研究报告。
-
-视频信息:
-{videos_text}
-
-搜索主题:{themes_text}
-
-请以 JSON 格式返回以下内容:
-1. question: 一个核心问题,概括这些视频的共同关注点
-2. key_findings: 3条发现,每条1句话,基于标题/频道/发布时间推断,使用"可能/倾向"等措辞
-3. why_it_matters_to_me: 为什么这些信息对我重要(个性化解释)
-4. next_steps: 1-3条行动建议,每条≤15分钟
-
-请严格按照以下 JSON 格式返回(不要包含其他文字):
-{{
-  "question": "核心问题",
-  "key_findings": [
-    "发现1(使用可能/倾向等措辞)",
-    "发现2(使用可能/倾向等措辞)",
-    "发现3(使用可能/倾向等措辞)"
-  ],
-  "why_it_matters_to_me": "个性化解释",
-  "next_steps": [
-    "行动建议1(≤15分钟)",
-    "行动建议2(≤15分钟)",
-    "行动建议3(≤15分钟)"
-  ]
-}}"""
-
-    messages = [
-        {"role": "system", "content": "你是一位专业的研究分析师,擅长从视频信息中提取关键洞察并给出可执行的行动建议。请始终以 JSON 格式返回结果。"},
-        {"role": "user", "content": prompt}
-    ]
-    
-    try:
-        print("\n🔬 正在使用 LLM 生成研究报告...")
-        response = llm.invoke(messages)
-        
-        if not response:
-            print("⚠️  LLM 返回空响应")
-            return None
-        
-        # 提取 JSON
-        research_data = extract_json_from_text(response)
-        
-        if not research_data:
-            print(f"⚠️  无法解析 LLM 响应为 JSON,原始响应: {response[:200]}...")
-            return None
-        
-        # 验证必需字段
-        required_fields = ["question", "key_findings", "why_it_matters_to_me", "next_steps"]
-        missing_fields = [field for field in required_fields if field not in research_data]
-        if missing_fields:
-            print(f"⚠️  LLM 响应缺少必需字段: {', '.join(missing_fields)}")
-            return None
-        
-        # 确保 key_findings 是列表且有3条
-        if not isinstance(research_data.get("key_findings"), list):
-            research_data["key_findings"] = []
-        if len(research_data["key_findings"]) != 3:
-            # 如果不足3条,填充或截断
-            while len(research_data["key_findings"]) < 3:
-                research_data["key_findings"].append("暂无发现")
-            research_data["key_findings"] = research_data["key_findings"][:3]
-        
-        # 确保 next_steps 是列表,最多3条
-        if not isinstance(research_data.get("next_steps"), list):
-            research_data["next_steps"] = []
-        research_data["next_steps"] = research_data["next_steps"][:3]
-        
-        print("✅ 研究报告生成成功")
-        return research_data
-        
-    except Exception as e:
-        print(f"⚠️  生成研究报告时出错: {e}")
-        import traceback
-        traceback.print_exc()
-        return None
-
-
-def main():
-    """主函数"""
-    # 解析命令行参数
-    parser = argparse.ArgumentParser(description="YouTube 视频搜索 - 多主题智能搜索与日报生成")
-    parser.add_argument(
-        "--mode",
-        type=str,
-        choices=["daily_signal", "research"],
-        default="research",
-        help="运行模式: research (默认,生成日报+研究报告) 或 daily_signal (仅生成日报)"
-    )
-    args = parser.parse_args()
-    mode = args.mode
-    
-    print("=" * 70)
-    print("YouTube 视频搜索 - 多主题智能搜索与日报生成")
-    if mode == "research":
-        print("运行模式: 研究模式 (将生成日报 + 研究报告)")
-    else:
-        print("运行模式: 日报模式 (仅生成日报)")
-    print("=" * 70)
-    
-    # 1. 加载配置
-    themes = load_themes()
-    if not themes:
-        print("❌ 无法加载主题列表,退出")
-        return
-    
-    whitelist_channels = load_whitelist_channels()
-    api_key = load_youtube_api_key()
-    if not api_key:
-        print("❌ 无法加载 API Key,退出")
-        return
-    
-    # 2. 对每个主题搜索
-    print(f"\n🔍 开始搜索 {len(themes)} 个主题...")
-    all_videos = []
-    
-    for theme in themes:
-        print(f"  搜索主题: {theme}")
-        videos = search_youtube_videos(theme, max_results=10, api_key=api_key)
-        if videos:
-            all_videos.extend(videos)
-            print(f"    ✅ 找到 {len(videos)} 个视频")
-        else:
-            print(f"    ⚠️  未找到视频或搜索失败")
-    
-    if not all_videos:
-        print("❌ 未找到任何视频,退出")
-        return
-    
-    print(f"\n📊 合并前共找到 {len(all_videos)} 个视频")
-    
-    # 3. 合并去重
-    unique_videos = merge_and_deduplicate_videos(all_videos)
-    print(f"📊 去重后剩余 {len(unique_videos)} 个唯一视频")
-    
-    # 4. 时间窗口过滤:只考虑最近 DAYS_WINDOW 天的视频
-    print(f"\n⏰ 应用时间窗口过滤({DAYS_WINDOW}天)...")
-    filtered_videos = [v for v in unique_videos if is_within_time_window(v['published_at'], DAYS_WINDOW)]
-    excluded_count = len(unique_videos) - len(filtered_videos)
-    if excluded_count > 0:
-        print(f"   ⚠️  过滤掉 {excluded_count} 个超过 {DAYS_WINDOW} 天的视频")
-    print(f"   ✅ 剩余 {len(filtered_videos)} 个视频参与排序")
-    
-    if not filtered_videos:
-        print(f"❌ 时间窗口内({DAYS_WINDOW}天)未找到任何视频,退出")
-        return
-    
-    # 5. 评分
-    print(f"\n⭐ 开始评分...")
-    for video in filtered_videos:
-        score = score_video(video, themes, whitelist_channels)
-        video['score'] = score
-        video['scoring_details'] = {
-            'whitelist_bonus': 10 if video['channel_title'] in whitelist_channels else 0,
-            'keyword_matches': count_theme_keywords(video['title'], themes) + count_theme_keywords(video['description'], themes),
-            'time_bonus': calculate_time_score(video['published_at'])
-        }
-    
-    # 6. 排序并取 Top 3
-    sorted_videos = sorted(filtered_videos, key=lambda x: x['score'], reverse=True)
-    top3_videos = sorted_videos[:3]
-    
-    print(f"\n🏆 Top 3 视频:")
-    for i, video in enumerate(top3_videos, 1):
-        print(f"  {i}. [{video['score']}分] {video['title']}")
-        print(f"     频道: {video['channel_title']}")
-        print(f"     链接: {video['url']}")
-    
-    # 7. 生成日期字符串
-    today = datetime.now().strftime("%Y-%m-%d")
-    
-    # 8. 创建输出目录
-    base_dir = Path(__file__).parent
-    raw_dir = base_dir / "raw" / "youtube"
-    archive_dir = base_dir / "archive" / "youtube"
-    raw_dir.mkdir(parents=True, exist_ok=True)
-    archive_dir.mkdir(parents=True, exist_ok=True)
-    
-    # 9. 保存原始数据
-    raw_file = raw_dir / f"{today}_raw.json"
-    raw_data = {
-        "date": today,
-        "themes_used": themes,
-        "whitelist_channels": whitelist_channels,
-        "days_window": DAYS_WINDOW,
-        "total_videos_found": len(all_videos),
-        "unique_videos": len(unique_videos),
-        "filtered_videos_count": len(filtered_videos),
-        "all_videos": sorted_videos  # 保存过滤后的视频,按评分排序
-    }
-    
-    try:
-        with open(raw_file, 'w', encoding='utf-8') as f:
-            json.dump(raw_data, f, indent=2, ensure_ascii=False)
-        print(f"\n💾 原始数据已保存到: {raw_file}")
-    except Exception as e:
-        print(f"❌ 保存原始数据失败: {e}")
-        return
-    
-    # 10. 生成并保存日报
-    action = generate_action(top3_videos)
-    risk = generate_risk(sorted_videos, themes)
-    
-    daily_report = {
-        "date": today,
-        "themes_used": themes,
-        "dimensions": [],  # 新增:用户可选的维度标签(如:["健康", "情绪", "工作"]),向后兼容
-        "top3": [
-            {
-                "title": video['title'],
-                "channel": video['channel_title'],
-                "url": video['url'],
-                "score": video['score'],
-                "published_at": video['published_at'],
-                "scoring_details": video['scoring_details']
-            }
-            for video in top3_videos
-        ],
-        "action": action,
-        "risk": risk
-    }
-    
-    archive_file = archive_dir / f"{today}.json"
-    try:
-        with open(archive_file, 'w', encoding='utf-8') as f:
-            json.dump(daily_report, f, indent=2, ensure_ascii=False)
-        print(f"💾 日报信号已保存到: {archive_file}")
-    except Exception as e:
-        print(f"❌ 保存日报信号失败: {e}")
-        return
-    
-    # 11. 如果模式是 research,生成研究报告
-    if mode == "research":
-        llm = init_research_llm()
-        if llm:
-            try:
-                research_report = generate_research_report(top3_videos, themes, llm)
-                if research_report:
-                    # 添加 sources 字段
-                    research_report["sources"] = prepare_sources_data(top3_videos)
-                    research_report["date"] = today
-                    research_report["themes_used"] = themes
-                    
-                    # 保存研究报告
-                    research_file = archive_dir / f"{today}_research.json"
-                    with open(research_file, 'w', encoding='utf-8') as f:
-                        json.dump(research_report, f, indent=2, ensure_ascii=False)
-                    print(f"\n💾 研究报告已保存到: {research_file}")
-                    
-                    # 显示研究报告摘要
-                    print("\n" + "=" * 70)
-                    print("🔬 研究报告摘要")
-                    print("=" * 70)
-                    print(f"核心问题: {research_report.get('question', 'N/A')}")
-                    print(f"\n关键发现:")
-                    for i, finding in enumerate(research_report.get('key_findings', []), 1):
-                        print(f"  {i}. {finding}")
-                    print(f"\n为什么重要: {research_report.get('why_it_matters_to_me', 'N/A')}")
-                    print(f"\n下一步行动:")
-                    for i, step in enumerate(research_report.get('next_steps', []), 1):
-                        print(f"  {i}. {step}")
-                    print("=" * 70)
-                else:
-                    print("⚠️  研究报告生成失败,已跳过")
-            except Exception as e:
-                print(f"⚠️  生成研究报告时出错: {e}")
-                import traceback
-                traceback.print_exc()
-        else:
-            print("⚠️  未配置 LLM,跳过研究模式")
-    
-    # 12. 显示日报摘要
-    print("\n" + "=" * 70)
-    print("📄 日报摘要")
-    print("=" * 70)
-    print(f"日期: {daily_report['date']}")
-    print(f"主题: {', '.join(daily_report['themes_used'])}")
-    print(f"\n推荐行动 (Action):")
-    print(f"  {daily_report['action']}")
-    print(f"\n风险评估 (Risk):")
-    print(f"  {daily_report['risk']}")
-    print("=" * 70)
-
-
-if __name__ == "__main__":
-    main()
+"""
+YouTube 视频搜索脚本 - 按主题搜索、评分、生成日报
+从 themes.yaml 读取主题列表,对每个主题分别搜索 YouTube
+合并结果、评分、排序后生成日报报告
+"""
+
+import sys
+import os
+import json
+import argparse
+import re
+from pathlib import Path
+from datetime import datetime, timedelta, timezone
+
+# On Windows, re-wrap stdout/stderr as UTF-8 text streams so the emoji and
+# Chinese messages printed by this script can be encoded.
+if sys.platform == 'win32':
+    import io
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
+
+# httpx is required: print an install hint and exit with status 1 if missing.
+try:
+    import httpx
+except ImportError:
+    print("❌ 错误: 需要安装 httpx 库")
+    print("💡 运行: pip install httpx")
+    sys.exit(1)
+
+# PyYAML is required as well; same handling as httpx.
+try:
+    import yaml
+except ImportError:
+    print("❌ 错误: 需要安装 PyYAML 库")
+    print("💡 运行: pip install pyyaml")
+    sys.exit(1)
+
+# Load a .env file when python-dotenv is installed.
+try:
+    from dotenv import load_dotenv
+    load_dotenv()
+except ImportError:
+    pass  # dotenv is optional; skip silently when it is not installed
+
+# Optional: the LLM client, only needed for the "research" mode.
+try:
+    from hello_agents.core.llm import HelloAgentsLLM
+    LLM_AVAILABLE = True
+except ImportError:
+    LLM_AVAILABLE = False
+
+# Configuration constants
+DAYS_WINDOW = int(os.getenv("DAYS_WINDOW", "14"))  # time window in days, default 14; a non-integer value raises ValueError at import
+
+
+def load_youtube_api_key():
+    """Return the YouTube API key, or None when none is configured.
+
+    Lookup order: the YOUTUBE_API_KEY environment variable, then the first
+    non-empty ``YOUTUBE_API_KEY=<value>`` line of a file named ``config``
+    located next to this script. A failure while reading that file is
+    printed and treated as "no key".
+    """
+    env_key = os.getenv("YOUTUBE_API_KEY")
+    if env_key:
+        return env_key
+
+    config_path = Path(__file__).parent / "config"
+    if not config_path.exists():
+        return None
+
+    prefix = "YOUTUBE_API_KEY="
+    try:
+        with open(config_path, 'r', encoding='utf-8') as handle:
+            for raw_line in handle:
+                entry = raw_line.strip()
+                # A commented-out entry starts with "#", so it can never
+                # match the prefix and is skipped here as well.
+                if not entry.startswith(prefix):
+                    continue
+                value = entry[len(prefix):].strip()
+                if value:
+                    return value
+    except Exception as e:
+        print(f"⚠️  读取配置文件失败: {e}")
+
+    return None
+
+
+def load_themes():
+    """Load the search themes from ``themes.yaml`` next to this script.
+
+    The file is expected to hold a mapping with a ``themes`` list. Entries
+    are converted to stripped strings and empty ones are dropped, so a YAML
+    scalar such as ``2024`` no longer invalidates the whole list. A single
+    string under ``themes`` is treated as one theme.
+
+    Returns:
+        A list of non-empty theme strings, or an empty list when the file
+        is missing, unreadable, not a mapping, or contains no usable theme.
+        Every problem is printed rather than raised.
+    """
+    themes_file = Path(__file__).parent / "themes.yaml"
+    if not themes_file.exists():
+        print(f"❌ 错误: 找不到 themes.yaml 文件: {themes_file}")
+        return []
+
+    try:
+        with open(themes_file, 'r', encoding='utf-8') as f:
+            data = yaml.safe_load(f)
+    except Exception as e:
+        print(f"❌ 读取 themes.yaml 失败: {e}")
+        import traceback
+        traceback.print_exc()
+        return []
+
+    # safe_load returns None for an empty document; a bare list or scalar
+    # has no "themes" key to read either.
+    if not isinstance(data, dict):
+        print(f"❌ 错误: themes.yaml 文件为空或格式错误")
+        return []
+
+    raw_themes = data.get('themes')
+    if isinstance(raw_themes, str):
+        # "themes: AI" means one theme, not the characters "A" and "I".
+        raw_themes = [raw_themes]
+    elif not isinstance(raw_themes, (list, tuple)):
+        raw_themes = []
+
+    # YAML may parse entries as int/bool/float; callers need strings because
+    # they join and lower-case the themes.
+    themes = [str(item).strip() for item in raw_themes if item is not None]
+    themes = [theme for theme in themes if theme]
+    if not themes:
+        print(f"⚠️  警告: themes.yaml 中未找到主题列表")
+        return []
+
+    print(f"✅ 加载了 {len(themes)} 个主题: {', '.join(themes)}")
+    return themes
+
+
+def load_whitelist_channels():
+    """Read the whitelisted channels from ``channels.yaml`` beside this script.
+
+    Returns:
+        The value stored under ``whitelist_channels`` (an empty list when
+        the key is absent), or an empty list when the file is missing,
+        empty, or cannot be processed. Problems are printed as warnings.
+    """
+    yaml_path = Path(__file__).parent / "channels.yaml"
+    if not yaml_path.exists():
+        print(f"⚠️  警告: 找不到 channels.yaml 文件: {yaml_path}")
+        return []
+
+    try:
+        with open(yaml_path, 'r', encoding='utf-8') as stream:
+            parsed = yaml.safe_load(stream)
+        if parsed is None:
+            print(f"⚠️  警告: channels.yaml 文件为空或格式错误")
+            return []
+        whitelist = parsed.get('whitelist_channels', [])
+        print(f"✅ 加载了 {len(whitelist)} 个白名单频道")
+        return whitelist
+    except Exception as e:
+        print(f"⚠️  读取 channels.yaml 失败: {e}")
+        return []
+
+
+def search_youtube_videos(query: str, max_results: int = 10, api_key: str = None):
+    """Search YouTube for videos matching ``query`` via the Data API v3.
+
+    Args:
+        query: Search text; it is also stored on each result under "query".
+        max_results: Number of results to request, capped at 50.
+        api_key: API key to use. When falsy, load_youtube_api_key() is
+            consulted instead.
+
+    Returns:
+        A list of video dicts (empty when the response has no items), or
+        None when no API key is available or the request or the handling
+        of its response fails. Failures are printed, never raised.
+    """
+    key = api_key or load_youtube_api_key()
+    if not key:
+        print("❌ 错误: 未找到 YouTube API Key")
+        print("💡 请设置环境变量 YOUTUBE_API_KEY 或在 config 文件中配置")
+        return None
+
+    def build_video(item):
+        # Flatten one search result into the dict shape used by this script.
+        video_id = item["id"]["videoId"]
+        snippet = item["snippet"]
+        return {
+            "video_id": video_id,
+            "title": snippet["title"],
+            "description": snippet["description"],
+            "channel_title": snippet["channelTitle"],
+            "channel_id": snippet["channelId"],
+            "published_at": snippet["publishedAt"],
+            "thumbnail": snippet["thumbnails"].get("medium", {}).get("url", ""),
+            "url": f"https://www.youtube.com/watch?v={video_id}",
+            "query": query,  # remember which search produced this video
+        }
+
+    try:
+        response = httpx.get(
+            "https://www.googleapis.com/youtube/v3/search",
+            params={
+                "key": key,
+                "q": query,
+                "part": "snippet",
+                "type": "video",
+                "maxResults": min(max_results, 50),  # API limit
+                "order": "relevance",
+            },
+            timeout=10.0,
+        )
+        response.raise_for_status()
+
+        payload = response.json()
+        if "items" not in payload or not payload["items"]:
+            return []
+        return [build_video(item) for item in payload["items"]]
+
+    except httpx.HTTPStatusError as e:
+        status = e.response.status_code
+        if status == 403:
+            print(f"❌ 错误: API 密钥无效或配额已用完 (查询: {query})")
+        else:
+            print(f"❌ HTTP 错误: {status} (查询: {query})")
+        return None
+    except Exception as e:
+        print(f"❌ 搜索失败 (查询: {query}): {str(e)}")
+        return None
+
+
+def parse_published_time(published_at_str: str):
+    """Convert a publish timestamp such as ``2024-01-01T12:00:00Z`` to a datetime.
+
+    Returns:
+        The parsed datetime, or None (after printing a warning) when the
+        value cannot be parsed.
+    """
+    try:
+        # Spell the UTC designator out as an explicit offset; fromisoformat()
+        # only accepts a trailing "Z" from Python 3.11 onwards.
+        iso_text = published_at_str.replace('Z', '+00:00')
+        return datetime.fromisoformat(iso_text)
+    except Exception as e:
+        print(f"⚠️  解析发布时间失败: {published_at_str}, 错误: {e}")
+        return None
+
+
+def is_within_time_window(published_at_str: str, days_window: int = DAYS_WINDOW):
+    """Tell whether a video was published within the last ``days_window`` days.
+
+    Args:
+        published_at_str: Publish timestamp, parsed by parse_published_time().
+        days_window: Window length in days; defaults to DAYS_WINDOW.
+
+    Returns:
+        True when the video's age is at most ``days_window`` days; False
+        when it is older or the timestamp cannot be parsed.
+    """
+    published = parse_published_time(published_at_str)
+    if not published:
+        return False
+
+    age = datetime.now(timezone.utc) - published
+    return age <= timedelta(days=days_window)
+
+
+def calculate_time_score(published_at_str: str):
+    """Return the freshness bonus for a video.
+
+    Returns:
+        3 when the video is at most 24 hours old, 2 when at most 48 hours
+        old, otherwise 0. An unparseable timestamp also scores 0.
+    """
+    published = parse_published_time(published_at_str)
+    if not published:
+        return 0
+
+    age = datetime.now(timezone.utc) - published
+    # (age limit in hours, bonus) pairs, checked from freshest to oldest.
+    for limit_hours, bonus in ((24, 3), (48, 2)):
+        if age <= timedelta(hours=limit_hours):
+            return bonus
+    return 0
+
+
+def count_theme_keywords(text: str, themes: list):
+    """Count how many themes occur in ``text``, ignoring case.
+
+    Each theme counts at most once and is matched as a plain substring.
+    An empty or None ``text`` yields 0.
+    """
+    if not text:
+        return 0
+
+    haystack = text.lower()
+    return sum(1 for theme in themes if theme.lower() in haystack)
+
+
+def score_video(video: dict, themes: list, whitelist_channels: list):
+    """Compute the ranking score of one video.
+
+    The score is the sum of:
+      * 10 when the video's channel title is in ``whitelist_channels``;
+      * 5 per theme found in the title plus 5 per theme found in the
+        description (see count_theme_keywords);
+      * the freshness bonus from calculate_time_score().
+    """
+    whitelist_bonus = 10 if video['channel_title'] in whitelist_channels else 0
+
+    keyword_hits = (
+        count_theme_keywords(video['title'], themes)
+        + count_theme_keywords(video['description'], themes)
+    )
+
+    freshness_bonus = calculate_time_score(video['published_at'])
+
+    return whitelist_bonus + keyword_hits * 5 + freshness_bonus
+
+
+def merge_and_deduplicate_videos(all_videos: list):
+    """合并视频列表并按 videoId 去重"""
+    video_dict = {}
+    
+    for video in all_videos:
+        video_id = video['video_id']
+        if video_id not in video_dict:
+            video_dict[video_id] = video
+        else:
+            # 如果已存在,合并查询关键词
+            existing_queries = video_dict[video_id].get('queries', [])
+            if isinstance(existing_queries, str):
+                existing_queries = [existing_queries]
+            if video['query'] not in existing_queries:
+                existing_queries.append(video['query'])
+            video_dict[video_id]['queries'] = existing_queries
+    
+    return list(video_dict.values())
+
+
+def generate_action(videos: list):
+    """生成 action 字段:从 Top1 生成1条可执行动作(≤15min)"""
+    if not videos:
+        return "暂无推荐视频"
+    
+    # 只使用 Top1
+    top1 = videos[0]
+    action = f"观看《{top1['title']}》({top1['channel_title']}),预计≤15分钟"
+    
+    return action
+
+
+def has_clickbait_words(title: str):
+    """检查标题中是否包含标题党词汇"""
+    clickbait_words = ['INSANE', 'HYPE', 'SHOCKING', 'UNBELIEVABLE', 'MIND-BLOWING', 
+                       'AMAZING', 'INCREDIBLE', 'YOU WON\'T BELIEVE', 'THIS WILL BLOW YOUR MIND']
+    title_upper = title.upper()
+    for word in clickbait_words:
+        if word in title_upper:
+            return True
+    return False
+
+
+def is_older_than_days(published_at_str: str, days: int = 30):
+    """检查视频是否超过指定天数"""
+    published_time = parse_published_time(published_at_str)
+    if not published_time:
+        return False
+    
+    now = datetime.now(timezone.utc)
+    time_diff = now - published_time
+    
+    return time_diff > timedelta(days=days)
+
+
+def generate_risk(videos: list, themes: list):
+    """生成 risk 字段:偏差检测"""
+    if not videos:
+        return "无风险"
+    
+    # 只检查 Top3
+    top3 = videos[:3]
+    warnings = []
+    
+    # 检查是否有超过30天的视频
+    old_videos = []
+    for video in top3:
+        if is_older_than_days(video['published_at'], days=30):
+            old_videos.append(video['title'])
+    
+    if old_videos:
+        warnings.append(f"Top3中存在超过30天的视频: {', '.join(old_videos[:2])}")
+    
+    # 检查是否有标题党词汇
+    clickbait_videos = []
+    for video in top3:
+        if has_clickbait_words(video['title']):
+            clickbait_videos.append(video['title'])
+    
+    if clickbait_videos:
+        warnings.append(f"检测到标题党词汇: {', '.join(clickbait_videos[:2])}")
+    
+    # 如果有警告,返回警告;否则返回正面评价
+    if warnings:
+        return "; ".join(warnings)
+    else:
+        return "今日信号较新且较可信"
+
+
+def init_research_llm():
+    """初始化用于研究模式的 LLM(使用通义千问/ModelScope配置)"""
+    if not LLM_AVAILABLE:
+        print("⚠️  警告: hello_agents 模块未安装,无法使用研究模式")
+        return None
+    
+    # 从环境变量读取 LLM 配置(优先级顺序,与 chapter9 保持一致)
+    # 优先使用 ModelScope 配置(通义千问)
+    llm_model = (
+        os.getenv("LLM_MODEL") or 
+        os.getenv("LLM_MODEL_ID") or
+        "Qwen/Qwen2.5-7B-Instruct"  # 默认通义千问模型
+    )
+    llm_api_key = (
+        os.getenv("LLM_API_KEY") or  # 优先使用 LLM_API_KEY(阿里云通义千问)
+        os.getenv("MODELSCOPE_API_KEY") or 
+        os.getenv("MODELSCOPE_API_TOKEN")
+    )
+    llm_base_url = (
+        os.getenv("LLM_BASE_URL") or 
+        "https://api-inference.modelscope.cn/v1/"  # ModelScope 默认地址
+    )
+    llm_provider = os.getenv("LLM_PROVIDER", "modelscope")
+    
+    if not llm_api_key:
+        print("⚠️  警告: 未找到 LLM API Key,研究模式需要配置 LLM")
+        print("💡 请设置环境变量(推荐在 .env 文件中配置):")
+        print("   MODELSCOPE_API_KEY=your-modelscope-token-here")
+        print("   LLM_MODEL=Qwen/Qwen2.5-7B-Instruct")
+        print("   LLM_BASE_URL=https://api-inference.modelscope.cn/v1/")
+        print("   LLM_PROVIDER=modelscope")
+        return None
+    
+    try:
+        llm = HelloAgentsLLM(
+            model=llm_model,
+            api_key=llm_api_key,
+            base_url=llm_base_url,
+            provider=llm_provider
+        )
+        print(f"✅ LLM 初始化成功: {llm_model} ({llm_provider})")
+        return llm
+    except Exception as e:
+        print(f"⚠️  初始化 LLM 失败: {e}")
+        return None
+
+
+def prepare_sources_data(top3_videos: list):
+    """从 Top3 视频中提取 sources 数据"""
+    sources = []
+    for video in top3_videos:
+        sources.append({
+            "title": video['title'],
+            "channel": video['channel_title'],
+            "url": video['url'],
+            "published_at": video['published_at'],
+            "score": video['score']
+        })
+    return sources
+
+
+def extract_json_from_text(text: str):
+    """从文本中提取 JSON 内容(处理 LLM 可能返回的格式化文本)"""
+    # 尝试直接解析
+    try:
+        return json.loads(text.strip())
+    except json.JSONDecodeError:
+        pass
+    
+    # 尝试提取 JSON 代码块
+    json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
+    if json_match:
+        try:
+            return json.loads(json_match.group(1))
+        except json.JSONDecodeError:
+            pass
+    
+    # 尝试提取第一个完整的 JSON 对象
+    json_match = re.search(r'\{.*\}', text, re.DOTALL)
+    if json_match:
+        try:
+            return json.loads(json_match.group(0))
+        except json.JSONDecodeError:
+            pass
+    
+    return None
+
+
+def generate_research_report(top3_videos: list, themes: list, llm):
+    """使用 LLM 生成研究报告"""
+    if not top3_videos:
+        return None
+    
+    # 构建视频信息文本
+    videos_info = []
+    for i, video in enumerate(top3_videos, 1):
+        videos_info.append(
+            f"{i}. 标题: {video['title']}\n"
+            f"   频道: {video['channel_title']}\n"
+            f"   发布时间: {video['published_at']}\n"
+            f"   评分: {video['score']}分\n"
+            f"   链接: {video['url']}"
+        )
+    
+    videos_text = "\n\n".join(videos_info)
+    themes_text = ", ".join(themes)
+    
+    # 构建 prompt
+    prompt = f"""基于以下 Top3 YouTube 视频信息,生成一份结构化研究报告。
+
+视频信息:
+{videos_text}
+
+搜索主题:{themes_text}
+
+请以 JSON 格式返回以下内容:
+1. question: 一个核心问题,概括这些视频的共同关注点
+2. key_findings: 3条发现,每条1句话,基于标题/频道/发布时间推断,使用"可能/倾向"等措辞
+3. why_it_matters_to_me: 为什么这些信息对我重要(个性化解释)
+4. next_steps: 1-3条行动建议,每条≤15分钟
+
+请严格按照以下 JSON 格式返回(不要包含其他文字):
+{{
+  "question": "核心问题",
+  "key_findings": [
+    "发现1(使用可能/倾向等措辞)",
+    "发现2(使用可能/倾向等措辞)",
+    "发现3(使用可能/倾向等措辞)"
+  ],
+  "why_it_matters_to_me": "个性化解释",
+  "next_steps": [
+    "行动建议1(≤15分钟)",
+    "行动建议2(≤15分钟)",
+    "行动建议3(≤15分钟)"
+  ]
+}}"""
+
+    messages = [
+        {"role": "system", "content": "你是一位专业的研究分析师,擅长从视频信息中提取关键洞察并给出可执行的行动建议。请始终以 JSON 格式返回结果。"},
+        {"role": "user", "content": prompt}
+    ]
+    
+    try:
+        print("\n🔬 正在使用 LLM 生成研究报告...")
+        response = llm.invoke(messages)
+        
+        if not response:
+            print("⚠️  LLM 返回空响应")
+            return None
+        
+        # 提取 JSON
+        research_data = extract_json_from_text(response)
+        
+        if not research_data:
+            print(f"⚠️  无法解析 LLM 响应为 JSON,原始响应: {response[:200]}...")
+            return None
+        
+        # 验证必需字段
+        required_fields = ["question", "key_findings", "why_it_matters_to_me", "next_steps"]
+        missing_fields = [field for field in required_fields if field not in research_data]
+        if missing_fields:
+            print(f"⚠️  LLM 响应缺少必需字段: {', '.join(missing_fields)}")
+            return None
+        
+        # 确保 key_findings 是列表且有3条
+        if not isinstance(research_data.get("key_findings"), list):
+            research_data["key_findings"] = []
+        if len(research_data["key_findings"]) != 3:
+            # 如果不足3条,填充或截断
+            while len(research_data["key_findings"]) < 3:
+                research_data["key_findings"].append("暂无发现")
+            research_data["key_findings"] = research_data["key_findings"][:3]
+        
+        # 确保 next_steps 是列表,最多3条
+        if not isinstance(research_data.get("next_steps"), list):
+            research_data["next_steps"] = []
+        research_data["next_steps"] = research_data["next_steps"][:3]
+        
+        print("✅ 研究报告生成成功")
+        return research_data
+        
+    except Exception as e:
+        print(f"⚠️  生成研究报告时出错: {e}")
+        import traceback
+        traceback.print_exc()
+        return None
+
+
+def main():
+    """主函数"""
+    # 解析命令行参数
+    parser = argparse.ArgumentParser(description="YouTube 视频搜索 - 多主题智能搜索与日报生成")
+    parser.add_argument(
+        "--mode",
+        type=str,
+        choices=["daily_signal", "research"],
+        default="research",
+        help="运行模式: research (默认,生成日报+研究报告) 或 daily_signal (仅生成日报)"
+    )
+    args = parser.parse_args()
+    mode = args.mode
+    
+    print("=" * 70)
+    print("YouTube 视频搜索 - 多主题智能搜索与日报生成")
+    if mode == "research":
+        print("运行模式: 研究模式 (将生成日报 + 研究报告)")
+    else:
+        print("运行模式: 日报模式 (仅生成日报)")
+    print("=" * 70)
+    
+    # 1. 加载配置
+    themes = load_themes()
+    if not themes:
+        print("❌ 无法加载主题列表,退出")
+        return
+    
+    whitelist_channels = load_whitelist_channels()
+    api_key = load_youtube_api_key()
+    if not api_key:
+        print("❌ 无法加载 API Key,退出")
+        return
+    
+    # 2. 对每个主题搜索
+    print(f"\n🔍 开始搜索 {len(themes)} 个主题...")
+    all_videos = []
+    
+    for theme in themes:
+        print(f"  搜索主题: {theme}")
+        videos = search_youtube_videos(theme, max_results=10, api_key=api_key)
+        if videos:
+            all_videos.extend(videos)
+            print(f"    ✅ 找到 {len(videos)} 个视频")
+        else:
+            print(f"    ⚠️  未找到视频或搜索失败")
+    
+    if not all_videos:
+        print("❌ 未找到任何视频,退出")
+        return
+    
+    print(f"\n📊 合并前共找到 {len(all_videos)} 个视频")
+    
+    # 3. 合并去重
+    unique_videos = merge_and_deduplicate_videos(all_videos)
+    print(f"📊 去重后剩余 {len(unique_videos)} 个唯一视频")
+    
+    # 4. 时间窗口过滤:只考虑最近 DAYS_WINDOW 天的视频
+    print(f"\n⏰ 应用时间窗口过滤({DAYS_WINDOW}天)...")
+    filtered_videos = [v for v in unique_videos if is_within_time_window(v['published_at'], DAYS_WINDOW)]
+    excluded_count = len(unique_videos) - len(filtered_videos)
+    if excluded_count > 0:
+        print(f"   ⚠️  过滤掉 {excluded_count} 个超过 {DAYS_WINDOW} 天的视频")
+    print(f"   ✅ 剩余 {len(filtered_videos)} 个视频参与排序")
+    
+    if not filtered_videos:
+        print(f"❌ 时间窗口内({DAYS_WINDOW}天)未找到任何视频,退出")
+        return
+    
+    # 5. 评分
+    print(f"\n⭐ 开始评分...")
+    for video in filtered_videos:
+        score = score_video(video, themes, whitelist_channels)
+        video['score'] = score
+        video['scoring_details'] = {
+            'whitelist_bonus': 10 if video['channel_title'] in whitelist_channels else 0,
+            'keyword_matches': count_theme_keywords(video['title'], themes) + count_theme_keywords(video['description'], themes),
+            'time_bonus': calculate_time_score(video['published_at'])
+        }
+    
+    # 6. 排序并取 Top 3
+    sorted_videos = sorted(filtered_videos, key=lambda x: x['score'], reverse=True)
+    top3_videos = sorted_videos[:3]
+    
+    print(f"\n🏆 Top 3 视频:")
+    for i, video in enumerate(top3_videos, 1):
+        print(f"  {i}. [{video['score']}分] {video['title']}")
+        print(f"     频道: {video['channel_title']}")
+        print(f"     链接: {video['url']}")
+    
+    # 7. 生成日期字符串
+    today = datetime.now().strftime("%Y-%m-%d")
+    
+    # 8. 创建输出目录
+    base_dir = Path(__file__).parent
+    raw_dir = base_dir / "raw" / "youtube"
+    archive_dir = base_dir / "archive" / "youtube"
+    raw_dir.mkdir(parents=True, exist_ok=True)
+    archive_dir.mkdir(parents=True, exist_ok=True)
+    
+    # 9. 保存原始数据
+    raw_file = raw_dir / f"{today}_raw.json"
+    raw_data = {
+        "date": today,
+        "themes_used": themes,
+        "whitelist_channels": whitelist_channels,
+        "days_window": DAYS_WINDOW,
+        "total_videos_found": len(all_videos),
+        "unique_videos": len(unique_videos),
+        "filtered_videos_count": len(filtered_videos),
+        "all_videos": sorted_videos  # 保存过滤后的视频,按评分排序
+    }
+    
+    try:
+        with open(raw_file, 'w', encoding='utf-8') as f:
+            json.dump(raw_data, f, indent=2, ensure_ascii=False)
+        print(f"\n💾 原始数据已保存到: {raw_file}")
+    except Exception as e:
+        print(f"❌ 保存原始数据失败: {e}")
+        return
+    
+    # 10. 生成并保存日报
+    action = generate_action(top3_videos)
+    risk = generate_risk(sorted_videos, themes)
+    
+    daily_report = {
+        "date": today,
+        "themes_used": themes,
+        "dimensions": [],  # 新增:用户可选的维度标签(如:["健康", "情绪", "工作"]),向后兼容
+        "top3": [
+            {
+                "title": video['title'],
+                "channel": video['channel_title'],
+                "url": video['url'],
+                "score": video['score'],
+                "published_at": video['published_at'],
+                "scoring_details": video['scoring_details']
+            }
+            for video in top3_videos
+        ],
+        "action": action,
+        "risk": risk
+    }
+    
+    archive_file = archive_dir / f"{today}.json"
+    try:
+        with open(archive_file, 'w', encoding='utf-8') as f:
+            json.dump(daily_report, f, indent=2, ensure_ascii=False)
+        print(f"💾 日报信号已保存到: {archive_file}")
+    except Exception as e:
+        print(f"❌ 保存日报信号失败: {e}")
+        return
+    
+    # 11. 如果模式是 research,生成研究报告
+    if mode == "research":
+        llm = init_research_llm()
+        if llm:
+            try:
+                research_report = generate_research_report(top3_videos, themes, llm)
+                if research_report:
+                    # 添加 sources 字段
+                    research_report["sources"] = prepare_sources_data(top3_videos)
+                    research_report["date"] = today
+                    research_report["themes_used"] = themes
+                    
+                    # 保存研究报告
+                    research_file = archive_dir / f"{today}_research.json"
+                    with open(research_file, 'w', encoding='utf-8') as f:
+                        json.dump(research_report, f, indent=2, ensure_ascii=False)
+                    print(f"\n💾 研究报告已保存到: {research_file}")
+                    
+                    # 显示研究报告摘要
+                    print("\n" + "=" * 70)
+                    print("🔬 研究报告摘要")
+                    print("=" * 70)
+                    print(f"核心问题: {research_report.get('question', 'N/A')}")
+                    print(f"\n关键发现:")
+                    for i, finding in enumerate(research_report.get('key_findings', []), 1):
+                        print(f"  {i}. {finding}")
+                    print(f"\n为什么重要: {research_report.get('why_it_matters_to_me', 'N/A')}")
+                    print(f"\n下一步行动:")
+                    for i, step in enumerate(research_report.get('next_steps', []), 1):
+                        print(f"  {i}. {step}")
+                    print("=" * 70)
+                else:
+                    print("⚠️  研究报告生成失败,已跳过")
+            except Exception as e:
+                print(f"⚠️  生成研究报告时出错: {e}")
+                import traceback
+                traceback.print_exc()
+        else:
+            print("⚠️  未配置 LLM,跳过研究模式")
+    
+    # 12. 显示日报摘要
+    print("\n" + "=" * 70)
+    print("📄 日报摘要")
+    print("=" * 70)
+    print(f"日期: {daily_report['date']}")
+    print(f"主题: {', '.join(daily_report['themes_used'])}")
+    print(f"\n推荐行动 (Action):")
+    print(f"  {daily_report['action']}")
+    print(f"\n风险评估 (Risk):")
+    print(f"  {daily_report['risk']}")
+    print("=" * 70)
+
+
+if __name__ == "__main__":
+    main()

+ 14 - 14
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/themes.yaml.example → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/themes.yaml.example

@@ -1,14 +1,14 @@
-# 搜索主题配置示例
-# 复制此文件为 themes.yaml 并修改为你感兴趣的主题
-
-themes:
-  - mcp          # Model Context Protocol
-  - agent        # AI Agent
-  - rag          # Retrieval-Augmented Generation
-  - money        # 理财/投资
-  - AI           # 人工智能
-  # 你可以添加更多主题,例如:
-  # - 健康
-  # - 学习
-  # - 工作
-
+# 搜索主题配置示例
+# 复制此文件为 themes.yaml 并修改为你感兴趣的主题
+
+themes:
+  - mcp          # Model Context Protocol
+  - agent        # AI Agent
+  - rag          # Retrieval-Augmented Generation
+  - money        # 理财/投资
+  - AI           # 人工智能
+  # 你可以添加更多主题,例如:
+  # - 健康
+  # - 学习
+  # - 工作
+

+ 254 - 254
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/write_report.py → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/write_report.py

@@ -1,254 +1,254 @@
-"""
-报告编写工具 - 创建日报/周报/月报
-支持交互式输入,保存为Markdown格式
-"""
-
-import sys
-import os
-from pathlib import Path
-from datetime import datetime
-
-# 设置控制台编码为UTF-8(Windows)
-if sys.platform == 'win32':
-    import io
-    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
-    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
-
-
-def get_week_number(date=None):
-    """获取ISO周数"""
-    if date is None:
-        date = datetime.now()
-    year, week, _ = date.isocalendar()
-    return f"{year}-W{week:02d}"
-
-
-def get_current_date_id(report_type):
-    """获取当前日期标识"""
-    now = datetime.now()
-    
-    if report_type == "daily":
-        return now.strftime("%Y-%m-%d")
-    elif report_type == "weekly":
-        return get_week_number(now)
-    elif report_type == "monthly":
-        return now.strftime("%Y-%m")
-    else:
-        return now.strftime("%Y-%m-%d")
-
-
-def get_report_dir(base_dir, report_type):
-    """获取报告目录路径"""
-    report_dir = base_dir / "archive" / "reports" / report_type
-    report_dir.mkdir(parents=True, exist_ok=True)
-    return report_dir
-
-
-def input_multiline(prompt="请输入报告内容(输入空行后按Enter结束):\n"):
-    """多行输入,以空行结束"""
-    print(prompt)
-    lines = []
-    empty_line_count = 0
-    
-    while True:
-        try:
-            line = input()
-            if line.strip() == "":
-                empty_line_count += 1
-                if empty_line_count >= 1:  # 一个空行就结束
-                    break
-            else:
-                empty_line_count = 0
-                lines.append(line)
-        except EOFError:
-            break
-        except KeyboardInterrupt:
-            print("\n\n⚠️  输入已取消")
-            return None
-    
-    return "\n".join(lines) if lines else None
-
-
-def save_report(report_dir, date_id, content, report_type):
-    """保存报告到文件"""
-    file_path = report_dir / f"{date_id}.md"
-    
-    # 如果文件已存在,询问是否覆盖
-    if file_path.exists():
-        response = input(f"⚠️  文件 {file_path.name} 已存在,是否覆盖?(y/n): ").strip().lower()
-        if response not in ['y', 'yes', '是']:
-            print("❌ 已取消保存")
-            return False
-    
-    try:
-        with open(file_path, 'w', encoding='utf-8') as f:
-            f.write(content)
-        print(f"✅ 报告已保存到: {file_path}")
-        return True
-    except Exception as e:
-        print(f"❌ 保存失败: {e}")
-        return False
-
-
-def create_report(report_type, base_dir):
-    """创建报告"""
-    print("=" * 70)
-    print(f"创建{report_type}报告")
-    print("=" * 70)
-    
-    # 获取日期标识
-    date_id = get_current_date_id(report_type)
-    print(f"日期标识: {date_id}")
-    
-    # 获取报告目录
-    report_dir = get_report_dir(base_dir, report_type)
-    
-    # 检查是否已存在
-    existing_file = report_dir / f"{date_id}.md"
-    if existing_file.exists():
-        print(f"📄 发现已有报告: {existing_file.name}")
-        view = input("是否查看现有内容?(y/n): ").strip().lower()
-        if view in ['y', 'yes', '是']:
-            try:
-                with open(existing_file, 'r', encoding='utf-8') as f:
-                    print("\n" + "=" * 70)
-                    print("现有内容:")
-                    print("=" * 70)
-                    print(f.read())
-                    print("=" * 70)
-            except Exception as e:
-                print(f"⚠️  读取失败: {e}")
-        
-        edit = input("\n是否编辑/覆盖?(y/n): ").strip().lower()
-        if edit not in ['y', 'yes', '是']:
-            print("❌ 已取消")
-            return
-    
-    # 输入报告内容
-    print(f"\n请开始输入{report_type}报告内容...")
-    print("提示:输入空行后按Enter结束输入")
-    content = input_multiline()
-    
-    if content is None or content.strip() == "":
-        print("❌ 内容为空,已取消保存")
-        return
-    
-    # 添加日期标记(可选)
-    header = f"# {report_type}报告 - {date_id}\n\n"
-    full_content = header + content
-    
-    # 保存文件
-    save_report(report_dir, date_id, full_content, report_type)
-
-
-def list_reports(base_dir, report_type):
-    """列出已有报告"""
-    report_dir = get_report_dir(base_dir, report_type)
-    
-    if not report_dir.exists():
-        print(f"📁 目录不存在: {report_dir}")
-        return
-    
-    reports = sorted(report_dir.glob("*.md"))
-    
-    if not reports:
-        print(f"📁 暂无{report_type}报告")
-        return
-    
-    print(f"\n📋 {report_type}报告列表 ({len(reports)}个):")
-    print("-" * 70)
-    for report in reports:
-        size = report.stat().st_size
-        mtime = datetime.fromtimestamp(report.stat().st_mtime).strftime("%Y-%m-%d %H:%M")
-        print(f"  {report.name:20s}  {size:6d} 字节  {mtime}")
-    print("-" * 70)
-
-
-def view_report(base_dir, report_type, date_id=None):
-    """查看报告内容"""
-    if date_id is None:
-        date_id = get_current_date_id(report_type)
-    
-    report_dir = get_report_dir(base_dir, report_type)
-    file_path = report_dir / f"{date_id}.md"
-    
-    if not file_path.exists():
-        print(f"❌ 报告不存在: {file_path}")
-        return
-    
-    try:
-        with open(file_path, 'r', encoding='utf-8') as f:
-            content = f.read()
-        print("\n" + "=" * 70)
-        print(f"{report_type}报告 - {date_id}")
-        print("=" * 70)
-        print(content)
-        print("=" * 70)
-    except Exception as e:
-        print(f"❌ 读取失败: {e}")
-
-
-def main():
-    """主函数"""
-    import sys
-    
-    base_dir = Path(__file__).parent
-    
-    # 检查命令行参数,支持直接启动日报模式
-    if len(sys.argv) > 1 and sys.argv[1] in ['--daily', '--auto-daily']:
-        create_report("daily", base_dir)
-        return
-    
-    print("=" * 70)
-    print("报告编写工具")
-    print("=" * 70)
-    print("\n请选择操作:")
-    print("  1. 创建日报")
-    print("  2. 创建周报")
-    print("  3. 创建月报")
-    print("  4. 查看日报列表")
-    print("  5. 查看周报列表")
-    print("  6. 查看月报列表")
-    print("  7. 查看报告内容")
-    print("  0. 退出")
-    
-    while True:
-        choice = input("\n请选择 (0-7): ").strip()
-        
-        if choice == "0":
-            print("👋 再见!")
-            break
-        elif choice == "1":
-            create_report("daily", base_dir)
-        elif choice == "2":
-            create_report("weekly", base_dir)
-        elif choice == "3":
-            create_report("monthly", base_dir)
-        elif choice == "4":
-            list_reports(base_dir, "daily")
-        elif choice == "5":
-            list_reports(base_dir, "weekly")
-        elif choice == "6":
-            list_reports(base_dir, "monthly")
-        elif choice == "7":
-            print("\n请选择报告类型:")
-            print("  1. 日报")
-            print("  2. 周报")
-            print("  3. 月报")
-            type_choice = input("选择 (1-3): ").strip()
-            if type_choice == "1":
-                date_id = input("请输入日期 (YYYY-MM-DD,直接Enter使用今天): ").strip()
-                view_report(base_dir, "daily", date_id if date_id else None)
-            elif type_choice == "2":
-                date_id = input("请输入周标识 (YYYY-Www,直接Enter使用当前周): ").strip()
-                view_report(base_dir, "weekly", date_id if date_id else None)
-            elif type_choice == "3":
-                date_id = input("请输入月份 (YYYY-MM,直接Enter使用当前月): ").strip()
-                view_report(base_dir, "monthly", date_id if date_id else None)
-        else:
-            print("⚠️  无效选择,请重试")
-
-
-if __name__ == "__main__":
-    main()
-
+"""
+报告编写工具 - 创建日报/周报/月报
+支持交互式输入,保存为Markdown格式
+"""
+
+import sys
+import os
+from pathlib import Path
+from datetime import datetime
+
+# 设置控制台编码为UTF-8(Windows)
+if sys.platform == 'win32':
+    import io
+    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
+    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
+
+
+def get_week_number(date=None):
+    """获取ISO周数"""
+    if date is None:
+        date = datetime.now()
+    year, week, _ = date.isocalendar()
+    return f"{year}-W{week:02d}"
+
+
+def get_current_date_id(report_type):
+    """获取当前日期标识"""
+    now = datetime.now()
+    
+    if report_type == "daily":
+        return now.strftime("%Y-%m-%d")
+    elif report_type == "weekly":
+        return get_week_number(now)
+    elif report_type == "monthly":
+        return now.strftime("%Y-%m")
+    else:
+        return now.strftime("%Y-%m-%d")
+
+
+def get_report_dir(base_dir, report_type):
+    """获取报告目录路径"""
+    report_dir = base_dir / "archive" / "reports" / report_type
+    report_dir.mkdir(parents=True, exist_ok=True)
+    return report_dir
+
+
+def input_multiline(prompt="请输入报告内容(输入空行后按Enter结束):\n"):
+    """多行输入,以空行结束"""
+    print(prompt)
+    lines = []
+    empty_line_count = 0
+    
+    while True:
+        try:
+            line = input()
+            if line.strip() == "":
+                empty_line_count += 1
+                if empty_line_count >= 1:  # 一个空行就结束
+                    break
+            else:
+                empty_line_count = 0
+                lines.append(line)
+        except EOFError:
+            break
+        except KeyboardInterrupt:
+            print("\n\n⚠️  输入已取消")
+            return None
+    
+    return "\n".join(lines) if lines else None
+
+
+def save_report(report_dir, date_id, content, report_type):
+    """保存报告到文件"""
+    file_path = report_dir / f"{date_id}.md"
+    
+    # 如果文件已存在,询问是否覆盖
+    if file_path.exists():
+        response = input(f"⚠️  文件 {file_path.name} 已存在,是否覆盖?(y/n): ").strip().lower()
+        if response not in ['y', 'yes', '是']:
+            print("❌ 已取消保存")
+            return False
+    
+    try:
+        with open(file_path, 'w', encoding='utf-8') as f:
+            f.write(content)
+        print(f"✅ 报告已保存到: {file_path}")
+        return True
+    except Exception as e:
+        print(f"❌ 保存失败: {e}")
+        return False
+
+
+def create_report(report_type, base_dir):
+    """创建报告"""
+    print("=" * 70)
+    print(f"创建{report_type}报告")
+    print("=" * 70)
+    
+    # 获取日期标识
+    date_id = get_current_date_id(report_type)
+    print(f"日期标识: {date_id}")
+    
+    # 获取报告目录
+    report_dir = get_report_dir(base_dir, report_type)
+    
+    # 检查是否已存在
+    existing_file = report_dir / f"{date_id}.md"
+    if existing_file.exists():
+        print(f"📄 发现已有报告: {existing_file.name}")
+        view = input("是否查看现有内容?(y/n): ").strip().lower()
+        if view in ['y', 'yes', '是']:
+            try:
+                with open(existing_file, 'r', encoding='utf-8') as f:
+                    print("\n" + "=" * 70)
+                    print("现有内容:")
+                    print("=" * 70)
+                    print(f.read())
+                    print("=" * 70)
+            except Exception as e:
+                print(f"⚠️  读取失败: {e}")
+        
+        edit = input("\n是否编辑/覆盖?(y/n): ").strip().lower()
+        if edit not in ['y', 'yes', '是']:
+            print("❌ 已取消")
+            return
+    
+    # 输入报告内容
+    print(f"\n请开始输入{report_type}报告内容...")
+    print("提示:输入空行后按Enter结束输入")
+    content = input_multiline()
+    
+    if content is None or content.strip() == "":
+        print("❌ 内容为空,已取消保存")
+        return
+    
+    # 添加日期标记(可选)
+    header = f"# {report_type}报告 - {date_id}\n\n"
+    full_content = header + content
+    
+    # 保存文件
+    save_report(report_dir, date_id, full_content, report_type)
+
+
+def list_reports(base_dir, report_type):
+    """列出已有报告"""
+    report_dir = get_report_dir(base_dir, report_type)
+    
+    if not report_dir.exists():
+        print(f"📁 目录不存在: {report_dir}")
+        return
+    
+    reports = sorted(report_dir.glob("*.md"))
+    
+    if not reports:
+        print(f"📁 暂无{report_type}报告")
+        return
+    
+    print(f"\n📋 {report_type}报告列表 ({len(reports)}个):")
+    print("-" * 70)
+    for report in reports:
+        size = report.stat().st_size
+        mtime = datetime.fromtimestamp(report.stat().st_mtime).strftime("%Y-%m-%d %H:%M")
+        print(f"  {report.name:20s}  {size:6d} 字节  {mtime}")
+    print("-" * 70)
+
+
+def view_report(base_dir, report_type, date_id=None):
+    """查看报告内容"""
+    if date_id is None:
+        date_id = get_current_date_id(report_type)
+    
+    report_dir = get_report_dir(base_dir, report_type)
+    file_path = report_dir / f"{date_id}.md"
+    
+    if not file_path.exists():
+        print(f"❌ 报告不存在: {file_path}")
+        return
+    
+    try:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            content = f.read()
+        print("\n" + "=" * 70)
+        print(f"{report_type}报告 - {date_id}")
+        print("=" * 70)
+        print(content)
+        print("=" * 70)
+    except Exception as e:
+        print(f"❌ 读取失败: {e}")
+
+
+def main():
+    """主函数"""
+    import sys
+    
+    base_dir = Path(__file__).parent
+    
+    # 检查命令行参数,支持直接启动日报模式
+    if len(sys.argv) > 1 and sys.argv[1] in ['--daily', '--auto-daily']:
+        create_report("daily", base_dir)
+        return
+    
+    print("=" * 70)
+    print("报告编写工具")
+    print("=" * 70)
+    print("\n请选择操作:")
+    print("  1. 创建日报")
+    print("  2. 创建周报")
+    print("  3. 创建月报")
+    print("  4. 查看日报列表")
+    print("  5. 查看周报列表")
+    print("  6. 查看月报列表")
+    print("  7. 查看报告内容")
+    print("  0. 退出")
+    
+    while True:
+        choice = input("\n请选择 (0-7): ").strip()
+        
+        if choice == "0":
+            print("👋 再见!")
+            break
+        elif choice == "1":
+            create_report("daily", base_dir)
+        elif choice == "2":
+            create_report("weekly", base_dir)
+        elif choice == "3":
+            create_report("monthly", base_dir)
+        elif choice == "4":
+            list_reports(base_dir, "daily")
+        elif choice == "5":
+            list_reports(base_dir, "weekly")
+        elif choice == "6":
+            list_reports(base_dir, "monthly")
+        elif choice == "7":
+            print("\n请选择报告类型:")
+            print("  1. 日报")
+            print("  2. 周报")
+            print("  3. 月报")
+            type_choice = input("选择 (1-3): ").strip()
+            if type_choice == "1":
+                date_id = input("请输入日期 (YYYY-MM-DD,直接Enter使用今天): ").strip()
+                view_report(base_dir, "daily", date_id if date_id else None)
+            elif type_choice == "2":
+                date_id = input("请输入周标识 (YYYY-Www,直接Enter使用当前周): ").strip()
+                view_report(base_dir, "weekly", date_id if date_id else None)
+            elif type_choice == "3":
+                date_id = input("请输入月份 (YYYY-MM,直接Enter使用当前月): ").strip()
+                view_report(base_dir, "monthly", date_id if date_id else None)
+        else:
+            print("⚠️  无效选择,请重试")
+
+
+if __name__ == "__main__":
+    main()
+

+ 246 - 246
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/完整使用流程说明.md → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/完整使用流程说明.md

@@ -1,246 +1,246 @@
-# 维度提取与主题修正系统 - 完整使用流程
-
-## 系统概述
-
-本系统实现了从用户报告中提取维度,并用维度分析来修正YouTube搜索主题(themes)的完整流程。
-
-**核心流程**:
-1. 用户写日报/周报/月报(自然语言)
-2. LLM从报告中提取维度
-3. 维度分析与现有themes对比
-4. 生成themes修正建议(添加/删除themes)
-5. 用户确认后更新themes.yaml
-
-## 文件说明
-
-### 核心工具脚本
-
-1. **`write_report.py`** - 报告编写工具
-   - 创建日报/周报/月报
-   - 交互式输入,保存为Markdown格式
-
-2. **`extract_dimensions.py`** - 维度提取工具
-   - 从报告中用LLM提取维度
-   - 批量处理报告文件
-
-3. **`manage_themes.py`** - 主题管理工具
-   - 管理themes.yaml文件
-   - 添加/删除/查看themes
-
-4. **`analyze_dimensions.py`** - 主分析工具
-   - 整合维度提取和分析
-   - 生成themes修正建议
-   - 支持交互式确认和应用
-
-### 数据文件结构
-
-```
-Personal_Information_Signaling_System/
-├── themes.yaml                    # 用户设置的搜索主题
-├── archive/
-│   ├── reports/                   # 用户报告(Markdown)
-│   │   ├── daily/                 # 日报
-│   │   ├── weekly/                # 周报
-│   │   └── monthly/               # 月报
-│   ├── dimensions/                # 维度提取结果
-│   │   └── YYYY-MM-DD_type_dimensions.json
-│   └── dimension_analysis/        # 分析报告
-│       └── YYYY-MM-DD_analysis.json
-```
-
-## 完整使用流程
-
-### 步骤1:写报告
-
-使用 `write_report.py` 创建报告:
-
-```bash
-python write_report.py
-```
-
-选择操作:
-- 1. 创建日报
-- 2. 创建周报
-- 3. 创建月报
-
-输入报告内容(自然语言,Markdown格式),系统会自动保存。
-
-**或者**:直接在 `archive/reports/` 目录下创建/编辑 `.md` 文件:
-- 日报:`YYYY-MM-DD.md`
-- 周报:`YYYY-Www.md`
-- 月报:`YYYY-MM.md`
-
-### 步骤2:提取维度
-
-从报告中提取维度:
-
-```bash
-# 提取所有报告的维度
-python extract_dimensions.py
-
-# 只提取日报的维度
-python extract_dimensions.py --report-type daily
-
-# 提取单个报告文件的维度
-python extract_dimensions.py --report-file archive/reports/daily/2025-12-28.md
-```
-
-提取结果保存在 `archive/dimensions/` 目录。
-
-### 步骤3:分析维度并生成themes建议
-
-运行主分析工具:
-
-```bash
-# 基本分析(加载已有提取结果)
-python analyze_dimensions.py
-
-# 重新提取维度并分析
-python analyze_dimensions.py --extract
-
-# 交互模式(查看并处理建议)
-python analyze_dimensions.py --interactive
-
-# 组合使用
-python analyze_dimensions.py --extract --interactive
-```
-
-### 步骤4:管理themes(可选)
-
-手动管理themes:
-
-```bash
-# 交互式管理
-python manage_themes.py
-
-# 命令行操作
-python manage_themes.py --list              # 列出所有themes
-python manage_themes.py --add "健康"        # 添加theme
-python manage_themes.py --remove "娱乐"     # 删除theme
-```
-
-## 主题修正判断标准
-
-### 添加Theme建议
-
-条件:
-- 维度在提取结果中出现频率 ≥ 50%
-- 最近30天内出现次数 ≥ 3次
-- 该维度不在现有themes中
-
-### 删除Theme建议
-
-条件:
-- Theme在提取维度中的匹配率 < 10%
-- 持续时间 ≥ 60天
-- 过去90天内的匹配次数很少
-
-## 典型工作流程
-
-### 日常使用
-
-```bash
-# 1. 每天写日报
-python write_report.py
-# 选择 1,输入今天的内容
-
-# 2. 每周提取一次维度
-python extract_dimensions.py
-
-# 3. 分析并查看建议
-python analyze_dimensions.py --extract --interactive
-```
-
-### 初次设置
-
-```bash
-# 1. 设置初始themes(手动编辑themes.yaml或使用工具)
-python manage_themes.py
-
-# 2. 写几篇报告
-python write_report.py
-
-# 3. 提取维度
-python extract_dimensions.py
-
-# 4. 分析并应用建议
-python analyze_dimensions.py --extract --interactive
-```
-
-## 配置要求
-
-### LLM配置
-
-需要在 `.env` 文件中配置LLM(用于维度提取):
-
-```env
-LLM_API_KEY=your-api-key
-LLM_MODEL=qwen-plus
-LLM_BASE_URL=https://api-inference.modelscope.cn/v1/
-LLM_PROVIDER=modelscope
-```
-
-### Themes配置
-
-`themes.yaml` 文件格式:
-
-```yaml
-themes:
-  - mcp
-  - agent
-  - rag
-  - money
-  - AI
-```
-
-## 输出文件说明
-
-### 维度提取结果
-
-`archive/dimensions/YYYY-MM-DD_type_dimensions.json`:
-
-```json
-{
-  "report_date": "2025-12-28",
-  "report_type": "daily",
-  "report_file": "archive/reports/daily/2025-12-28.md",
-  "extracted_dimensions": ["健康", "情绪", "工作"],
-  "confidence": 0.85,
-  "extraction_date": "2025-12-28T10:00:00Z"
-}
-```
-
-### 分析报告
-
-`archive/dimension_analysis/YYYY-MM-DD_analysis.json`:
-
-包含维度统计、themes匹配分析、修正建议等信息。
-
-## 注意事项
-
-1. **首次使用**:需要先写几篇报告,然后提取维度,才能生成有意义的建议
-2. **LLM配置**:维度提取需要LLM,确保已配置API Key
-3. **报告格式**:报告使用Markdown格式,可以包含标题、段落、列表等
-4. **维度提取**:每次运行 `extract_dimensions.py` 会重新提取,提取结果会覆盖之前的文件
-5. **Themes更新**:建议在交互模式下仔细审查每个建议,确认后再应用
-
-## 故障排除
-
-### LLM初始化失败
-
-- 检查 `.env` 文件中的API Key配置
-- 确认网络连接正常
-- 检查API配额是否充足
-
-### 没有提取到维度
-
-- 确认报告文件存在且内容不为空
-- 检查报告文件格式是否正确
-- 尝试单独提取一个报告文件查看错误信息
-
-### 没有生成建议
-
-- 确认有足够的提取结果(建议至少3-5个报告)
-- 检查themes是否已设置
-- 查看分析报告中的统计信息
-
+# 维度提取与主题修正系统 - 完整使用流程
+
+## 系统概述
+
+本系统实现了从用户报告中提取维度,并用维度分析来修正YouTube搜索主题(themes)的完整流程。
+
+**核心流程**:
+1. 用户写日报/周报/月报(自然语言)
+2. LLM从报告中提取维度
+3. 维度分析与现有themes对比
+4. 生成themes修正建议(添加/删除themes)
+5. 用户确认后更新themes.yaml
+
+## 文件说明
+
+### 核心工具脚本
+
+1. **`write_report.py`** - 报告编写工具
+   - 创建日报/周报/月报
+   - 交互式输入,保存为Markdown格式
+
+2. **`extract_dimensions.py`** - 维度提取工具
+   - 从报告中用LLM提取维度
+   - 批量处理报告文件
+
+3. **`manage_themes.py`** - 主题管理工具
+   - 管理themes.yaml文件
+   - 添加/删除/查看themes
+
+4. **`analyze_dimensions.py`** - 主分析工具
+   - 整合维度提取和分析
+   - 生成themes修正建议
+   - 支持交互式确认和应用
+
+### 数据文件结构
+
+```
+Personal_Information_Signaling_System/
+├── themes.yaml                    # 用户设置的搜索主题
+├── archive/
+│   ├── reports/                   # 用户报告(Markdown)
+│   │   ├── daily/                 # 日报
+│   │   ├── weekly/                # 周报
+│   │   └── monthly/               # 月报
+│   ├── dimensions/                # 维度提取结果
+│   │   └── YYYY-MM-DD_type_dimensions.json
+│   └── dimension_analysis/        # 分析报告
+│       └── YYYY-MM-DD_analysis.json
+```
+
+## 完整使用流程
+
+### 步骤1:写报告
+
+使用 `write_report.py` 创建报告:
+
+```bash
+python write_report.py
+```
+
+选择操作:
+- 1. 创建日报
+- 2. 创建周报
+- 3. 创建月报
+
+输入报告内容(自然语言,Markdown格式),系统会自动保存。
+
+**或者**:直接在 `archive/reports/` 目录下创建/编辑 `.md` 文件:
+- 日报:`YYYY-MM-DD.md`
+- 周报:`YYYY-Www.md`
+- 月报:`YYYY-MM.md`
+
+### 步骤2:提取维度
+
+从报告中提取维度:
+
+```bash
+# 提取所有报告的维度
+python extract_dimensions.py
+
+# 只提取日报的维度
+python extract_dimensions.py --report-type daily
+
+# 提取单个报告文件的维度
+python extract_dimensions.py --report-file archive/reports/daily/2025-12-28.md
+```
+
+提取结果保存在 `archive/dimensions/` 目录。
+
+### 步骤3:分析维度并生成themes建议
+
+运行主分析工具:
+
+```bash
+# 基本分析(加载已有提取结果)
+python analyze_dimensions.py
+
+# 重新提取维度并分析
+python analyze_dimensions.py --extract
+
+# 交互模式(查看并处理建议)
+python analyze_dimensions.py --interactive
+
+# 组合使用
+python analyze_dimensions.py --extract --interactive
+```
+
+### 步骤4:管理themes(可选)
+
+手动管理themes:
+
+```bash
+# 交互式管理
+python manage_themes.py
+
+# 命令行操作
+python manage_themes.py --list              # 列出所有themes
+python manage_themes.py --add "健康"        # 添加theme
+python manage_themes.py --remove "娱乐"     # 删除theme
+```
+
+## 主题修正判断标准
+
+### 添加Theme建议
+
+条件:
+- 维度在提取结果中出现频率 ≥ 50%
+- 最近30天内出现次数 ≥ 3次
+- 该维度不在现有themes中
+
+### 删除Theme建议
+
+条件:
+- Theme在提取维度中的匹配率 < 10%
+- 持续时间 ≥ 60天
+- 过去90天内的匹配次数很少
+
+## 典型工作流程
+
+### 日常使用
+
+```bash
+# 1. 每天写日报
+python write_report.py
+# 选择 1,输入今天的内容
+
+# 2. 每周提取一次维度
+python extract_dimensions.py
+
+# 3. 分析并查看建议
+python analyze_dimensions.py --interactive
+```
+
+### 初次设置
+
+```bash
+# 1. 设置初始themes(手动编辑themes.yaml或使用工具)
+python manage_themes.py
+
+# 2. 写几篇报告
+python write_report.py
+
+# 3. 提取维度
+python extract_dimensions.py
+
+# 4. 分析并应用建议
+python analyze_dimensions.py --interactive
+```
+
+## 配置要求
+
+### LLM配置
+
+需要在 `.env` 文件中配置LLM(用于维度提取):
+
+```env
+LLM_API_KEY=your-api-key
+LLM_MODEL=qwen-plus
+LLM_BASE_URL=https://api-inference.modelscope.cn/v1/
+LLM_PROVIDER=modelscope
+```
+
+### Themes配置
+
+`themes.yaml` 文件格式:
+
+```yaml
+themes:
+  - mcp
+  - agent
+  - rag
+  - money
+  - AI
+```
+
+## 输出文件说明
+
+### 维度提取结果
+
+`archive/dimensions/YYYY-MM-DD_type_dimensions.json`:
+
+```json
+{
+  "report_date": "2025-12-28",
+  "report_type": "daily",
+  "report_file": "archive/reports/daily/2025-12-28.md",
+  "extracted_dimensions": ["健康", "情绪", "工作"],
+  "confidence": 0.85,
+  "extraction_date": "2025-12-28T10:00:00Z"
+}
+```
+
+### 分析报告
+
+`archive/dimension_analysis/YYYY-MM-DD_analysis.json`:
+
+包含维度统计、themes匹配分析、修正建议等信息。
+
+## 注意事项
+
+1. **首次使用**:需要先写几篇报告,然后提取维度,才能生成有意义的建议
+2. **LLM配置**:维度提取需要LLM,确保已配置API Key
+3. **报告格式**:报告使用Markdown格式,可以包含标题、段落、列表等
+4. **维度提取**:每次运行 `extract_dimensions.py` 会重新提取,提取结果会覆盖之前的文件
+5. **Themes更新**:建议在交互模式下仔细审查每个建议,确认后再应用
+
+## 故障排除
+
+### LLM初始化失败
+
+- 检查 `.env` 文件中的API Key配置
+- 确认网络连接正常
+- 检查API配额是否充足
+
+### 没有提取到维度
+
+- 确认报告文件存在且内容不为空
+- 检查报告文件格式是否正确
+- 尝试单独提取一个报告文件查看错误信息
+
+### 没有生成建议
+
+- 确认有足够的提取结果(建议至少3-5个报告)
+- 检查themes是否已设置
+- 查看分析报告中的统计信息
+

+ 377 - 377
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/桌面提醒设置说明.md → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/桌面提醒设置说明.md

@@ -1,377 +1,377 @@
-# Windows任务计划程序设置说明
-
-## 功能说明
-
-设置Windows任务计划程序,每天23:30自动弹出提醒窗口,提醒写日报。
-
-## 📋 推荐配置方案
-
-### 方案 1:使用项目虚拟环境(推荐)
-
-如果项目有自己的虚拟环境(`.venv` 目录):
-
-**配置信息:**
-
-- **程序或脚本**: 
-  ```
-  C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe
-  ```
-
-- **添加参数(可选)**: 
-  ```
-  daily_reminder.py
-  ```
-
-- **起始于(可选)**: 
-  ```
-  C:\Python\pythonprogram\Personal_Information_Signaling_System
-  ```
-
-### 方案 2:使用系统 Python
-
-如果使用系统安装的 Python(已添加到 PATH):
-
-**配置信息:**
-
-- **程序或脚本**: 
-  ```
-  python.exe
-  ```
-
-- **添加参数(可选)**: 
-  ```
-  daily_reminder.py
-  ```
-
-- **起始于(可选)**: 
-  ```
-  C:\Python\pythonprogram\Personal_Information_Signaling_System
-  ```
-
-### 方案 3:直接运行脚本(如果配置了文件关联)
-
-**配置信息:**
-
-- **程序或脚本**: 
-  ```
-  C:\Python\pythonprogram\Personal_Information_Signaling_System\daily_reminder.py
-  ```
-
-- **起始于(可选)**: 
-  ```
-  C:\Python\pythonprogram\Personal_Information_Signaling_System
-  ```
-
-## 🔍 如何确认 Python 路径
-
-### 方法 1:检查项目虚拟环境
-
-```bash
-cd C:\Python\pythonprogram\Personal_Information_Signaling_System
-dir .venv\Scripts\python.exe
-```
-
-如果存在,使用:
-```
-C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe
-```
-
-### 方法 2:检查系统 Python
-
-在命令行输入:
-```bash
-where python
-```
-
-会显示 Python 的完整路径,例如:
-```
-C:\Python\python311\python.exe
-```
-
-### 方法 3:创建项目虚拟环境(如果还没有)
-
-```bash
-cd C:\Python\pythonprogram\Personal_Information_Signaling_System
-python -m venv .venv
-.venv\Scripts\activate
-pip install -r requirements.txt
-```
-
-然后使用方案 1 的配置。
-
-## 设置步骤
-
-### 1. 打开任务计划程序
-
-- 按 `Win + R` 打开运行对话框
-- 输入 `taskschd.msc` 并回车
-- 或者:开始菜单 → 搜索"任务计划程序"
-
-### 2. 创建基本任务
-
-1. 在右侧"操作"面板中,点击"创建基本任务"
-2. 输入任务名称:`每日写日报提醒`
-3. 输入描述:`每天23:30提醒写日报`
-4. 点击"下一步"
-
-### 3. 设置触发器
-
-1. 选择"每天"
-2. 点击"下一步"
-3. 设置开始时间:`23:30:00`
-4. 设置开始日期:今天或明天
-5. 点击"下一步"
-
-### 4. 设置操作
-
-1. 选择"启动程序"
-2. 点击"下一步"
-3. 在"程序或脚本"中输入Python的完整路径:
-   ```
-   C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe
-   ```
-   (如果项目有虚拟环境)
-   
-   或者使用系统 Python:
-   ```
-   python.exe
-   ```
-   (需要确保 Python 已添加到系统 PATH)
-
-4. 在"添加参数"中输入:
-   ```
-   daily_reminder.py
-   ```
-
-5. 在"起始于"中输入脚本所在目录:
-   ```
-   C:\Python\pythonprogram\Personal_Information_Signaling_System
-   ```
-   (请根据你的实际路径修改)
-
-6. 点击"下一步"
-
-### 5. 完成设置
-
-1. 勾选"当单击完成时,打开此任务属性的对话框"
-2. 点击"完成"
-
-### 6. 高级设置(可选)
-
-在任务属性对话框中:
-
-1. **常规**选项卡:
-   - 勾选"不管用户是否登录都要运行"(可选)
-   - 勾选"使用最高权限运行"(可选)
-
-2. **条件**选项卡:
-   - 取消勾选"只有在计算机使用交流电源时才启动此任务"(如果使用笔记本)
-   - 勾选"唤醒计算机运行此任务"(可选)
-
-3. **设置**选项卡:
-   - 勾选"允许按需运行任务"
-   - 勾选"如果请求的任务正在运行,则停止现有实例"
-
-4. 点击"确定"保存
-
-## 测试任务
-
-### 方法1:立即运行测试
-
-1. 在任务计划程序中找到创建的任务
-2. 右键点击 → "运行"
-3. 检查是否弹出提醒窗口
-
-### 方法2:修改时间测试
-
-1. 右键点击任务 → "属性"
-2. 在"触发器"选项卡中,编辑触发器
-3. 将时间改为当前时间+1分钟
-4. 等待1分钟,观察是否弹出提醒窗口
-5. 测试完成后,将时间改回23:30
-
-## 常见问题
-
-### 1. 任务不运行
-
-- 检查Python路径是否正确
-- 检查脚本路径是否正确
-- 检查"起始于"目录是否正确
-- 查看任务历史记录中的错误信息
-
-### 2. 窗口不显示
-
-- 检查是否安装了Pillow:`pip install Pillow`
-- 检查图片文件是否存在:`assets/person.png`
-- 检查是否有错误提示
-
-### 3. 点击后不启动写日报
-
-- 检查 `write_report.py` 是否存在
-- 检查Python环境是否正确
-
-## 开机自启动(可选)
-
-如果需要开机自启动提醒服务(后台运行):
-
-1. 创建启动脚本 `start_reminder_service.bat`:
-   ```batch
-   @echo off
-   cd /d C:\Python\pythonprogram\Personal_Information_Signaling_System
-   C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe daily_reminder_service.py
-   ```
-
-2. 将脚本添加到启动文件夹:
-   - 按 `Win + R`,输入 `shell:startup`
-   - 将 `start_reminder_service.bat` 的快捷方式放入此文件夹
-
-## 注意事项
-
-1. **Python路径**:确保使用虚拟环境中的Python,或使用完整路径
-2. **工作目录**:确保"起始于"目录设置正确
-3. **权限**:可能需要管理员权限运行任务计划程序
-4. **图片文件**:确保 `assets/person.png` 或 `assets/person.jpg` 存在
-
-## 卸载
-
-如果需要删除任务:
-
-1. 打开任务计划程序
-2. 找到"每日写日报提醒"任务
-3. 右键点击 → "删除"
-
----
-
-## 📝 更新现有任务(如果项目路径已更改)
-
-如果你之前已经设置了桌面提醒任务,但项目路径已更改,需要更新任务配置:
-
-### 需要更新的信息
-
-#### 原配置信息(旧路径)
-
-如果之前已经设置了桌面提醒任务,任务中可能包含以下路径:
-
-- **程序或脚本**: `C:\Python\pythonprogram\datawhale_Agent\PersonalizationMCP-main\daily_reminder.py`
-- **起始于**: `C:\Python\pythonprogram\datawhale_Agent\PersonalizationMCP-main`
-- **Python 解释器路径**: 可能是虚拟环境路径,如 `C:\Python\pythonprogram\datawhale_Agent\.venv\Scripts\python.exe`
-
-#### 新配置信息(更新后)
-
-- **程序或脚本**: `C:\Python\pythonprogram\Personal_Information_Signaling_System\daily_reminder.py`
-- **起始于**: `C:\Python\pythonprogram\Personal_Information_Signaling_System`
-- **Python 解释器路径**: 
-  - 如果使用项目虚拟环境: `C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe`
-  - 如果使用系统 Python: `python.exe`(如果已添加到系统 PATH)
-
-### 详细更新步骤
-
-#### 步骤 1:打开任务计划程序
-
-1. 按 `Win + R` 打开运行对话框
-2. 输入 `taskschd.msc` 并按回车
-3. 或者:开始菜单 → 搜索"任务计划程序" → 打开
-
-#### 步骤 2:找到你的任务
-
-1. 在左侧"任务计划程序库"中查找你的任务
-2. 任务名称可能是:
-   - `每日写日报提醒`
-   - `Daily Report Reminder`
-   - `写日报提醒`
-   - 或其他你自定义的名称
-
-#### 步骤 3:编辑任务属性
-
-1. **右键点击任务** → 选择"属性"(或双击任务)
-2. 在弹出的属性窗口中,切换到 **"操作"** 标签页
-
-#### 步骤 4:更新"操作"配置
-
-在"操作"标签页中,更新以下字段:
-
-1. **程序或脚本**:
-   ```
-   旧值: C:\Python\pythonprogram\datawhale_Agent\PersonalizationMCP-main\daily_reminder.py
-   新值: C:\Python\pythonprogram\Personal_Information_Signaling_System\daily_reminder.py
-   ```
-   
-   或者,如果使用 Python 解释器直接运行:
-   ```
-   旧值: C:\Python\pythonprogram\datawhale_Agent\.venv\Scripts\python.exe
-   新值: C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe
-   ```
-   (如果项目有虚拟环境)
-
-2. **添加参数(可选)**:
-   ```
-   如果程序或脚本是 Python 解释器,这里应该填写:
-   daily_reminder.py
-   ```
-
-3. **起始于**:
-   ```
-   旧值: C:\Python\pythonprogram\datawhale_Agent\PersonalizationMCP-main
-   新值: C:\Python\pythonprogram\Personal_Information_Signaling_System
-   ```
-
-#### 步骤 5:确认并保存
-
-1. 点击"确定"保存更改
-2. 系统可能会要求输入管理员密码(如果任务需要管理员权限)
-
-#### 步骤 6:测试任务
-
-1. 右键点击任务 → 选择"运行"
-2. 检查是否能正常弹出提醒窗口
-3. 如果失败,检查"历史记录"标签页查看错误信息
-
-### 通过命令行更新(高级)
-
-如果你熟悉命令行,可以使用以下 PowerShell 命令:
-
-```powershell
-# 1. 查看现有任务
-Get-ScheduledTask | Where-Object {$_.TaskName -like "*日报*" -or $_.TaskName -like "*reminder*"}
-
-# 2. 获取任务的操作配置
-$task = Get-ScheduledTask -TaskName "你的任务名称"
-$task.Actions
-
-# 3. 更新任务操作(需要管理员权限)
-$action = New-ScheduledTaskAction -Execute "C:\Python\pythonprogram\Personal_Information_Signaling_System\daily_reminder.py" -WorkingDirectory "C:\Python\pythonprogram\Personal_Information_Signaling_System"
-Set-ScheduledTask -TaskName "你的任务名称" -Action $action
-```
-
----
-
-## ✅ 配置检查清单
-
-设置或更新任务计划程序后,请确认:
-
-- [ ] **程序或脚本**路径正确且文件存在
-- [ ] **起始于**路径正确且目录存在
-- [ ] **添加参数**正确(如果使用 Python 解释器运行脚本)
-- [ ] **触发器**时间正确(通常是每天 23:30)
-- [ ] **测试运行**成功(右键任务 → 运行)
-
----
-
-## 🎯 快速复制粘贴
-
-### 方案 1(项目虚拟环境)
-
-```
-程序或脚本: C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe
-添加参数: daily_reminder.py
-起始于: C:\Python\pythonprogram\Personal_Information_Signaling_System
-```
-
-### 方案 2(系统 Python)
-
-```
-程序或脚本: python.exe
-添加参数: daily_reminder.py
-起始于: C:\Python\pythonprogram\Personal_Information_Signaling_System
-```
-
+# Windows任务计划程序设置说明
+
+## 功能说明
+
+设置Windows任务计划程序,每天23:30自动弹出提醒窗口,提醒写日报。
+
+## 📋 推荐配置方案
+
+### 方案 1:使用项目虚拟环境(推荐)
+
+如果项目有自己的虚拟环境(`.venv` 目录):
+
+**配置信息:**
+
+- **程序或脚本**: 
+  ```
+  C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe
+  ```
+
+- **添加参数(可选)**: 
+  ```
+  daily_reminder.py
+  ```
+
+- **起始于(可选)**: 
+  ```
+  C:\Python\pythonprogram\Personal_Information_Signaling_System
+  ```
+
+### 方案 2:使用系统 Python
+
+如果使用系统安装的 Python(已添加到 PATH):
+
+**配置信息:**
+
+- **程序或脚本**: 
+  ```
+  python.exe
+  ```
+
+- **添加参数(可选)**: 
+  ```
+  daily_reminder.py
+  ```
+
+- **起始于(可选)**: 
+  ```
+  C:\Python\pythonprogram\Personal_Information_Signaling_System
+  ```
+
+### 方案 3:直接运行脚本(如果配置了文件关联)
+
+**配置信息:**
+
+- **程序或脚本**: 
+  ```
+  C:\Python\pythonprogram\Personal_Information_Signaling_System\daily_reminder.py
+  ```
+
+- **起始于(可选)**: 
+  ```
+  C:\Python\pythonprogram\Personal_Information_Signaling_System
+  ```
+
+## 🔍 如何确认 Python 路径
+
+### 方法 1:检查项目虚拟环境
+
+```batch
+cd C:\Python\pythonprogram\Personal_Information_Signaling_System
+dir .venv\Scripts\python.exe
+```
+
+如果存在,使用:
+```
+C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe
+```
+
+### 方法 2:检查系统 Python
+
+在命令行输入:
+```batch
+where python
+```
+
+会显示 Python 的完整路径,例如:
+```
+C:\Python\python311\python.exe
+```
+
+### 方法 3:创建项目虚拟环境(如果还没有)
+
+```batch
+cd C:\Python\pythonprogram\Personal_Information_Signaling_System
+python -m venv .venv
+.venv\Scripts\activate
+pip install -r requirements.txt
+```
+
+然后使用方案 1 的配置。
+
+## 设置步骤
+
+### 1. 打开任务计划程序
+
+- 按 `Win + R` 打开运行对话框
+- 输入 `taskschd.msc` 并回车
+- 或者:开始菜单 → 搜索"任务计划程序"
+
+### 2. 创建基本任务
+
+1. 在右侧"操作"面板中,点击"创建基本任务"
+2. 输入任务名称:`每日写日报提醒`
+3. 输入描述:`每天23:30提醒写日报`
+4. 点击"下一步"
+
+### 3. 设置触发器
+
+1. 选择"每天"
+2. 点击"下一步"
+3. 设置开始时间:`23:30:00`
+4. 设置开始日期:今天或明天
+5. 点击"下一步"
+
+### 4. 设置操作
+
+1. 选择"启动程序"
+2. 点击"下一步"
+3. 在"程序或脚本"中输入Python的完整路径:
+   ```
+   C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe
+   ```
+   (如果项目有虚拟环境)
+   
+   或者使用系统 Python:
+   ```
+   python.exe
+   ```
+   (需要确保 Python 已添加到系统 PATH)
+
+4. 在"添加参数"中输入:
+   ```
+   daily_reminder.py
+   ```
+
+5. 在"起始于"中输入脚本所在目录:
+   ```
+   C:\Python\pythonprogram\Personal_Information_Signaling_System
+   ```
+   (请根据你的实际路径修改)
+
+6. 点击"下一步"
+
+### 5. 完成设置
+
+1. 勾选"当单击完成时,打开此任务属性的对话框"
+2. 点击"完成"
+
+### 6. 高级设置(可选)
+
+在任务属性对话框中:
+
+1. **常规**选项卡:
+   - 勾选"不管用户是否登录都要运行"(可选)
+   - 勾选"使用最高权限运行"(可选)
+
+2. **条件**选项卡:
+   - 取消勾选"只有在计算机使用交流电源时才启动此任务"(如果使用笔记本)
+   - 勾选"唤醒计算机运行此任务"(可选)
+
+3. **设置**选项卡:
+   - 勾选"允许按需运行任务"
+   - 勾选"如果请求的任务正在运行,则停止现有实例"
+
+4. 点击"确定"保存
+
+## 测试任务
+
+### 方法1:立即运行测试
+
+1. 在任务计划程序中找到创建的任务
+2. 右键点击 → "运行"
+3. 检查是否弹出提醒窗口
+
+### 方法2:修改时间测试
+
+1. 右键点击任务 → "属性"
+2. 在"触发器"选项卡中,编辑触发器
+3. 将时间改为当前时间+1分钟
+4. 等待1分钟,观察是否弹出提醒窗口
+5. 测试完成后,将时间改回23:30
+
+## 常见问题
+
+### 1. 任务不运行
+
+- 检查Python路径是否正确
+- 检查脚本路径是否正确
+- 检查"起始于"目录是否正确
+- 查看任务历史记录中的错误信息
+
+### 2. 窗口不显示
+
+- 检查是否安装了Pillow:`pip install Pillow`
+- 检查图片文件是否存在:`assets/person.png`
+- 检查是否有错误提示
+
+### 3. 点击后不启动写日报
+
+- 检查 `write_report.py` 是否存在
+- 检查Python环境是否正确
+
+## 开机自启动(可选)
+
+如果需要开机自启动提醒服务(后台运行):
+
+1. 创建启动脚本 `start_reminder_service.bat`:
+   ```batch
+   @echo off
+   cd /d C:\Python\pythonprogram\Personal_Information_Signaling_System
+   C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe daily_reminder_service.py
+   ```
+
+2. 将脚本添加到启动文件夹:
+   - 按 `Win + R`,输入 `shell:startup`
+   - 将 `start_reminder_service.bat` 的快捷方式放入此文件夹
+
+## 注意事项
+
+1. **Python路径**:确保使用虚拟环境中的Python,或使用完整路径
+2. **工作目录**:确保"起始于"目录设置正确
+3. **权限**:可能需要管理员权限运行任务计划程序
+4. **图片文件**:确保 `assets/person.png` 或 `assets/person.jpg` 存在
+
+## 卸载
+
+如果需要删除任务:
+
+1. 打开任务计划程序
+2. 找到"每日写日报提醒"任务
+3. 右键点击 → "删除"
+
+---
+
+## 📝 更新现有任务(如果项目路径已更改)
+
+如果你之前已经设置了桌面提醒任务,但项目路径已更改,需要更新任务配置:
+
+### 需要更新的信息
+
+#### 原配置信息(旧路径)
+
+如果之前已经设置了桌面提醒任务,任务中可能包含以下路径:
+
+- **程序或脚本**: `C:\Python\pythonprogram\datawhale_Agent\PersonalizationMCP-main\daily_reminder.py`
+- **起始于**: `C:\Python\pythonprogram\datawhale_Agent\PersonalizationMCP-main`
+- **Python 解释器路径**: 可能是虚拟环境路径,如 `C:\Python\pythonprogram\datawhale_Agent\.venv\Scripts\python.exe`
+
+#### 新配置信息(更新后)
+
+- **程序或脚本**: `C:\Python\pythonprogram\Personal_Information_Signaling_System\daily_reminder.py`
+- **起始于**: `C:\Python\pythonprogram\Personal_Information_Signaling_System`
+- **Python 解释器路径**: 
+  - 如果使用项目虚拟环境: `C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe`
+  - 如果使用系统 Python: `python.exe`(如果已添加到系统 PATH)
+
+### 详细更新步骤
+
+#### 步骤 1:打开任务计划程序
+
+1. 按 `Win + R` 打开运行对话框
+2. 输入 `taskschd.msc` 并按回车
+3. 或者:开始菜单 → 搜索"任务计划程序" → 打开
+
+#### 步骤 2:找到你的任务
+
+1. 在左侧"任务计划程序库"中查找你的任务
+2. 任务名称可能是:
+   - `每日写日报提醒`
+   - `Daily Report Reminder`
+   - `写日报提醒`
+   - 或其他你自定义的名称
+
+#### 步骤 3:编辑任务属性
+
+1. **右键点击任务** → 选择"属性"(或双击任务)
+2. 在弹出的属性窗口中,切换到 **"操作"** 标签页
+
+#### 步骤 4:更新"操作"配置
+
+在"操作"标签页中,更新以下字段:
+
+1. **程序或脚本**:
+   ```
+   旧值: C:\Python\pythonprogram\datawhale_Agent\PersonalizationMCP-main\daily_reminder.py
+   新值: C:\Python\pythonprogram\Personal_Information_Signaling_System\daily_reminder.py
+   ```
+   
+   或者,如果使用 Python 解释器直接运行:
+   ```
+   旧值: C:\Python\pythonprogram\datawhale_Agent\.venv\Scripts\python.exe
+   新值: C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe
+   ```
+   (如果项目有虚拟环境)
+
+2. **添加参数(可选)**:
+   ```
+   如果程序或脚本是 Python 解释器,这里应该填写:
+   daily_reminder.py
+   ```
+
+3. **起始于**:
+   ```
+   旧值: C:\Python\pythonprogram\datawhale_Agent\PersonalizationMCP-main
+   新值: C:\Python\pythonprogram\Personal_Information_Signaling_System
+   ```
+
+#### 步骤 5:确认并保存
+
+1. 点击"确定"保存更改
+2. 系统可能会要求输入管理员密码(如果任务需要管理员权限)
+
+#### 步骤 6:测试任务
+
+1. 右键点击任务 → 选择"运行"
+2. 检查是否能正常弹出提醒窗口
+3. 如果失败,检查"历史记录"标签页查看错误信息
+
+### 通过命令行更新(高级)
+
+如果你熟悉命令行,可以使用以下 PowerShell 命令:
+
+```powershell
+# 1. 查看现有任务
+Get-ScheduledTask | Where-Object {$_.TaskName -like "*日报*" -or $_.TaskName -like "*reminder*"}
+
+# 2. 获取任务的操作配置
+$task = Get-ScheduledTask -TaskName "你的任务名称"
+$task.Actions
+
+# 3. 更新任务操作(需要管理员权限)
+$action = New-ScheduledTaskAction -Execute "C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe" -Argument "daily_reminder.py" -WorkingDirectory "C:\Python\pythonprogram\Personal_Information_Signaling_System"
+Set-ScheduledTask -TaskName "你的任务名称" -Action $action
+```
+
+---
+
+## ✅ 配置检查清单
+
+设置或更新任务计划程序后,请确认:
+
+- [ ] **程序或脚本**路径正确且文件存在
+- [ ] **起始于**路径正确且目录存在
+- [ ] **添加参数**正确(如果使用 Python 解释器运行脚本)
+- [ ] **触发器**时间正确(通常是每天 23:30)
+- [ ] **测试运行**成功(右键任务 → 运行)
+
+---
+
+## 🎯 快速复制粘贴
+
+### 方案 1(项目虚拟环境)
+
+```
+程序或脚本: C:\Python\pythonprogram\Personal_Information_Signaling_System\.venv\Scripts\python.exe
+添加参数: daily_reminder.py
+起始于: C:\Python\pythonprogram\Personal_Information_Signaling_System
+```
+
+### 方案 2(系统 Python)
+
+```
+程序或脚本: python.exe
+添加参数: daily_reminder.py
+起始于: C:\Python\pythonprogram\Personal_Information_Signaling_System
+```
+

+ 156 - 156
Co-creation-projects/Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/维度分析系统使用说明.md → Co-creation-projects/lh2021739-pixel-Personal_Information_Signaling_System/维度分析系统使用说明.md

@@ -1,156 +1,156 @@
-# 维度分析系统使用说明(V1 简化版)
-
-## 功能概述
-
-维度分析系统可以:
-1. 收集和分析用户记录(日报/周报/月报)中的维度数据
-2. 统计维度出现频率
-3. 自动生成维度管理建议(新增/删除/优先级调整)
-4. 支持用户交互确认建议
-5. 记录维度演化历史
-
-## 快速开始
-
-### 1. 准备数据
-
-确保你的日报 JSON 文件包含 `dimensions` 字段(可选):
-
-```json
-{
-  "date": "2025-12-28",
-  "themes_used": ["mcp", "agent"],
-  "dimensions": ["健康", "情绪", "工作"],  // 新增字段
-  "top3": [...],
-  "action": "...",
-  "risk": "..."
-}
-```
-
-### 2. 配置文件
-
-创建 `dimension_config.json` 配置文件(如果不存在,系统会自动创建默认配置):
-
-```json
-{
-  "active_dimensions": [
-    {
-      "name": "健康",
-      "priority": 0.75,
-      "added_date": "2025-12-01",
-      "last_seen": null
-    }
-  ],
-  "candidate_dimensions": ["娱乐", "学习"],
-  "removed_dimensions": []
-}
-```
-
-### 3. 运行分析
-
-```bash
-# 基本分析(生成报告)
-python analyze_dimensions.py
-
-# 交互模式(查看并处理建议)
-python analyze_dimensions.py --interactive
-
-# 显示历史记录
-python analyze_dimensions.py --show-history
-
-# 组合使用
-python analyze_dimensions.py --interactive --show-history
-```
-
-## 输出结果
-
-### 分析报告
-
-分析报告保存在 `archive/dimension_analysis/YYYY-MM-DD_analysis.json`,包含:
-
-- `dimension_statistics`: 维度统计信息(频率、出现日期等)
-- `suggestions`: 系统生成的建议
-  - `add`: 新增维度建议
-  - `remove`: 删除维度建议
-  - `priority_adjustment`: 优先级调整建议
-- `history_summary`: 历史记录摘要
-
-### 配置文件
-
-- `dimension_config.json`: 维度配置(活跃维度、候选维度等)
-- `dimension_history.json`: 维度演化历史记录
-
-## 建议生成规则
-
-1. **新增维度建议**:候选维度列表中,超过30天未在记录中出现
-2. **删除维度建议**:活跃维度中,超过60天未在记录中出现
-3. **优先级调整建议**:维度出现频率 > 70%,但当前优先级 < 频率
-
-## 优先级计算
-
-优先级 = 出现频率 = 维度出现次数 / 总记录数
-
-范围:0.0 - 1.0
-
-## 交互模式
-
-使用 `--interactive` 参数时,系统会:
-
-1. 展示所有建议
-2. 对每个建议询问:接受 (y) / 拒绝 (n)
-3. 应用用户接受的建议
-4. 更新配置文件和历史记录
-
-## 示例
-
-```bash
-# 运行分析
-$ python analyze_dimensions.py
-
-======================================================================
-维度分析工具 - V1 简化版
-======================================================================
-
-📂 正在加载记录...
-✅ 加载完成:
-   - 日报: 2 条
-   - 周报: 0 条
-   - 月报: 0 条
-   - 总计: 2 条
-
-📊 正在分析维度...
-✅ 发现 3 个维度
-   维度频率统计(Top 5):
-   - 工作: 2次 (100.0%)
-   - 健康: 1次 (50.0%)
-   - 情绪: 1次 (50.0%)
-
-💡 正在生成建议...
-✅ 生成 4 条建议
-
-💾 分析报告已保存到: archive/dimension_analysis/2025-12-28_analysis.json
-
-✅ 分析完成!
-```
-
-## 注意事项
-
-1. **向后兼容**:如果日报 JSON 中没有 `dimensions` 字段,系统会自动添加空数组
-2. **数据格式**:周报和月报使用相同的 JSON 格式(可选)
-3. **配置文件**:如果配置文件不存在,系统会使用默认配置
-4. **历史记录**:历史记录会自动保存,可以随时查看
-
-## 文件结构
-
-```
-Personal_Information_Signaling_System/
-├── dimension_analysis.py          # 核心模块
-├── analyze_dimensions.py          # 主脚本
-├── dimension_config.json          # 维度配置
-├── dimension_history.json         # 历史记录
-├── archive/
-│   ├── youtube/                   # 日报数据
-│   └── dimension_analysis/        # 分析报告
-│       └── YYYY-MM-DD_analysis.json
-└── 维度分析系统使用说明.md        # 本文件
-```
-
+# 维度分析系统使用说明(V1 简化版)
+
+## 功能概述
+
+维度分析系统可以:
+1. 收集和分析用户记录(日报/周报/月报)中的维度数据
+2. 统计维度出现频率
+3. 自动生成维度管理建议(新增/删除/优先级调整)
+4. 支持用户交互确认建议
+5. 记录维度演化历史
+
+## 快速开始
+
+### 1. 准备数据
+
+确保你的日报 JSON 文件包含 `dimensions` 字段(可选):
+
+```json
+{
+  "date": "2025-12-28",
+  "themes_used": ["mcp", "agent"],
+  "dimensions": ["健康", "情绪", "工作"],  // 新增字段
+  "top3": [...],
+  "action": "...",
+  "risk": "..."
+}
+```
+
+### 2. 配置文件
+
+创建 `dimension_config.json` 配置文件(如果不存在,系统会自动创建默认配置):
+
+```json
+{
+  "active_dimensions": [
+    {
+      "name": "健康",
+      "priority": 0.75,
+      "added_date": "2025-12-01",
+      "last_seen": null
+    }
+  ],
+  "candidate_dimensions": ["娱乐", "学习"],
+  "removed_dimensions": []
+}
+```
+
+### 3. 运行分析
+
+```bash
+# 基本分析(生成报告)
+python analyze_dimensions.py
+
+# 交互模式(查看并处理建议)
+python analyze_dimensions.py --interactive
+
+# 显示历史记录
+python analyze_dimensions.py --show-history
+
+# 组合使用
+python analyze_dimensions.py --interactive --show-history
+```
+
+## 输出结果
+
+### 分析报告
+
+分析报告保存在 `archive/dimension_analysis/YYYY-MM-DD_analysis.json`,包含:
+
+- `dimension_statistics`: 维度统计信息(频率、出现日期等)
+- `suggestions`: 系统生成的建议
+  - `add`: 新增维度建议
+  - `remove`: 删除维度建议
+  - `priority_adjustment`: 优先级调整建议
+- `history_summary`: 历史记录摘要
+
+### 配置文件
+
+- `dimension_config.json`: 维度配置(活跃维度、候选维度等)
+- `dimension_history.json`: 维度演化历史记录
+
+## 建议生成规则
+
+1. **新增维度建议**:候选维度列表中,超过30天未在记录中出现
+2. **删除维度建议**:活跃维度中,超过60天未在记录中出现
+3. **优先级调整建议**:维度出现频率 > 70%,但当前优先级 < 频率
+
+## 优先级计算
+
+优先级 = 出现频率 = 维度出现次数 / 总记录数
+
+范围:0.0 - 1.0
+
+## 交互模式
+
+使用 `--interactive` 参数时,系统会:
+
+1. 展示所有建议
+2. 对每个建议询问:接受 (y) / 拒绝 (n)
+3. 应用用户接受的建议
+4. 更新配置文件和历史记录
+
+## 示例
+
+```bash
+# 运行分析
+$ python analyze_dimensions.py
+
+======================================================================
+维度分析工具 - V1 简化版
+======================================================================
+
+📂 正在加载记录...
+✅ 加载完成:
+   - 日报: 2 条
+   - 周报: 0 条
+   - 月报: 0 条
+   - 总计: 2 条
+
+📊 正在分析维度...
+✅ 发现 3 个维度
+   维度频率统计(Top 5):
+   - 工作: 2次 (100.0%)
+   - 健康: 1次 (50.0%)
+   - 情绪: 1次 (50.0%)
+
+💡 正在生成建议...
+✅ 生成 4 条建议
+
+💾 分析报告已保存到: archive/dimension_analysis/2025-12-28_analysis.json
+
+✅ 分析完成!
+```
+
+## 注意事项
+
+1. **向后兼容**:如果日报 JSON 中没有 `dimensions` 字段,系统会自动添加空数组
+2. **数据格式**:周报和月报使用相同的 JSON 格式(可选)
+3. **配置文件**:如果配置文件不存在,系统会使用默认配置
+4. **历史记录**:历史记录会自动保存,可以随时查看
+
+## 文件结构
+
+```
+Personal_Information_Signaling_System/
+├── dimension_analysis.py          # 核心模块
+├── analyze_dimensions.py          # 主脚本
+├── dimension_config.json          # 维度配置
+├── dimension_history.json         # 历史记录
+├── archive/
+│   ├── youtube/                   # 日报数据
+│   └── dimension_analysis/        # 分析报告
+│       └── YYYY-MM-DD_analysis.json
+└── 维度分析系统使用说明.md        # 本文件
+```
+

+ 1 - 0
README.md

@@ -81,6 +81,7 @@
 
 | 社区精选                                                                                                                                      | 内容总结                  |
 | --------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------- |
+| [00-共创毕业设计](https://github.com/datawhalechina/hello-agents/tree/main/Co-creation-projects)                                             | 社区共创毕业设计项目      |
 | [01-Agent面试题总结](https://github.com/datawhalechina/hello-agents/blob/main/Extra-Chapter/Extra01-面试问题总结.md)                          | Agent 岗位相关面试问题    |
 | [01-Agent面试题答案](https://github.com/datawhalechina/hello-agents/blob/main/Extra-Chapter/Extra01-参考答案.md)                              | 相关面试问题答案          |
 | [02-上下文工程内容补充](https://github.com/datawhalechina/hello-agents/blob/main/Extra-Chapter/Extra02-上下文工程补充知识.md)                 | 上下文工程内容扩展        |

+ 1 - 0
README_EN.md

@@ -81,6 +81,7 @@ If you wish to read locally or contribute content, please refer to the learning
 
 | Community Selection                                                                                                                                            | Content Summary                            |
 | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------ |
+| [00-Co-creation Capstone Projects](https://github.com/datawhalechina/hello-agents/tree/main/Co-creation-projects)                                             | Community co-creation capstone projects    |
 | [01-Agent Interview Questions Summary](https://github.com/datawhalechina/hello-agents/blob/main/Extra-Chapter/Extra01-面试问题总结.md)                         | Agent position-related interview questions |
 | [01-Agent Interview Answers](https://github.com/datawhalechina/hello-agents/blob/main/Extra-Chapter/Extra01-参考答案.md)                                       | Answers to related interview questions     |
 | [02-Context Engineering Content Supplement](https://github.com/datawhalechina/hello-agents/blob/main/Extra-Chapter/Extra02-上下文工程补充知识.md)              | Context engineering content extension      |

+ 1 - 0
docs/README.md

@@ -77,6 +77,7 @@
 
 | 社区精选                                                                                                                                      | 内容总结                  |
 | --------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------- |
+| [00-共创毕业设计](https://github.com/datawhalechina/hello-agents/tree/main/Co-creation-projects)                                             | 社区共创毕业设计项目      |
 | [01-Agent面试题总结](https://github.com/datawhalechina/hello-agents/blob/main/Extra-Chapter/Extra01-面试问题总结.md)                          | Agent 岗位相关面试问题    |
 | [01-Agent面试题答案](https://github.com/datawhalechina/hello-agents/blob/main/Extra-Chapter/Extra01-参考答案.md)                              | 相关面试问题答案          |
 | [02-上下文工程内容补充](https://github.com/datawhalechina/hello-agents/blob/main/Extra-Chapter/Extra02-上下文工程补充知识.md)                 | 上下文工程内容扩展        |

+ 1 - 0
docs/README_EN.md

@@ -73,6 +73,7 @@
 
 | Community Highlights                                                                                                                                           | Content Summary                            |
 | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------ |
+| [00-Co-creation Capstone Projects](https://github.com/datawhalechina/hello-agents/tree/main/Co-creation-projects)                                             | Community co-creation capstone projects    |
 | [01-Agent Interview Questions Summary](https://github.com/datawhalechina/hello-agents/blob/main/Extra-Chapter/Extra01-面试问题总结.md)                         | Agent position-related interview questions |
 | [01-Agent Interview Answers](https://github.com/datawhalechina/hello-agents/blob/main/Extra-Chapter/Extra01-参考答案.md)                                       | Answers to related interview questions     |
 | [02-Context Engineering Content Supplement](https://github.com/datawhalechina/hello-agents/blob/main/Extra-Chapter/Extra02-上下文工程补充知识.md)              | Context engineering content extension      |