# problem_repository.py
import os
import re
from typing import Dict, List, Optional
  4. class ProblemRepository:
  5. def __init__(self, root_dir="E:\PycharmProject_lmx\HelloAgents-main\output"):
  6. self.root_dir = root_dir
  7. self.problems = self._load_all_problems()
  8. def _load_all_problems(self) -> List[Dict]:
  9. problems = []
  10. for dirname in os.listdir(self.root_dir):
  11. readme_path = os.path.join(self.root_dir, dirname, "README.md")
  12. if not os.path.exists(readme_path):
  13. continue
  14. with open(readme_path, "r", encoding="utf-8") as f:
  15. content = f.read()
  16. problem = self._parse_problem(content)
  17. if problem:
  18. problem["slug"] = dirname
  19. problem["path"] = readme_path
  20. problem["content"] = content
  21. problems.append(problem)
  22. return problems
  23. def _parse_problem(self, text: str) -> Dict | None:
  24. title = self._extract(r"# \[(.*?)\]", text)
  25. if not title:
  26. return None
  27. description = self._extract_block(
  28. text,
  29. start="## Description",
  30. end="\\*\\*Example"
  31. )
  32. examples = self._parse_examples(text)
  33. constraints = self._extract_block(
  34. text,
  35. start="\\*\\*Constraints:\\*\\*",
  36. end="\\*\\*Follow-up"
  37. )
  38. tags = self._extract(r"\*\*Tags:\*\*(.*)", text)
  39. difficulty = self._extract(r"\*\*Difficulty:\*\*(.*)", text)
  40. return {
  41. "title": title.strip(),
  42. "description": description.strip() if description else "",
  43. "examples": examples,
  44. "constraints": constraints.strip() if constraints else "",
  45. "tags": [t.strip() for t in tags.split(",")] if tags else [],
  46. "difficulty": difficulty.strip() if difficulty else "Unknown",
  47. }
  48. def _extract_block(self, text: str, start: str, end: str) -> str | None:
  49. pattern = rf"{start}(.*?){end}"
  50. match = re.search(pattern, text, re.S)
  51. return match.group(1) if match else None
  52. def _parse_examples(self, text: str) -> List[Dict]:
  53. examples = []
  54. pattern = re.compile(
  55. r"\*\*Example\s*\d+:\*\*(.*?)(?=\*\*Example|\*\*Constraints|\Z)",
  56. re.S
  57. )
  58. for block in pattern.findall(text):
  59. input_ = self._extract(
  60. r"Input:\s*(.*?)(?=\s*Output:|\s*Explanation:|\Z)",
  61. block
  62. )
  63. output = self._extract(
  64. r"Output:\s*(.*?)(?=\s*Explanation:|\Z)",
  65. block
  66. )
  67. explanation = self._extract(
  68. r"Explanation:\s*(.*)",
  69. block
  70. )
  71. examples.append({
  72. "input": input_.strip() if input_ else "",
  73. "output": output.strip() if output else "",
  74. "explanation": explanation.strip() if explanation else ""
  75. })
  76. return examples
  77. def _extract(self, pattern: str, text: str) -> str | None:
  78. match = re.search(pattern, text)
  79. return match.group(1) if match else None
  80. def filter(self, *, tags=None, difficulty=None) -> List[Dict]:
  81. results = self.problems
  82. if tags:
  83. results = [
  84. p for p in results
  85. if any(tag in p["tags"] for tag in tags)
  86. ]
  87. if difficulty:
  88. results = [
  89. p for p in results
  90. if p["difficulty"].lower() == difficulty.lower()
  91. ]
  92. return results