1
0

citation_formatter.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
"""
InnoCore AI citation formatting utilities.
"""
  4. import re
  5. from typing import Dict, List, Optional, Any
  6. from datetime import datetime
  7. class CitationFormatter:
  8. """引用格式化器"""
  9. def __init__(self):
  10. self.month_names = {
  11. 1: "Jan", 2: "Feb", 3: "Mar", 4: "Apr", 5: "May", 6: "Jun",
  12. 7: "Jul", 8: "Aug", 9: "Sep", 10: "Oct", 11: "Nov", 12: "Dec"
  13. }
  14. def format_bibtex(self, paper_info: Dict[str, Any]) -> str:
  15. """格式化为BibTeX"""
  16. # 生成引用键
  17. citation_key = self._generate_citation_key(paper_info)
  18. # 确定条目类型
  19. entry_type = self._determine_entry_type(paper_info)
  20. # 构建BibTeX条目
  21. bibtex_lines = [f"@{entry_type}{{{citation_key}"]
  22. # 添加作者
  23. authors = paper_info.get("authors", [])
  24. if authors:
  25. formatted_authors = self._format_bibtex_authors(authors)
  26. bibtex_lines.append(f" author = {{{formatted_authors}}}")
  27. # 添加标题
  28. title = paper_info.get("title", "")
  29. if title:
  30. bibtex_lines.append(f" title = {{{title}}}")
  31. # 添加期刊/会议信息
  32. if entry_type == "article":
  33. journal = paper_info.get("journal", "")
  34. if journal:
  35. bibtex_lines.append(f" journal = {{{journal}}}")
  36. volume = paper_info.get("volume", "")
  37. if volume:
  38. bibtex_lines.append(f" volume = {{{volume}}}")
  39. number = paper_info.get("number", "")
  40. if number:
  41. bibtex_lines.append(f" number = {{{number}}}")
  42. pages = paper_info.get("pages", "")
  43. if pages:
  44. bibtex_lines.append(f" pages = {{{pages}}}")
  45. elif entry_type == "inproceedings":
  46. booktitle = paper_info.get("booktitle", "")
  47. if booktitle:
  48. bibtex_lines.append(f" booktitle = {{{booktitle}}}")
  49. pages = paper_info.get("pages", "")
  50. if pages:
  51. bibtex_lines.append(f" pages = {{{pages}}}")
  52. elif entry_type == "book":
  53. publisher = paper_info.get("publisher", "")
  54. if publisher:
  55. bibtex_lines.append(f" publisher = {{{publisher}}}")
  56. # 添加年份
  57. year = paper_info.get("year", "")
  58. if year:
  59. bibtex_lines.append(f" year = {{{year}}}")
  60. # 添加月份
  61. month = paper_info.get("month", "")
  62. if month:
  63. bibtex_lines.append(f" month = {{{month}}}")
  64. # 添加DOI
  65. doi = paper_info.get("doi", "")
  66. if doi:
  67. bibtex_lines.append(f" doi = {{{doi}}}")
  68. # 添加URL
  69. url = paper_info.get("url", "")
  70. if url:
  71. bibtex_lines.append(f" url = {{{url}}}")
  72. # 添加笔记
  73. note = paper_info.get("note", "")
  74. if note:
  75. bibtex_lines.append(f" note = {{{note}}}")
  76. # 关闭条目
  77. bibtex_lines.append("}")
  78. return "\n".join(bibtex_lines)
  79. def format_apa(self, paper_info: Dict[str, Any]) -> str:
  80. """格式化为APA格式"""
  81. authors = paper_info.get("authors", [])
  82. year = paper_info.get("year", "")
  83. title = paper_info.get("title", "")
  84. # 格式化作者
  85. author_text = self._format_apa_authors(authors)
  86. # 构建基本引用
  87. if year:
  88. citation = f"{author_text} ({year}). {title}."
  89. else:
  90. citation = f"{author_text}. {title}."
  91. # 添加期刊信息
  92. journal = paper_info.get("journal", "")
  93. volume = paper_info.get("volume", "")
  94. number = paper_info.get("number", "")
  95. pages = paper_info.get("pages", "")
  96. if journal:
  97. if volume and number:
  98. citation += f" *{journal}*, *{volume}({number})*"
  99. elif volume:
  100. citation += f" *{journal}*, *{volume}*"
  101. else:
  102. citation += f" *{journal}*"
  103. if pages:
  104. citation += f", {pages}."
  105. else:
  106. citation += "."
  107. # 添加书籍信息
  108. publisher = paper_info.get("publisher", "")
  109. if publisher:
  110. citation += f" {publisher}."
  111. # 添加会议信息
  112. booktitle = paper_info.get("booktitle", "")
  113. if booktitle:
  114. citation += f" In *{booktitle}*"
  115. if pages:
  116. citation += f" (pp. {pages})."
  117. else:
  118. citation += "."
  119. # 添加DOI
  120. doi = paper_info.get("doi", "")
  121. if doi:
  122. citation += f" https://doi.org/{doi}"
  123. return citation
  124. def format_ieee(self, paper_info: Dict[str, Any]) -> str:
  125. """格式化为IEEE格式"""
  126. authors = paper_info.get("authors", [])
  127. year = paper_info.get("year", "")
  128. title = paper_info.get("title", "")
  129. # 格式化作者(IEEE格式)
  130. author_text = self._format_ieee_authors(authors)
  131. # 构建基本引用
  132. citation = f'{author_text}, "{title},"'
  133. # 添加期刊信息
  134. journal = paper_info.get("journal", "")
  135. volume = paper_info.get("volume", "")
  136. number = paper_info.get("number", "")
  137. pages = paper_info.get("pages", "")
  138. if journal:
  139. if volume and number:
  140. citation += f" *{journal}*, vol. {volume}, no. {number}"
  141. elif volume:
  142. citation += f" *{journal}*, vol. {volume}"
  143. else:
  144. citation += f" *{journal}*"
  145. if pages:
  146. citation += f", pp. {pages}"
  147. # 添加会议信息
  148. booktitle = paper_info.get("booktitle", "")
  149. if booktitle:
  150. citation += f" in *{booktitle}*"
  151. if pages:
  152. citation += f", pp. {pages}"
  153. # 添加书籍信息
  154. publisher = paper_info.get("publisher", "")
  155. if publisher:
  156. citation += f" {publisher}"
  157. # 添加年份和月份
  158. month = paper_info.get("month", "")
  159. if year:
  160. if month:
  161. citation += f", {month}. {year}."
  162. else:
  163. citation += f", {year}."
  164. # 添加DOI
  165. doi = paper_info.get("doi", "")
  166. if doi:
  167. citation += f" doi: {doi}"
  168. return citation
  169. def format_mla(self, paper_info: Dict[str, Any]) -> str:
  170. """格式化为MLA格式"""
  171. authors = paper_info.get("authors", [])
  172. title = paper_info.get("title", "")
  173. journal = paper_info.get("journal", "")
  174. year = paper_info.get("year", "")
  175. pages = paper_info.get("pages", "")
  176. # 格式化作者(MLA格式)
  177. author_text = self._format_mla_authors(authors)
  178. # 构建基本引用
  179. if author_text:
  180. citation = f'{author_text}. "{title}."'
  181. else:
  182. citation = f'"{title}."'
  183. # 添加期刊信息
  184. if journal:
  185. citation += f" *{journal}*"
  186. if volume and number:
  187. citation += f", vol. {volume}, no. {number}"
  188. elif volume:
  189. citation += f", vol. {volume}"
  190. if year:
  191. citation += f", {year}"
  192. if pages:
  193. citation += f", pp. {pages}."
  194. else:
  195. citation += "."
  196. # 添加书籍信息
  197. publisher = paper_info.get("publisher", "")
  198. if publisher:
  199. citation += f" {publisher}"
  200. if year:
  201. citation += f", {year}."
  202. else:
  203. citation += "."
  204. return citation
  205. def format_chicago(self, paper_info: Dict[str, Any]) -> str:
  206. """格式化为Chicago格式"""
  207. authors = paper_info.get("authors", [])
  208. title = paper_info.get("title", "")
  209. journal = paper_info.get("journal", "")
  210. volume = paper_info.get("volume", "")
  211. number = paper_info.get("number", "")
  212. year = paper_info.get("year", "")
  213. pages = paper_info.get("pages", "")
  214. # 格式化作者(Chicago格式)
  215. author_text = self._format_chicago_authors(authors)
  216. # 构建基本引用
  217. if author_text:
  218. citation = f'{author_text}. "{title}."'
  219. else:
  220. citation = f'"{title}."'
  221. # 添加期刊信息
  222. if journal:
  223. citation += f" *{journal}*"
  224. if volume and number:
  225. citation += f" {volume}, no. {number}"
  226. elif volume:
  227. citation += f" {volume}"
  228. if year:
  229. citation += f" ({year})"
  230. if pages:
  231. citation += f": {pages}."
  232. else:
  233. citation += "."
  234. return citation
  235. def _generate_citation_key(self, paper_info: Dict[str, Any]) -> str:
  236. """生成引用键"""
  237. # 获取第一作者的姓氏
  238. authors = paper_info.get("authors", [])
  239. if authors:
  240. first_author = authors[0]
  241. if isinstance(first_author, str):
  242. last_name = first_author.split()[-1].lower()
  243. else:
  244. last_name = "unknown"
  245. else:
  246. last_name = "unknown"
  247. # 获取年份
  248. year = str(paper_info.get("year", datetime.now().year))
  249. # 获取标题关键词
  250. title = paper_info.get("title", "")
  251. title_words = re.findall(r'\b[a-zA-Z]{3,}\b', title.lower())[:3]
  252. title_key = "".join(title_words)
  253. return f"{last_name}{year}{title_key}"
  254. def _determine_entry_type(self, paper_info: Dict[str, Any]) -> str:
  255. """确定BibTeX条目类型"""
  256. if paper_info.get("journal"):
  257. return "article"
  258. elif paper_info.get("booktitle"):
  259. return "inproceedings"
  260. elif paper_info.get("publisher"):
  261. return "book"
  262. else:
  263. return "misc"
  264. def _format_bibtex_authors(self, authors: List[str]) -> str:
  265. """格式化BibTeX作者"""
  266. formatted_authors = []
  267. for author in authors:
  268. if isinstance(author, str):
  269. # 将 "First Last" 转换为 "Last, First"
  270. parts = author.split()
  271. if len(parts) >= 2:
  272. last_name = parts[-1]
  273. first_names = " ".join(parts[:-1])
  274. formatted_authors.append(f"{last_name}, {first_names}")
  275. else:
  276. formatted_authors.append(author)
  277. else:
  278. formatted_authors.append(str(author))
  279. return " and ".join(formatted_authors)
  280. def _format_apa_authors(self, authors: List[str]) -> str:
  281. """格式化APA作者"""
  282. if not authors:
  283. return ""
  284. if len(authors) == 1:
  285. return authors[0]
  286. elif len(authors) == 2:
  287. return f"{authors[0]} & {authors[1]}"
  288. elif len(authors) <= 20:
  289. return ", ".join(authors[:-1]) + f", & {authors[-1]}"
  290. else:
  291. return ", ".join(authors[:19]) + f", ... {authors[-1]}"
  292. def _format_ieee_authors(self, authors: List[str]) -> str:
  293. """格式化IEEE作者"""
  294. formatted_authors = []
  295. for i, author in enumerate(authors[:3]): # IEEE通常只列出前3个作者
  296. if isinstance(author, str):
  297. parts = author.split()
  298. if len(parts) >= 2:
  299. # 转换为 "F. Last" 格式
  300. initials = " ".join([f"{p[0]}." for p in parts[:-1]])
  301. last_name = parts[-1]
  302. formatted_authors.append(f"{initials} {last_name}")
  303. else:
  304. formatted_authors.append(author)
  305. else:
  306. formatted_authors.append(str(author))
  307. if len(authors) > 3:
  308. formatted_authors.append("et al.")
  309. return ", ".join(formatted_authors)
  310. def _format_mla_authors(self, authors: List[str]) -> str:
  311. """格式化MLA作者"""
  312. if not authors:
  313. return ""
  314. if len(authors) == 1:
  315. return authors[0]
  316. elif len(authors) == 2:
  317. return f"{authors[0]} and {authors[1]}"
  318. else:
  319. return f"{authors[0]}, et al."
  320. def _format_chicago_authors(self, authors: List[str]) -> str:
  321. """格式化Chicago作者"""
  322. if not authors:
  323. return ""
  324. if len(authors) == 1:
  325. return authors[0]
  326. elif len(authors) == 2:
  327. return f"{authors[0]} and {authors[1]}"
  328. else:
  329. return f"{authors[0]}, et al."
  330. def parse_bibtex(self, bibtex_text: str) -> Dict[str, Any]:
  331. """解析BibTeX文本"""
  332. paper_info = {}
  333. # 提取条目类型和键
  334. entry_match = re.match(r'@(\w+)\{([^,]+),', bibtex_text)
  335. if entry_match:
  336. paper_info["entry_type"] = entry_match.group(1)
  337. paper_info["citation_key"] = entry_match.group(2)
  338. # 提取字段
  339. field_pattern = r'\s*(\w+)\s*=\s*\{([^}]*)\}'
  340. matches = re.findall(field_pattern, bibtex_text)
  341. for field_name, field_value in matches:
  342. paper_info[field_name] = field_value
  343. return paper_info
  344. def validate_citation(self, citation: str, style: str) -> Dict[str, Any]:
  345. """验证引用格式"""
  346. validation_result = {
  347. "is_valid": True,
  348. "errors": [],
  349. "warnings": [],
  350. "suggestions": []
  351. }
  352. if style.lower() == "bibtex":
  353. validation_result = self._validate_bibtex(citation, validation_result)
  354. elif style.lower() == "apa":
  355. validation_result = self._validate_apa(citation, validation_result)
  356. elif style.lower() == "ieee":
  357. validation_result = self._validate_ieee(citation, validation_result)
  358. return validation_result
  359. def _validate_bibtex(self, citation: str, result: Dict[str, Any]) -> Dict[str, Any]:
  360. """验证BibTeX格式"""
  361. # 检查基本结构
  362. if not citation.startswith('@'):
  363. result["is_valid"] = False
  364. result["errors"].append("BibTeX必须以@开头")
  365. if not citation.endswith('}'):
  366. result["is_valid"] = False
  367. result["errors"].append("BibTeX必须以}结尾")
  368. # 检查必需字段
  369. if 'title' not in citation:
  370. result["warnings"].append("缺少title字段")
  371. if 'author' not in citation:
  372. result["warnings"].append("缺少author字段")
  373. if 'year' not in citation:
  374. result["warnings"].append("缺少year字段")
  375. return result
  376. def _validate_apa(self, citation: str, result: Dict[str, Any]) -> Dict[str, Any]:
  377. """验证APA格式"""
  378. # 检查作者格式
  379. if '(' in citation and ')' in citation:
  380. year_pattern = r'\((\d{4})\)'
  381. if not re.search(year_pattern, citation):
  382. result["warnings"].append("APA格式应包含出版年份")
  383. # 检查标题格式
  384. if not citation.strip().endswith('.'):
  385. result["warnings"].append("APA引用应以句号结尾")
  386. return result
  387. def _validate_ieee(self, citation: str, result: Dict[str, Any]) -> Dict[str, Any]:
  388. """验证IEEE格式"""
  389. # 检查引用格式
  390. if '"' not in citation:
  391. result["warnings"].append("IEEE格式中标题应使用双引号")
  392. # 检查期刊格式
  393. if '*' not in citation:
  394. result["warnings"].append("IEEE格式中期刊名应使用斜体(*)")
  395. return result
  396. def convert_between_formats(self, citation: str, from_style: str, to_style: str) -> str:
  397. """在不同格式间转换引用"""
  398. try:
  399. # 解析原始格式
  400. if from_style.lower() == "bibtex":
  401. paper_info = self.parse_bibtex(citation)
  402. else:
  403. # 对于其他格式,需要更复杂的解析逻辑
  404. # 这里提供简化实现
  405. paper_info = {
  406. "title": "",
  407. "authors": [],
  408. "year": "",
  409. "journal": ""
  410. }
  411. # 转换为目标格式
  412. if to_style.lower() == "bibtex":
  413. return self.format_bibtex(paper_info)
  414. elif to_style.lower() == "apa":
  415. return self.format_apa(paper_info)
  416. elif to_style.lower() == "ieee":
  417. return self.format_ieee(paper_info)
  418. elif to_style.lower() == "mla":
  419. return self.format_mla(paper_info)
  420. elif to_style.lower() == "chicago":
  421. return self.format_chicago(paper_info)
  422. else:
  423. return citation
  424. except Exception as e:
  425. return f"转换失败: {str(e)}"