| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526 |
- """
- InnoCore AI 引用格式化工具
- """
- import re
- from typing import Dict, List, Optional, Any
- from datetime import datetime
class CitationFormatter:
    """Format, parse, validate and convert bibliographic citations.

    Paper metadata travels as a plain dict. The keys read by the formatters
    are ``authors`` (a list of "First Last" or "Last, First" strings),
    ``title``, ``year``, ``month``, ``journal``, ``volume``, ``number``,
    ``pages``, ``booktitle``, ``publisher``, ``doi``, ``url`` and ``note``.
    Every key is optional. Italics are rendered with asterisks.
    """

    # BibTeX fields emitted per entry type, between ``title`` and ``year``.
    _BIBTEX_TYPE_FIELDS = {
        "article": ("journal", "volume", "number", "pages"),
        "inproceedings": ("booktitle", "pages"),
        "book": ("publisher",),
    }
    # BibTeX fields emitted for every entry type, after the type fields.
    _BIBTEX_COMMON_FIELDS = ("year", "month", "doi", "url", "note")

    # "@type{key" header; leading whitespace is tolerated.
    _BIBTEX_ENTRY_RE = re.compile(r'\s*@(\w+)\s*\{\s*([^,\s{}]+)')
    # "name = {value}" (one level of nested braces), "name = "value"" or a
    # bare word/number such as "year = 2020".
    _BIBTEX_FIELD_RE = re.compile(
        r'(\w+)\s*=\s*(?:\{((?:[^{}]|\{[^{}]*\})*)\}|"([^"]*)"|(\w+))'
    )
    # "(2020)", "(2020a)", "(2020, May)" or "(n.d.)".
    _APA_YEAR_RE = re.compile(r'\((?:\d{4}[a-z]?(?:,[^)]*)?|n\.d\.)\)')

    def __init__(self):
        """Create the month-number to English-abbreviation table."""
        self.month_names = {
            1: "Jan", 2: "Feb", 3: "Mar", 4: "Apr", 5: "May", 6: "Jun",
            7: "Jul", 8: "Aug", 9: "Sep", 10: "Oct", 11: "Nov", 12: "Dec",
        }

    # ------------------------------------------------------------------
    # Formatters
    # ------------------------------------------------------------------

    def format_bibtex(self, paper_info: Dict[str, Any]) -> str:
        """Render *paper_info* as a BibTeX entry.

        The entry type and citation key are derived from the metadata.
        Fields appear in the order author, title, type-specific fields,
        year, month, doi, url, note; empty fields are omitted.

        Args:
            paper_info: Paper metadata dict.

        Returns:
            The BibTeX entry, with the header and fields separated by
            commas and the closing brace on its own line.
        """
        citation_key = self._generate_citation_key(paper_info)
        entry_type = self._determine_entry_type(paper_info)

        entries = [f"@{entry_type}{{{citation_key}"]

        author_field = self._format_bibtex_authors(paper_info.get("authors", []))
        if author_field:
            entries.append(f" author = {{{author_field}}}")

        ordered_fields = (
            ("title",)
            + self._BIBTEX_TYPE_FIELDS.get(entry_type, ())
            + self._BIBTEX_COMMON_FIELDS
        )
        for name in ordered_fields:
            value = paper_info.get(name, "")
            if value:
                entries.append(f" {name} = {{{value}}}")

        # BibTeX requires a comma after the key and between fields; without
        # them the entry is rejected by bibtex and cannot be parsed back.
        return ",\n".join(entries) + "\n}"

    def format_apa(self, paper_info: Dict[str, Any]) -> str:
        """Render *paper_info* as an APA-style reference.

        Args:
            paper_info: Paper metadata dict.

        Returns:
            The reference string. A DOI, when present, is appended as an
            ``https://doi.org/`` URL.
        """
        author_text = self._format_apa_authors(paper_info.get("authors", []))
        year = self._text(paper_info, "year")
        title = self._text(paper_info, "title")
        pages = self._text(paper_info, "pages")

        # Join only the parts that exist so a missing author list does not
        # leave a stray leading space or period.
        year_text = f"({year})" if year else ""
        head = " ".join(part for part in (author_text, year_text) if part)
        citation = f"{head}. {title}." if head else f"{title}."

        journal = self._text(paper_info, "journal")
        if journal:
            volume = self._text(paper_info, "volume")
            number = self._text(paper_info, "number")
            if volume and number:
                citation += f" *{journal}*, *{volume}({number})*"
            elif volume:
                citation += f" *{journal}*, *{volume}*"
            else:
                citation += f" *{journal}*"
            citation += f", {pages}." if pages else "."

        publisher = self._text(paper_info, "publisher")
        if publisher:
            citation += f" {publisher}."

        booktitle = self._text(paper_info, "booktitle")
        if booktitle:
            citation += f" In *{booktitle}*"
            citation += f" (pp. {pages})." if pages else "."

        doi = self._text(paper_info, "doi")
        if doi:
            citation += f" {self._doi_url(doi)}"

        return citation

    def format_ieee(self, paper_info: Dict[str, Any]) -> str:
        """Render *paper_info* as an IEEE-style reference.

        A numeric ``month`` (1-12, int or digit string) is printed as its
        English abbreviation; any other month value is printed as given.

        Args:
            paper_info: Paper metadata dict.

        Returns:
            The reference string.
        """
        author_text = self._format_ieee_authors(paper_info.get("authors", []))
        year = self._text(paper_info, "year")
        title = self._text(paper_info, "title")
        pages = self._text(paper_info, "pages")

        # Skip the separator when there are no authors to separate from.
        if author_text:
            citation = f'{author_text}, "{title},"'
        else:
            citation = f'"{title},"'

        journal = self._text(paper_info, "journal")
        if journal:
            volume = self._text(paper_info, "volume")
            number = self._text(paper_info, "number")
            if volume and number:
                citation += f" *{journal}*, vol. {volume}, no. {number}"
            elif volume:
                citation += f" *{journal}*, vol. {volume}"
            else:
                citation += f" *{journal}*"
            if pages:
                citation += f", pp. {pages}"

        booktitle = self._text(paper_info, "booktitle")
        if booktitle:
            citation += f" in *{booktitle}*"
            if pages:
                citation += f", pp. {pages}"

        publisher = self._text(paper_info, "publisher")
        if publisher:
            citation += f" {publisher}"

        month = self._text(paper_info, "month")
        if year:
            if month:
                citation += f", {self._month_label(month)}. {year}."
            else:
                citation += f", {year}."

        doi = self._text(paper_info, "doi")
        if doi:
            citation += f" doi: {doi}"

        return citation

    def format_mla(self, paper_info: Dict[str, Any]) -> str:
        """Render *paper_info* as an MLA-style reference.

        Args:
            paper_info: Paper metadata dict.

        Returns:
            The reference string.
        """
        author_text = self._format_mla_authors(paper_info.get("authors", []))
        title = self._text(paper_info, "title")
        journal = self._text(paper_info, "journal")
        # volume/number must be read here: the journal branch below uses
        # them, and leaving them undefined raises NameError.
        volume = self._text(paper_info, "volume")
        number = self._text(paper_info, "number")
        year = self._text(paper_info, "year")
        pages = self._text(paper_info, "pages")

        if author_text:
            citation = f'{author_text}. "{title}."'
        else:
            citation = f'"{title}."'

        if journal:
            citation += f" *{journal}*"
            if volume and number:
                citation += f", vol. {volume}, no. {number}"
            elif volume:
                citation += f", vol. {volume}"
            if year:
                citation += f", {year}"
            citation += f", pp. {pages}." if pages else "."

        publisher = self._text(paper_info, "publisher")
        if publisher:
            citation += f" {publisher}"
            citation += f", {year}." if year else "."

        return citation

    def format_chicago(self, paper_info: Dict[str, Any]) -> str:
        """Render *paper_info* as a Chicago-style reference.

        Only journal details are appended after the author and title.

        Args:
            paper_info: Paper metadata dict.

        Returns:
            The reference string.
        """
        author_text = self._format_chicago_authors(paper_info.get("authors", []))
        title = self._text(paper_info, "title")
        journal = self._text(paper_info, "journal")
        volume = self._text(paper_info, "volume")
        number = self._text(paper_info, "number")
        year = self._text(paper_info, "year")
        pages = self._text(paper_info, "pages")

        if author_text:
            citation = f'{author_text}. "{title}."'
        else:
            citation = f'"{title}."'

        if journal:
            citation += f" *{journal}*"
            if volume and number:
                citation += f" {volume}, no. {number}"
            elif volume:
                citation += f" {volume}"
            if year:
                citation += f" ({year})"
            citation += f": {pages}." if pages else "."

        return citation

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _text(paper_info: Dict[str, Any], key: str) -> Any:
        """Return ``paper_info[key]``, mapping a missing or None value to ""."""
        value = paper_info.get(key)
        return "" if value is None else value

    @staticmethod
    def _clean_authors(authors: Any) -> List[str]:
        """Normalise an ``authors`` value to a list of non-empty strings.

        Accepts None, a single string, or an iterable of names. Non-string
        items are converted with ``str``; blank names are dropped.
        """
        if not authors:
            return []
        if isinstance(authors, str):
            # A bare string would otherwise be iterated character by character.
            authors = [authors]
        stripped = (str(author).strip() for author in authors)
        return [name for name in stripped if name]

    @staticmethod
    def _split_name(name: str) -> tuple:
        """Split a personal name into ``(given, family)``.

        "Last, First" is split at the first comma; otherwise the last
        whitespace-separated word is the family name. Either part may be "".
        """
        if "," in name:
            family, _, given = name.partition(",")
            return given.strip(), family.strip()
        parts = name.split()
        if not parts:
            return "", ""
        return " ".join(parts[:-1]), parts[-1]

    @staticmethod
    def _doi_url(doi: Any) -> str:
        """Return *doi* as a resolver URL, leaving an existing URL untouched."""
        text = str(doi).strip()
        if text.lower().startswith(("http://", "https://")):
            return text
        return f"https://doi.org/{text}"

    def _month_label(self, month: Any) -> str:
        """Return the display text for *month*.

        Month numbers 1-12 map to their abbreviation from
        ``self.month_names``. Other values are returned as text with any
        trailing period removed, because the caller appends its own.
        """
        text = str(month).strip()
        if text.isdecimal():
            return self.month_names.get(int(text), text)
        return text.rstrip(".")

    def _generate_citation_key(self, paper_info: Dict[str, Any]) -> str:
        """Build a citation key: first author's family name + year + title words.

        The family name is lower-cased, or "unknown" when there is no usable
        first author. The year falls back to the current year when missing
        or empty. Up to three title words of at least three ASCII letters
        are appended.
        """
        authors = paper_info.get("authors") or []
        if isinstance(authors, str):
            authors = [authors]

        last_name = "unknown"
        first_author = authors[0] if authors else None
        if isinstance(first_author, str) and first_author.strip():
            family = self._split_name(first_author)[1]
            # A family name taken from "Last, First" may contain spaces,
            # which are not allowed in a key.
            last_name = "".join(family.lower().split()) or "unknown"

        year = str(paper_info.get("year") or datetime.now().year)

        title = str(paper_info.get("title") or "")
        title_words = re.findall(r'\b[a-zA-Z]{3,}\b', title.lower())[:3]

        return f"{last_name}{year}{''.join(title_words)}"

    def _determine_entry_type(self, paper_info: Dict[str, Any]) -> str:
        """Pick the BibTeX entry type from whichever venue field is set.

        ``journal`` wins over ``booktitle``, which wins over ``publisher``;
        with none of them the type is "misc".
        """
        if paper_info.get("journal"):
            return "article"
        if paper_info.get("booktitle"):
            return "inproceedings"
        if paper_info.get("publisher"):
            return "book"
        return "misc"

    # ------------------------------------------------------------------
    # Author formatting
    # ------------------------------------------------------------------

    def _format_bibtex_authors(self, authors: List[str]) -> str:
        """Return authors as "Last, First" joined with " and "."""
        formatted = []
        for name in self._clean_authors(authors):
            given, family = self._split_name(name)
            # Single-word names have nothing to reorder.
            formatted.append(f"{family}, {given}" if given and family else name)
        return " and ".join(formatted)

    def _format_apa_authors(self, authors: List[str]) -> str:
        """Return the APA author list.

        Two names are joined with "&", up to twenty with commas and a final
        "&"; beyond twenty, the first nineteen are followed by "..." and the
        last name.
        """
        names = self._clean_authors(authors)
        if not names:
            return ""
        if len(names) == 1:
            return names[0]
        if len(names) == 2:
            return f"{names[0]} & {names[1]}"
        if len(names) <= 20:
            return ", ".join(names[:-1]) + f", & {names[-1]}"
        return ", ".join(names[:19]) + f", ... {names[-1]}"

    def _format_ieee_authors(self, authors: List[str]) -> str:
        """Return up to three authors as "F. Last", adding "et al." if more."""
        names = self._clean_authors(authors)
        shown = []
        for name in names[:3]:
            given, family = self._split_name(name)
            if given and family:
                initials = " ".join(f"{part[0]}." for part in given.split())
                shown.append(f"{initials} {family}")
            else:
                shown.append(name)

        if len(names) > 3:
            shown.append("et al.")

        return ", ".join(shown)

    def _format_lead_author_list(self, authors: List[str]) -> str:
        """Return one name, "A and B", or "A, et al." for three or more."""
        names = self._clean_authors(authors)
        if not names:
            return ""
        if len(names) == 1:
            return names[0]
        if len(names) == 2:
            return f"{names[0]} and {names[1]}"
        return f"{names[0]}, et al."

    def _format_mla_authors(self, authors: List[str]) -> str:
        """Return the MLA author list (see ``_format_lead_author_list``)."""
        return self._format_lead_author_list(authors)

    def _format_chicago_authors(self, authors: List[str]) -> str:
        """Return the Chicago author list (see ``_format_lead_author_list``)."""
        return self._format_lead_author_list(authors)

    # ------------------------------------------------------------------
    # Parsing
    # ------------------------------------------------------------------

    def parse_bibtex(self, bibtex_text: str) -> Dict[str, Any]:
        """Parse a single BibTeX entry into a metadata dict.

        Args:
            bibtex_text: Text of one BibTeX entry.

        Returns:
            A dict with ``entry_type`` and ``citation_key`` (when the header
            is recognised) and one item per field, keyed by the field name
            as written. When an ``author`` field is present and there is no
            ``authors`` field, ``authors`` is added as a list of
            "First Last" names so the result can be passed to the formatters.
        """
        paper_info: Dict[str, Any] = {}

        entry_match = self._BIBTEX_ENTRY_RE.match(bibtex_text)
        if entry_match:
            paper_info["entry_type"] = entry_match.group(1)
            paper_info["citation_key"] = entry_match.group(2)

        for field_match in self._BIBTEX_FIELD_RE.finditer(bibtex_text):
            name = field_match.group(1)
            # Exactly one of the three value groups took part in the match.
            value = next(
                group for group in field_match.groups()[1:] if group is not None
            )
            paper_info[name] = value

        author_field = paper_info.get("author")
        if author_field and "authors" not in paper_info:
            authors = []
            for raw_name in re.split(r'\s+and\s+', author_field.strip()):
                given, family = self._split_name(raw_name)
                natural = f"{given} {family}".strip()
                if natural:
                    authors.append(natural)
            paper_info["authors"] = authors

        return paper_info

    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------

    def validate_citation(self, citation: str, style: str) -> Dict[str, Any]:
        """Check *citation* against the conventions of *style*.

        Supported styles are "bibtex", "apa" and "ieee" (case-insensitive).
        Any other style is returned as valid with no messages.

        Returns:
            A dict with ``is_valid`` (bool) and the lists ``errors``,
            ``warnings`` and ``suggestions``.
        """
        validation_result = {
            "is_valid": True,
            "errors": [],
            "warnings": [],
            "suggestions": [],
        }

        validators = {
            "bibtex": self._validate_bibtex,
            "apa": self._validate_apa,
            "ieee": self._validate_ieee,
        }
        validator = validators.get(style.lower())
        if validator is not None:
            validation_result = validator(citation, validation_result)

        return validation_result

    def _validate_bibtex(self, citation: str, result: Dict[str, Any]) -> Dict[str, Any]:
        """Check the outer structure and the title/author/year fields."""
        # Surrounding whitespace (e.g. a trailing newline) is not an error.
        text = citation.strip()

        if not text.startswith('@'):
            result["is_valid"] = False
            result["errors"].append("BibTeX必须以@开头")

        if not text.endswith('}'):
            result["is_valid"] = False
            result["errors"].append("BibTeX必须以}结尾")

        # Look for "name =" at a word boundary: a plain substring test is
        # satisfied by "booktitle" or by the word appearing inside a value.
        for field in ("title", "author", "year"):
            if not re.search(rf'\b{field}\s*=', text, re.IGNORECASE):
                result["warnings"].append(f"缺少{field}字段")

        return result

    def _validate_apa(self, citation: str, result: Dict[str, Any]) -> Dict[str, Any]:
        """Check for a parenthesised year and a proper ending."""
        text = citation.strip()

        # Warn whenever no "(year)" is found, including when the text has no
        # parentheses at all.
        if not self._APA_YEAR_RE.search(text):
            result["warnings"].append("APA格式应包含出版年份")

        # A reference that ends with a DOI/URL carries no final period.
        ends_with_url = re.search(r'https?://\S+$', text) is not None
        if not (text.endswith('.') or ends_with_url):
            result["warnings"].append("APA引用应以句号结尾")

        return result

    def _validate_ieee(self, citation: str, result: Dict[str, Any]) -> Dict[str, Any]:
        """Check for a quoted title and an asterisk-italicised venue."""
        if '"' not in citation:
            result["warnings"].append("IEEE格式中标题应使用双引号")

        if '*' not in citation:
            result["warnings"].append("IEEE格式中期刊名应使用斜体(*)")

        return result

    # ------------------------------------------------------------------
    # Conversion
    # ------------------------------------------------------------------

    def convert_between_formats(self, citation: str, from_style: str, to_style: str) -> str:
        """Convert *citation* from one style to another.

        Only BibTeX can be parsed, so only BibTeX sources are converted.
        For any other source style, or an unknown target style, the input
        is returned unchanged.

        Returns:
            The converted citation, the unchanged input, or a message
            starting with "转换失败" when conversion raises.
        """
        try:
            source = from_style.lower()
            formatters = {
                "bibtex": self.format_bibtex,
                "apa": self.format_apa,
                "ieee": self.format_ieee,
                "mla": self.format_mla,
                "chicago": self.format_chicago,
            }
            formatter = formatters.get(to_style.lower())

            # Without a parser for the source style there is no metadata to
            # format; returning the input beats emitting an empty citation.
            if source != "bibtex" or formatter is None:
                return citation

            return formatter(self.parse_bibtex(citation))

        except Exception as e:
            # Deliberate boundary: callers receive a message, not a raise.
            return f"转换失败: {str(e)}"
|