import json
from typing import Dict, Any, Optional, List

from llama_index.core import Document
from pydantic import Field


class CustomDocument(Document):
    """Document subclass that adds a type, a priority and free-form tags.

    All constructor keyword arguments are forwarded unchanged to
    ``Document``; the three fields declared below are accepted in addition
    to whatever the parent class accepts.
    """

    # Extra fields declared on top of those inherited from ``Document``.
    # The ``description`` strings are runtime schema data and are kept as-is.
    document_type: str = Field(default="general", description="文档类型")
    priority: int = Field(default=1, description="处理优先级")
    custom_tags: List[str] = Field(default_factory=list, description="自定义标签")

    def __init__(self, **data: Any):
        """Create the document from keyword arguments.

        Args:
            **data: Field values, passed straight through to the parent
                initializer.
        """
        # Delegate entirely to the parent initializer; per the original
        # author's note, that is where backward-compatibility handling of
        # the arguments takes place.
        super().__init__(**data)

    def add_custom_tag(self, tag: str) -> None:
        """Append ``tag`` to ``custom_tags`` (duplicates are not filtered)."""
        self.custom_tags.append(tag)

    def get_document_info(self) -> Dict[str, Any]:
        """Return a summary of the document as a plain dictionary.

        Returns:
            A dict with the keys ``"id"``, ``"type"``, ``"priority"``,
            ``"tags"``, ``"metadata"`` and ``"text_length"``.
            ``"text_length"`` is ``0`` when the document text is empty or
            unset. ``"tags"`` and ``"metadata"`` are shallow copies, so
            editing the returned dict does not modify the document.
        """
        text = self.text
        return {
            "id": self.id_,
            "type": self.document_type,
            "priority": self.priority,
            # Copy the containers: handing out the live list/dict would let
            # callers mutate the document by editing the report.
            "tags": list(self.custom_tags),
            "metadata": dict(self.metadata),
            "text_length": len(text) if text else 0,
        }


# Build an example document.
custom_doc = CustomDocument(
    text="这是文档内容...",
    metadata={"source": "内部文档", "author": "张三"},
    document_type="report",
    priority=5,
    custom_tags=["重要", "待审核"],
)

# Exercise the custom methods.
custom_doc.add_custom_tag("技术文档")
info = custom_doc.get_document_info()
print(info)