""" Data models for infospace entity metadata. """ from dataclasses import dataclass, field, asdict from typing import Any, Dict, List @dataclass class EntityMeta: """Structured metadata extracted from a single entity markdown file. The parser populates every field it can find; missing optional sections are left as empty strings (validation is a separate step). """ # Identity slug: str title: str h1_raw: str # verbatim H1 text before any normalisation # Section contents (plain text, empty string if section missing) definition: str = "" source_chapter: str = "" context: str = "" domain: str = "" original_wording: str = "" modern_interpretation: str = "" # Derived flags h1_is_title_case: bool = False has_original_wording: bool = False # Metrics-ready numbers definition_word_count: int = 0 total_word_count: int = 0 # All H2 section slugs found (preserves order) section_slugs: List[str] = field(default_factory=list) # Source file path (as string for serialisation) source_path: str = "" def to_dict(self) -> Dict[str, Any]: """Serialise to a plain dictionary.""" return asdict(self) @classmethod def from_dict(cls, data: Dict[str, Any]) -> "EntityMeta": """Deserialise from a plain dictionary.""" known_fields = {f.name for f in cls.__dataclass_fields__.values()} filtered = {k: v for k, v in data.items() if k in known_fields} return cls(**filtered)