""" C2 — Coverage completeness. Uses FCA and cross-tabulation to detect structural coverage gaps: attribute combinations (domain × VSM system) with no entities. """ from __future__ import annotations from dataclasses import dataclass, field from typing import Any, Dict, List, Optional from markitect.infospace.models import EntityMeta from markitect.analysis.fca import FormalContext, find_empty_cells, find_gap_concepts @dataclass class CoverageReport: """Results from coverage analysis.""" coverage_ratio: float = 0.0 empty_cells: List[dict] = field(default_factory=list) gap_concepts: List[dict] = field(default_factory=list) domain_counts: Dict[str, int] = field(default_factory=dict) entity_count: int = 0 def to_dict(self) -> dict: return { "concern": "C2", "coverage_ratio": round(self.coverage_ratio, 4), "empty_cells": self.empty_cells, "gap_concepts_count": len(self.gap_concepts), "domain_counts": self.domain_counts, "entity_count": self.entity_count, } def _extract_attributes(entity: EntityMeta) -> set[str]: """Extract FCA attributes from an entity.""" attrs: set[str] = set() if entity.domain: attrs.add(f"domain:{entity.domain}") if entity.source_chapter: attrs.add(f"chapter:{entity.source_chapter}") return attrs def check_coverage( entities: List[EntityMeta], extra_attributes: Optional[Dict[str, set[str]]] = None, ) -> CoverageReport: """Check coverage completeness using FCA gap analysis. Args: entities: Entity metadata list. extra_attributes: Optional ``{slug: {attr, ...}}`` to merge with auto-extracted attributes (e.g. VSM mappings). Returns: :class:`CoverageReport` with gaps and coverage ratio. """ n = len(entities) if n == 0: return CoverageReport() # Build entity → attributes mapping entity_attrs: Dict[str, set[str]] = {} for e in entities: attrs = _extract_attributes(e) if extra_attributes and e.slug in extra_attributes: attrs.update(extra_attributes[e.slug]) entity_attrs[e.slug] = attrs # Domain counts domain_counts: Dict[str, int] = {} for e in entities: d = e.domain or "(unspecified)" domain_counts[d] = domain_counts.get(d, 0) + 1 # Build FCA context context = FormalContext.from_dict(entity_attrs) # Cross-tabulation: domain × chapter domains = sorted({a for attrs in entity_attrs.values() for a in attrs if a.startswith("domain:")}) chapters = sorted({a for attrs in entity_attrs.values() for a in attrs if a.startswith("chapter:")}) empty = [] if domains and chapters: raw_empty = find_empty_cells(context, domains, chapters) empty = [{"dimension_a": a, "dimension_b": b} for a, b in raw_empty] # FCA gap concepts gaps = find_gap_concepts(context) gap_dicts = [ {"intent": sorted(g.intent), "extent_size": g.extent_size} for g in gaps if g.intent_size <= 4 # Only report manageable gaps ] # Coverage ratio: populated cells / total possible cells total_cells = len(domains) * len(chapters) if domains and chapters else 1 populated = total_cells - len(empty) ratio = populated / total_cells if total_cells > 0 else 0.0 return CoverageReport( coverage_ratio=ratio, empty_cells=empty, gap_concepts=gap_dicts, domain_counts=domain_counts, entity_count=n, )