Five concern checks: Redundancy (embedding/word overlap), Coverage (FCA gap analysis), Coherence (graph connectivity), Consistency (cycle detection), Granularity (Shannon entropy). Orchestrator runs all or selected checks, CLI `markitect infospace check` command added. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
112 lines
3.5 KiB
Python
112 lines
3.5 KiB
Python
"""
|
||
C2 — Coverage completeness.
|
||
|
||
Uses FCA and cross-tabulation to detect structural coverage gaps:
|
||
attribute combinations (domain × VSM system) with no entities.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from dataclasses import dataclass, field
|
||
from typing import Any, Dict, List, Optional
|
||
|
||
from markitect.infospace.models import EntityMeta
|
||
from markitect.analysis.fca import FormalContext, find_empty_cells, find_gap_concepts
|
||
|
||
|
||
@dataclass
class CoverageReport:
    """Results from coverage analysis (concern C2).

    Attributes:
        coverage_ratio: Fraction of cross-tabulation cells that are
            populated, as computed by ``check_coverage``.
        empty_cells: One dict per empty cross-tabulation cell, with the
            keys ``"dimension_a"`` and ``"dimension_b"``.
        gap_concepts: One dict per reported FCA gap concept, with the
            keys ``"intent"`` and ``"extent_size"``.
        domain_counts: Number of entities per domain; entities without a
            domain are counted under ``"(unspecified)"``.
        entity_count: Total number of entities analysed.
    """

    coverage_ratio: float = 0.0
    empty_cells: List[dict] = field(default_factory=list)
    gap_concepts: List[dict] = field(default_factory=list)
    domain_counts: Dict[str, int] = field(default_factory=dict)
    entity_count: int = 0

    def to_dict(self) -> dict:
        """Return a summary of the report as a plain dictionary.

        The ratio is rounded to four decimal places. Gap concepts are
        summarised by their count only; the full list stays available
        on ``self.gap_concepts``.
        """
        return {
            "concern": "C2",
            "coverage_ratio": round(self.coverage_ratio, 4),
            "empty_cells": self.empty_cells,
            "gap_concepts_count": len(self.gap_concepts),
            "domain_counts": self.domain_counts,
            "entity_count": self.entity_count,
        }
|
||
|
||
|
||
def _extract_attributes(entity: EntityMeta) -> set[str]:
|
||
"""Extract FCA attributes from an entity."""
|
||
attrs: set[str] = set()
|
||
if entity.domain:
|
||
attrs.add(f"domain:{entity.domain}")
|
||
if entity.source_chapter:
|
||
attrs.add(f"chapter:{entity.source_chapter}")
|
||
return attrs
|
||
|
||
|
||
def check_coverage(
    entities: List[EntityMeta],
    extra_attributes: Optional[Dict[str, set[str]]] = None,
    *,
    max_gap_intent_size: Optional[int] = 4,
) -> CoverageReport:
    """Check coverage completeness using FCA gap analysis.

    Args:
        entities: Entity metadata list.
        extra_attributes: Optional ``{slug: {attr, ...}}`` to merge
            with auto-extracted attributes (e.g. VSM mappings).
        max_gap_intent_size: Only gap concepts whose ``intent_size`` is
            at most this value are reported, to keep the report
            manageable. ``None`` disables the filter.

    Returns:
        :class:`CoverageReport` with gaps and coverage ratio. An empty
        ``entities`` list yields a default (all-zero) report.
    """
    n = len(entities)
    if n == 0:
        return CoverageReport()

    extras = extra_attributes or {}

    # Single pass: entity -> attributes mapping and per-domain counts.
    # Entities sharing a slug overwrite each other in the mapping.
    entity_attrs: Dict[str, set[str]] = {}
    domain_counts: Dict[str, int] = {}
    for e in entities:
        attrs = _extract_attributes(e)
        attrs.update(extras.get(e.slug, ()))
        entity_attrs[e.slug] = attrs

        domain_key = e.domain or "(unspecified)"
        domain_counts[domain_key] = domain_counts.get(domain_key, 0) + 1

    # Build FCA context
    context = FormalContext.from_dict(entity_attrs)

    # Cross-tabulation axes: every distinct domain:* and chapter:*
    # attribute, including any that arrived through extra_attributes.
    all_attrs: set[str] = set().union(*entity_attrs.values())
    domains = sorted(a for a in all_attrs if a.startswith("domain:"))
    chapters = sorted(a for a in all_attrs if a.startswith("chapter:"))

    empty: List[dict] = []
    if domains and chapters:
        # find_empty_cells yields (a, b) pairs; presumably the
        # domain/chapter combinations shared by no entity — confirm
        # against markitect.analysis.fca.
        empty = [
            {"dimension_a": a, "dimension_b": b}
            for a, b in find_empty_cells(context, domains, chapters)
        ]

    # FCA gap concepts, limited to intents small enough to act on.
    gap_dicts = [
        {"intent": sorted(g.intent), "extent_size": g.extent_size}
        for g in find_gap_concepts(context)
        if max_gap_intent_size is None or g.intent_size <= max_gap_intent_size
    ]

    # Coverage ratio: populated cells / total possible cells.
    if domains and chapters:
        total_cells = len(domains) * len(chapters)
        ratio = (total_cells - len(empty)) / total_cells
    else:
        # Without both axes there is no grid to inspect, so nothing is
        # counted as missing and the ratio is reported as 1.0.
        ratio = 1.0

    return CoverageReport(
        coverage_ratio=ratio,
        empty_cells=empty,
        gap_concepts=gap_dicts,
        domain_counts=domain_counts,
        entity_count=n,
    )
|