Files
markitect-main/markitect/infospace/checks/orchestrator.py
tegwick 11585e6968 feat(infospace): add collection-level quality checks C1–C5 (S2.4)
Five concern checks: Redundancy (embedding/word overlap), Coverage
(FCA gap analysis), Coherence (graph connectivity), Consistency
(cycle detection), Granularity (Shannon entropy). Orchestrator runs
all or selected checks, CLI `markitect infospace check` command added.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 01:54:22 +01:00

103 lines
3.6 KiB
Python

"""
Unified orchestrator for all five collection-level checks.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from markitect.infospace.models import EntityMeta
from markitect.prompts.dependencies.models import DependencyGraph
from .redundancy import RedundancyReport, check_redundancy
from .coverage import CoverageReport, check_coverage
from .coherence import CoherenceReport, check_coherence
from .consistency import ConsistencyReport, check_consistency
from .granularity import GranularityReport, check_granularity
@dataclass
class CheckReport:
    """Unified report from all five collection-level checks.

    Each field holds the report of one check, or ``None`` when that check
    has not been run.  Presence is always decided with ``is not None`` so
    that a populated report which happens to evaluate as falsy (e.g. one
    that defines ``__len__``/``__bool__`` and contains no findings) is
    still serialised and still contributes its metrics.
    """

    # One slot per check; ``None`` means "no result for this check".
    redundancy: Optional[RedundancyReport] = None
    coverage: Optional[CoverageReport] = None
    coherence: Optional[CoherenceReport] = None
    consistency: Optional[ConsistencyReport] = None
    granularity: Optional[GranularityReport] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialise every available sub-report.

        Returns:
            A dict keyed by check name (``redundancy``, ``coverage``,
            ``coherence``, ``consistency``, ``granularity``) whose values
            are the corresponding report's ``to_dict()`` output.  Checks
            without a result are omitted.
        """
        d: Dict[str, Any] = {}
        if self.redundancy is not None:
            d["redundancy"] = self.redundancy.to_dict()
        if self.coverage is not None:
            d["coverage"] = self.coverage.to_dict()
        if self.coherence is not None:
            d["coherence"] = self.coherence.to_dict()
        if self.consistency is not None:
            d["consistency"] = self.consistency.to_dict()
        if self.granularity is not None:
            d["granularity"] = self.granularity.to_dict()
        return d

    def metrics(self) -> Dict[str, float]:
        """Extract key metrics for viability checking.

        Returns:
            A flat mapping of metric name to value.  Only metrics whose
            source report is available are included.  Integer counts
            (connected components, cycles) are converted to ``float``.
        """
        m: Dict[str, float] = {}
        if self.redundancy is not None:
            m["redundancy_ratio"] = self.redundancy.redundancy_ratio
        if self.coverage is not None:
            m["coverage_ratio"] = self.coverage.coverage_ratio
        if self.coherence is not None:
            # The coherence report contributes two metrics.
            m["coherence_components"] = float(self.coherence.connected_components)
            m["modularity"] = self.coherence.modularity
        if self.consistency is not None:
            m["consistency_cycles"] = float(self.consistency.cycle_count)
        if self.granularity is not None:
            m["granularity_entropy"] = self.granularity.domain_entropy
        return m
def run_all_checks(
    entities: List[EntityMeta],
    embeddings: Optional[Dict[str, list[float]]] = None,
    graph: Optional[DependencyGraph] = None,
    extra_attributes: Optional[Dict[str, set[str]]] = None,
    checks: Optional[List[str]] = None,
) -> CheckReport:
    """Execute the collection-level checks and gather their reports.

    Args:
        entities: Entity metadata list; handed to the redundancy,
            coverage, consistency and granularity checks, and its length
            is passed to the coherence check as ``entity_count``.
        embeddings: Pre-computed embedding vectors, forwarded to the
            redundancy check.
        graph: Entity relationship graph, forwarded to the coherence and
            consistency checks.
        extra_attributes: Extra attributes, forwarded to the coverage
            check.
        checks: Names of the checks to run.  ``None`` runs all five.  An
            empty list runs none.  Recognised names are ``redundancy``,
            ``coverage``, ``coherence``, ``consistency`` and
            ``granularity``; any other name is ignored.

    Returns:
        A :class:`CheckReport` in which only the checks that were run
        have a report; the remaining fields stay ``None``.
    """
    requested = set(checks) if checks else set()
    everything = checks is None

    def wanted(name: str) -> bool:
        # A check runs when no selection was given or when it was named.
        return everything or name in requested

    result = CheckReport()

    # Checks are evaluated in a fixed order: redundancy, coverage,
    # coherence, consistency, granularity.
    if wanted("redundancy"):
        result.redundancy = check_redundancy(entities, embeddings=embeddings)

    if wanted("coverage"):
        result.coverage = check_coverage(
            entities, extra_attributes=extra_attributes
        )

    if wanted("coherence"):
        result.coherence = check_coherence(
            graph=graph, entity_count=len(entities)
        )

    if wanted("consistency"):
        result.consistency = check_consistency(entities, graph=graph)

    if wanted("granularity"):
        result.granularity = check_granularity(entities)

    return result