Five concern checks: Redundancy (embedding/word overlap), Coverage (FCA gap analysis), Coherence (graph connectivity), Consistency (cycle detection), Granularity (Shannon entropy). The orchestrator runs all or selected checks, and a CLI `markitect infospace check` command is added. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
103 lines
3.6 KiB
Python
103 lines
3.6 KiB
Python
"""
|
|
Unified orchestrator for all five collection-level checks.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Dict, List, Optional
|
|
|
|
from markitect.infospace.models import EntityMeta
|
|
from markitect.prompts.dependencies.models import DependencyGraph
|
|
|
|
from .redundancy import RedundancyReport, check_redundancy
|
|
from .coverage import CoverageReport, check_coverage
|
|
from .coherence import CoherenceReport, check_coherence
|
|
from .consistency import ConsistencyReport, check_consistency
|
|
from .granularity import GranularityReport, check_granularity
|
|
|
|
|
|
@dataclass
class CheckReport:
    """Unified report from all five collection-level checks.

    Each field holds the report produced by one check, or ``None`` when no
    report for that check is attached.
    """

    redundancy: Optional[RedundancyReport] = None
    coverage: Optional[CoverageReport] = None
    coherence: Optional[CoherenceReport] = None
    consistency: Optional[ConsistencyReport] = None
    granularity: Optional[GranularityReport] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize every attached report.

        Returns:
            A dict keyed by check name (in the fixed order redundancy,
            coverage, coherence, consistency, granularity) whose values are
            the individual reports' ``to_dict()`` results. Checks whose
            field is ``None`` are omitted.
        """
        d: Dict[str, Any] = {}
        for name in (
            "redundancy",
            "coverage",
            "coherence",
            "consistency",
            "granularity",
        ):
            report = getattr(self, name)
            # Test identity against None, not truthiness: a report object
            # that happens to be falsy (e.g. defines __len__/__bool__) is
            # still a report and must not be dropped.
            if report is not None:
                d[name] = report.to_dict()
        return d

    def metrics(self) -> Dict[str, float]:
        """Extract key metrics for viability checking.

        Returns:
            A flat mapping of metric name to value, containing only the
            metrics of the reports that are attached (not ``None``).
        """
        m: Dict[str, float] = {}
        # As in to_dict(), presence is decided by ``is not None`` so that a
        # falsy-but-present report still contributes its metrics.
        if self.redundancy is not None:
            m["redundancy_ratio"] = self.redundancy.redundancy_ratio
        if self.coverage is not None:
            m["coverage_ratio"] = self.coverage.coverage_ratio
        if self.coherence is not None:
            # Counts are coerced to float to keep the mapping homogeneous.
            m["coherence_components"] = float(self.coherence.connected_components)
            m["modularity"] = self.coherence.modularity
        if self.consistency is not None:
            m["consistency_cycles"] = float(self.consistency.cycle_count)
        if self.granularity is not None:
            m["granularity_entropy"] = self.granularity.domain_entropy
        return m
|
|
|
|
|
|
def run_all_checks(
    entities: List[EntityMeta],
    embeddings: Optional[Dict[str, list[float]]] = None,
    graph: Optional[DependencyGraph] = None,
    extra_attributes: Optional[Dict[str, set[str]]] = None,
    checks: Optional[List[str]] = None,
) -> CheckReport:
    """Run every collection-level check, or only the ones named in *checks*.

    Args:
        entities: Entity metadata list. Passed to the redundancy, coverage,
            consistency and granularity checks; the coherence check only
            receives its length.
        embeddings: Pre-computed embedding vectors, forwarded to the
            redundancy check.
        graph: Entity relationship graph, forwarded to the coherence and
            consistency checks.
        extra_attributes: Extra attributes, forwarded to the coverage check.
        checks: Names of the checks to run. ``None`` runs all five; an empty
            list runs none. Recognized names are ``redundancy``,
            ``coverage``, ``coherence``, ``consistency`` and
            ``granularity``; any other name is ignored.

    Returns:
        A :class:`CheckReport` whose fields are populated for the checks
        that ran and left as ``None`` for the rest.
    """
    everything = checks is None
    selected = frozenset(checks or ())

    def wanted(name: str) -> bool:
        # A check runs when no selection was given or it was named explicitly.
        return everything or name in selected

    result = CheckReport()

    if wanted("redundancy"):
        result.redundancy = check_redundancy(entities, embeddings=embeddings)
    if wanted("coverage"):
        result.coverage = check_coverage(
            entities, extra_attributes=extra_attributes
        )
    if wanted("coherence"):
        result.coherence = check_coherence(
            graph=graph, entity_count=len(entities)
        )
    if wanted("consistency"):
        result.consistency = check_consistency(entities, graph=graph)
    if wanted("granularity"):
        result.granularity = check_granularity(entities)

    return result
|