feat(infospace): add collection-level quality checks C1–C5 (S2.4)
Adds five concern checks: Redundancy (embedding/word overlap), Coverage (FCA gap analysis), Coherence (graph connectivity), Consistency (cycle detection), and Granularity (Shannon entropy). The orchestrator runs either all checks or a selected subset, and a new CLI command, `markitect infospace check`, exposes it. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
102
markitect/infospace/checks/orchestrator.py
Normal file
102
markitect/infospace/checks/orchestrator.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""
|
||||
Unified orchestrator for all five collection-level checks.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from markitect.infospace.models import EntityMeta
|
||||
from markitect.prompts.dependencies.models import DependencyGraph
|
||||
|
||||
from .redundancy import RedundancyReport, check_redundancy
|
||||
from .coverage import CoverageReport, check_coverage
|
||||
from .coherence import CoherenceReport, check_coherence
|
||||
from .consistency import ConsistencyReport, check_consistency
|
||||
from .granularity import GranularityReport, check_granularity
|
||||
|
||||
|
||||
@dataclass
class CheckReport:
    """Unified report from all five collection-level checks.

    Each field holds the report of one check, or ``None`` when that check
    was not run.  Presence is decided with ``is not None`` rather than
    truthiness, so a report object that happens to evaluate as falsy
    (e.g. one defining ``__len__`` or ``__bool__``) is still included.
    """

    # One slot per check; ``None`` means "this check was not run".
    redundancy: Optional[RedundancyReport] = None
    coverage: Optional[CoverageReport] = None
    coherence: Optional[CoherenceReport] = None
    consistency: Optional[ConsistencyReport] = None
    granularity: Optional[GranularityReport] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialise every report that is present.

        Returns:
            A dict keyed by check name whose values are the result of the
            corresponding report's ``to_dict()``.  Checks whose field is
            ``None`` are omitted.
        """
        d: Dict[str, Any] = {}
        if self.redundancy is not None:
            d["redundancy"] = self.redundancy.to_dict()
        if self.coverage is not None:
            d["coverage"] = self.coverage.to_dict()
        if self.coherence is not None:
            d["coherence"] = self.coherence.to_dict()
        if self.consistency is not None:
            d["consistency"] = self.consistency.to_dict()
        if self.granularity is not None:
            d["granularity"] = self.granularity.to_dict()
        return d

    def metrics(self) -> Dict[str, float]:
        """Extract key metrics for viability checking.

        Returns:
            A flat mapping of metric name to value, containing entries only
            for the checks whose report is present.  Integer counts
            (connected components, cycle count) are converted to ``float``.
        """
        m: Dict[str, float] = {}
        if self.redundancy is not None:
            m["redundancy_ratio"] = self.redundancy.redundancy_ratio
        if self.coverage is not None:
            m["coverage_ratio"] = self.coverage.coverage_ratio
        if self.coherence is not None:
            m["coherence_components"] = float(self.coherence.connected_components)
            m["modularity"] = self.coherence.modularity
        if self.consistency is not None:
            m["consistency_cycles"] = float(self.consistency.cycle_count)
        if self.granularity is not None:
            m["granularity_entropy"] = self.granularity.domain_entropy
        return m
|
||||
|
||||
|
||||
def run_all_checks(
    entities: List[EntityMeta],
    embeddings: Optional[Dict[str, list[float]]] = None,
    graph: Optional[DependencyGraph] = None,
    extra_attributes: Optional[Dict[str, set[str]]] = None,
    checks: Optional[List[str]] = None,
) -> CheckReport:
    """Execute the collection-level checks and gather their reports.

    Args:
        entities: Entity metadata list, passed to every check (the coherence
            check receives only its length).
        embeddings: Pre-computed embedding vectors, forwarded to the
            redundancy check (C1).
        graph: Entity relationship graph, forwarded to the coherence (C3)
            and consistency (C4) checks.
        extra_attributes: Extra FCA attributes, forwarded to the coverage
            check (C2).
        checks: Names of the checks to run.  ``None`` runs all five; an
            empty list runs none.  Recognised names: ``redundancy``,
            ``coverage``, ``coherence``, ``consistency``, ``granularity``.
            Any other name matches no check and is ignored.

    Returns:
        :class:`CheckReport` whose fields are filled in for the checks that
        ran and left as ``None`` for the rest.
    """
    everything = checks is None
    requested = set(checks or ())

    def _selected(name: str) -> bool:
        # A check runs when no filter was given or when it was named.
        return everything or name in requested

    result = CheckReport()

    if _selected("redundancy"):
        result.redundancy = check_redundancy(entities, embeddings=embeddings)
    if _selected("coverage"):
        result.coverage = check_coverage(
            entities, extra_attributes=extra_attributes
        )
    if _selected("coherence"):
        result.coherence = check_coherence(
            graph=graph, entity_count=len(entities)
        )
    if _selected("consistency"):
        result.consistency = check_consistency(entities, graph=graph)
    if _selected("granularity"):
        result.granularity = check_granularity(entities)

    return result
|
||||
Reference in New Issue
Block a user