feat(infospace): add collection-level quality checks C1–C5 (S2.4)
Five concern checks: Redundancy (embedding/word overlap), Coverage (FCA gap analysis), Coherence (graph connectivity), Consistency (cycle detection), Granularity (Shannon entropy). Orchestrator runs all or selected checks, CLI `markitect infospace check` command added. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
23
markitect/infospace/checks/__init__.py
Normal file
23
markitect/infospace/checks/__init__.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""
|
||||
Collection-level quality checks for infospaces.
|
||||
|
||||
Five concerns: Redundancy (C1), Coverage (C2), Coherence (C3),
|
||||
Consistency (C4), Granularity (C5).
|
||||
"""
|
||||
|
||||
from markitect.infospace.checks.redundancy import check_redundancy
|
||||
from markitect.infospace.checks.coverage import check_coverage
|
||||
from markitect.infospace.checks.coherence import check_coherence
|
||||
from markitect.infospace.checks.consistency import check_consistency
|
||||
from markitect.infospace.checks.granularity import check_granularity
|
||||
from markitect.infospace.checks.orchestrator import run_all_checks, CheckReport
|
||||
|
||||
__all__ = [
|
||||
"check_redundancy",
|
||||
"check_coverage",
|
||||
"check_coherence",
|
||||
"check_consistency",
|
||||
"check_granularity",
|
||||
"run_all_checks",
|
||||
"CheckReport",
|
||||
]
|
||||
81
markitect/infospace/checks/coherence.py
Normal file
81
markitect/infospace/checks/coherence.py
Normal file
@@ -0,0 +1,81 @@
|
||||
"""
|
||||
C3 — Structural coherence.
|
||||
|
||||
Uses graph analysis to check that the entity relationship graph is
|
||||
well-connected and has meaningful community structure.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from markitect.prompts.dependencies.models import DependencyGraph
|
||||
|
||||
|
||||
@dataclass
class CoherenceReport:
    """Outcome of the C3 structural-coherence check.

    Every field defaults to zero, so a bare ``CoherenceReport()`` is a
    valid "nothing measured" result.
    """

    connected_components: int = 0
    largest_component_size: int = 0
    modularity: float = 0.0
    community_count: int = 0
    cohesion: float = 0.0
    coupling: float = 0.0
    entity_count: int = 0

    def to_dict(self) -> dict:
        """Serialize the report to a plain dict tagged ``"concern": "C3"``.

        Float metrics (modularity, cohesion, coupling) are rounded to four
        decimals; integer fields are passed through unchanged.
        """
        payload: dict = {"concern": "C3"}
        payload["connected_components"] = self.connected_components
        payload["largest_component_size"] = self.largest_component_size
        payload["modularity"] = round(self.modularity, 4)
        payload["community_count"] = self.community_count
        payload["cohesion"] = round(self.cohesion, 4)
        payload["coupling"] = round(self.coupling, 4)
        payload["entity_count"] = self.entity_count
        return payload
|
||||
|
||||
|
||||
def check_coherence(
    graph: Optional[DependencyGraph] = None,
    entity_count: int = 0,
) -> CoherenceReport:
    """Check structural coherence of the entity relationship graph.

    Args:
        graph: The entity relationship graph. If ``None`` or without
            nodes, a report with zero-valued metrics is returned.
        entity_count: Total number of entities (for context). When it is
            0 and a non-empty graph is given, the graph's node count is
            reported instead.

    Returns:
        :class:`CoherenceReport` with connectivity and community metrics.
        If ``markitect.analysis.graph`` cannot be imported, the metrics
        are left at zero and only ``entity_count`` is filled in.
    """
    if graph is None or len(graph.nodes) == 0:
        return CoherenceReport(entity_count=entity_count)

    try:
        from markitect.analysis.graph import (
            connected_components,
            modularity_score,
            detect_communities,
            cohesion_coupling,
        )
    except ImportError:
        # Deliberate best-effort: without the graph-analysis helpers the
        # check degrades to an all-zero report instead of failing.
        return CoherenceReport(entity_count=entity_count)

    components = connected_components(graph)
    # Fixed seed is passed to community detection -- presumably so repeated
    # runs yield the same communities; confirm against detect_communities.
    communities = detect_communities(graph, seed=42)
    mod = modularity_score(graph, communities=communities)
    cc = cohesion_coupling(graph, communities=communities)

    return CoherenceReport(
        connected_components=len(components),
        # Take the true maximum rather than assuming the helper returns
        # components ordered largest-first.
        largest_component_size=max((len(c) for c in components), default=0),
        modularity=mod,
        community_count=len(communities),
        cohesion=cc["cohesion"],
        coupling=cc["coupling"],
        entity_count=entity_count or len(graph.nodes),
    )
|
||||
58
markitect/infospace/checks/consistency.py
Normal file
58
markitect/infospace/checks/consistency.py
Normal file
@@ -0,0 +1,58 @@
|
||||
"""
|
||||
C4 — Definitional consistency.
|
||||
|
||||
Checks for cycles in the dependency graph. Detection of definitional
conflicts between entities is not implemented in this module.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from markitect.infospace.models import EntityMeta
|
||||
from markitect.prompts.dependencies.models import DependencyGraph
|
||||
|
||||
|
||||
@dataclass
class ConsistencyReport:
    """Outcome of the C4 consistency check.

    ``cycles`` holds each detected cycle as a list of strings and
    ``cycle_count`` the number of cycles; both default to "none found".
    """

    cycles: List[List[str]] = field(default_factory=list)
    cycle_count: int = 0
    entity_count: int = 0

    def to_dict(self) -> dict:
        """Serialize the report to a plain dict tagged ``"concern": "C4"``."""
        payload: dict = {"concern": "C4"}
        payload.update(
            cycle_count=self.cycle_count,
            cycles=self.cycles,
            entity_count=self.entity_count,
        )
        return payload
|
||||
|
||||
|
||||
def check_consistency(
    entities: List[EntityMeta],
    graph: Optional[DependencyGraph] = None,
) -> ConsistencyReport:
    """Check definitional consistency via dependency-cycle detection.

    Only cycle detection is performed here; ``entities`` contributes
    nothing but the entity count of the report.

    Args:
        entities: Entity metadata list.
        graph: Optional dependency graph. Cycles are looked up with
            ``graph.detect_cycles()`` when the graph has at least one node.

    Returns:
        :class:`ConsistencyReport` with the cycles found (empty when no
        graph, or an empty graph, is supplied).
    """
    found: List[List[str]] = []

    has_nodes = graph is not None and len(graph.nodes) > 0
    if has_nodes:
        found = graph.detect_cycles()

    return ConsistencyReport(
        cycles=found,
        cycle_count=len(found),
        entity_count=len(entities),
    )
|
||||
111
markitect/infospace/checks/coverage.py
Normal file
111
markitect/infospace/checks/coverage.py
Normal file
@@ -0,0 +1,111 @@
|
||||
"""
|
||||
C2 — Coverage completeness.
|
||||
|
||||
Uses FCA and cross-tabulation to detect structural coverage gaps:
|
||||
attribute combinations (currently domain × source chapter) with no entities.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from markitect.infospace.models import EntityMeta
|
||||
from markitect.analysis.fca import FormalContext, find_empty_cells, find_gap_concepts
|
||||
|
||||
|
||||
@dataclass
class CoverageReport:
    """Outcome of the C2 coverage check.

    Holds the coverage ratio, the list of empty cross-tabulation cells,
    the FCA gap concepts and a per-domain entity tally.
    """

    coverage_ratio: float = 0.0
    empty_cells: List[dict] = field(default_factory=list)
    gap_concepts: List[dict] = field(default_factory=list)
    domain_counts: Dict[str, int] = field(default_factory=dict)
    entity_count: int = 0

    def to_dict(self) -> dict:
        """Serialize the report to a plain dict tagged ``"concern": "C2"``.

        The ratio is rounded to four decimals. Gap concepts are reported
        only as a count (``gap_concepts_count``), not in full.
        """
        gap_total = len(self.gap_concepts)
        rounded_ratio = round(self.coverage_ratio, 4)
        return {
            "concern": "C2",
            "coverage_ratio": rounded_ratio,
            "empty_cells": self.empty_cells,
            "gap_concepts_count": gap_total,
            "domain_counts": self.domain_counts,
            "entity_count": self.entity_count,
        }
|
||||
|
||||
|
||||
def _extract_attributes(entity: EntityMeta) -> set[str]:
|
||||
"""Extract FCA attributes from an entity."""
|
||||
attrs: set[str] = set()
|
||||
if entity.domain:
|
||||
attrs.add(f"domain:{entity.domain}")
|
||||
if entity.source_chapter:
|
||||
attrs.add(f"chapter:{entity.source_chapter}")
|
||||
return attrs
|
||||
|
||||
|
||||
def check_coverage(
    entities: List[EntityMeta],
    extra_attributes: Optional[Dict[str, set[str]]] = None,
) -> CoverageReport:
    """Check coverage completeness using FCA gap analysis.

    Builds a formal context from per-entity attributes, cross-tabulates
    the ``domain:*`` attributes against the ``chapter:*`` attributes to
    find empty cells, and collects FCA gap concepts whose intent has at
    most four attributes.

    Args:
        entities: Entity metadata list.
        extra_attributes: Optional ``{slug: {attr, ...}}`` merged into the
            auto-extracted attributes of the entity with that slug.

    Returns:
        :class:`CoverageReport`. An empty ``entities`` list gives a
        default report. When no ``domain:*`` or no ``chapter:*``
        attribute exists, the grid is treated as a single populated cell
        and the coverage ratio is 1.0.
    """
    total = len(entities)
    if total == 0:
        return CoverageReport()

    extras = extra_attributes or {}

    # One pass collects both the slug -> attributes map and the domain tally.
    attrs_by_slug: Dict[str, set[str]] = {}
    per_domain: Dict[str, int] = {}
    for entity in entities:
        merged = _extract_attributes(entity)
        if entity.slug in extras:
            merged.update(extras[entity.slug])
        attrs_by_slug[entity.slug] = merged

        label = entity.domain or "(unspecified)"
        per_domain[label] = per_domain.get(label, 0) + 1

    context = FormalContext.from_dict(attrs_by_slug)

    # The two axes of the cross-tabulation, each in sorted order.
    seen_attrs = {attr for attrs in attrs_by_slug.values() for attr in attrs}
    domain_axis = sorted(a for a in seen_attrs if a.startswith("domain:"))
    chapter_axis = sorted(a for a in seen_attrs if a.startswith("chapter:"))
    has_grid = bool(domain_axis and chapter_axis)

    empty_cells: List[dict] = []
    if has_grid:
        empty_cells = [
            {"dimension_a": row, "dimension_b": col}
            for row, col in find_empty_cells(context, domain_axis, chapter_axis)
        ]

    # Only gaps with a small intent are reported, to keep the list manageable.
    gap_summaries = [
        {"intent": sorted(gap.intent), "extent_size": gap.extent_size}
        for gap in find_gap_concepts(context)
        if gap.intent_size <= 4
    ]

    # Populated cells over all possible cells. Without a grid the cell
    # count falls back to 1; with one, both axes are non-empty. Either
    # way the denominator is at least 1.
    cell_total = len(domain_axis) * len(chapter_axis) if has_grid else 1
    ratio = (cell_total - len(empty_cells)) / cell_total

    return CoverageReport(
        coverage_ratio=ratio,
        empty_cells=empty_cells,
        gap_concepts=gap_summaries,
        domain_counts=per_domain,
        entity_count=total,
    )
|
||||
98
markitect/infospace/checks/granularity.py
Normal file
98
markitect/infospace/checks/granularity.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""
|
||||
C5 — Granularity balance.
|
||||
|
||||
Checks that entities are at a consistent level of abstraction,
|
||||
measured by word count distribution and Shannon entropy of domain
|
||||
assignments.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List
|
||||
|
||||
from markitect.infospace.models import EntityMeta
|
||||
|
||||
|
||||
@dataclass
class GranularityReport:
    """Outcome of the C5 granularity check.

    Carries the Shannon entropy of the domain distribution, summary
    statistics of definition word counts, and the raw per-domain tally.
    """

    domain_entropy: float = 0.0
    word_count_stats: Dict[str, float] = field(default_factory=dict)
    domain_distribution: Dict[str, int] = field(default_factory=dict)
    entity_count: int = 0

    def to_dict(self) -> dict:
        """Serialize the report to a plain dict tagged ``"concern": "C5"``.

        Entropy is rounded to four decimals and every word-count statistic
        to two.
        """
        rounded_stats = {}
        for name, value in self.word_count_stats.items():
            rounded_stats[name] = round(value, 2)

        return {
            "concern": "C5",
            "domain_entropy": round(self.domain_entropy, 4),
            "word_count_stats": rounded_stats,
            "domain_distribution": self.domain_distribution,
            "entity_count": self.entity_count,
        }
|
||||
|
||||
|
||||
def _shannon_entropy(counts: Dict[str, int]) -> float:
|
||||
"""Compute Shannon entropy of a distribution."""
|
||||
total = sum(counts.values())
|
||||
if total == 0:
|
||||
return 0.0
|
||||
entropy = 0.0
|
||||
for count in counts.values():
|
||||
if count > 0:
|
||||
p = count / total
|
||||
entropy -= p * math.log2(p)
|
||||
return entropy
|
||||
|
||||
|
||||
def check_granularity(entities: List[EntityMeta]) -> GranularityReport:
    """Check granularity balance across entities.

    Metrics:
        - Domain entropy: Shannon entropy of how entities are spread over
          domains (entities without a domain count as ``"(unspecified)"``).
        - Word count statistics: mean, min, max and population standard
          deviation of ``definition_word_count``.

    Args:
        entities: Entity metadata list.

    Returns:
        :class:`GranularityReport` with balance metrics; a default report
        when ``entities`` is empty.
    """
    total = len(entities)
    if total == 0:
        return GranularityReport()

    # Tally entities per domain.
    distribution: Dict[str, int] = {}
    for entity in entities:
        label = entity.domain or "(unspecified)"
        distribution[label] = distribution.get(label, 0) + 1

    entropy = _shannon_entropy(distribution)

    # Definition lengths; the [0] fallback mirrors the original guard for
    # an empty list of counts.
    lengths = [entity.definition_word_count for entity in entities] or [0]
    sample_size = len(lengths)

    mean_len = sum(lengths) / sample_size
    squared_deviation = sum((value - mean_len) ** 2 for value in lengths)
    # Population (not sample) standard deviation: divide by N.
    std_len = math.sqrt(squared_deviation / sample_size)

    stats = {
        "mean": mean_len,
        "min": float(min(lengths)),
        "max": float(max(lengths)),
        "std": std_len,
    }

    return GranularityReport(
        domain_entropy=entropy,
        word_count_stats=stats,
        domain_distribution=distribution,
        entity_count=total,
    )
|
||||
102
markitect/infospace/checks/orchestrator.py
Normal file
102
markitect/infospace/checks/orchestrator.py
Normal file
@@ -0,0 +1,102 @@
|
||||
"""
|
||||
Unified orchestrator for all five collection-level checks.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from markitect.infospace.models import EntityMeta
|
||||
from markitect.prompts.dependencies.models import DependencyGraph
|
||||
|
||||
from .redundancy import RedundancyReport, check_redundancy
|
||||
from .coverage import CoverageReport, check_coverage
|
||||
from .coherence import CoherenceReport, check_coherence
|
||||
from .consistency import ConsistencyReport, check_consistency
|
||||
from .granularity import GranularityReport, check_granularity
|
||||
|
||||
|
||||
@dataclass
class CheckReport:
    """Unified report from all five collection-level checks.

    Each field holds the report of one concern, or ``None`` when that
    check was not run. Presence is decided with ``is not None`` so that a
    report object is never dropped merely because it evaluates as falsy.
    """

    redundancy: Optional[RedundancyReport] = None
    coverage: Optional[CoverageReport] = None
    coherence: Optional[CoherenceReport] = None
    consistency: Optional[ConsistencyReport] = None
    granularity: Optional[GranularityReport] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize every report that is present.

        Returns:
            A dict keyed by concern name (``redundancy``, ``coverage``,
            ``coherence``, ``consistency``, ``granularity``, in that
            order) mapping to the report's own ``to_dict()`` output.
            Concerns that were not run are omitted.
        """
        sections = (
            ("redundancy", self.redundancy),
            ("coverage", self.coverage),
            ("coherence", self.coherence),
            ("consistency", self.consistency),
            ("granularity", self.granularity),
        )
        return {
            name: section.to_dict()
            for name, section in sections
            if section is not None
        }

    def metrics(self) -> Dict[str, float]:
        """Extract key metrics for viability checking.

        Returns:
            A flat ``{metric_name: value}`` dict containing only the
            metrics of reports that are present. Integer counts are
            converted to ``float``.
        """
        m: Dict[str, float] = {}
        if self.redundancy is not None:
            m["redundancy_ratio"] = self.redundancy.redundancy_ratio
        if self.coverage is not None:
            m["coverage_ratio"] = self.coverage.coverage_ratio
        if self.coherence is not None:
            m["coherence_components"] = float(self.coherence.connected_components)
            m["modularity"] = self.coherence.modularity
        if self.consistency is not None:
            m["consistency_cycles"] = float(self.consistency.cycle_count)
        if self.granularity is not None:
            m["granularity_entropy"] = self.granularity.domain_entropy
        return m
|
||||
|
||||
|
||||
def run_all_checks(
    entities: List[EntityMeta],
    embeddings: Optional[Dict[str, list[float]]] = None,
    graph: Optional[DependencyGraph] = None,
    extra_attributes: Optional[Dict[str, set[str]]] = None,
    checks: Optional[List[str]] = None,
) -> CheckReport:
    """Run all (or selected) collection-level checks.

    Args:
        entities: Entity metadata list.
        embeddings: Pre-computed embedding vectors for C1.
        graph: Entity relationship graph for C3 and C4.
        extra_attributes: Extra FCA attributes for C2.
        checks: Names of the checks to run. ``None`` runs all five; an
            empty list runs none. Valid names: ``redundancy``,
            ``coverage``, ``coherence``, ``consistency``,
            ``granularity``. Unrecognised names are silently ignored.

    Returns:
        :class:`CheckReport` whose fields are filled in for every check
        that ran and left as ``None`` for the rest.
    """
    # None means "everything"; any other value is an explicit selection.
    selection = None if checks is None else (set(checks) if checks else set())

    def _wanted(name: str) -> bool:
        return selection is None or name in selection

    report = CheckReport()

    if _wanted("redundancy"):
        report.redundancy = check_redundancy(entities, embeddings=embeddings)

    if _wanted("coverage"):
        report.coverage = check_coverage(entities, extra_attributes=extra_attributes)

    if _wanted("coherence"):
        report.coherence = check_coherence(graph=graph, entity_count=len(entities))

    if _wanted("consistency"):
        report.consistency = check_consistency(entities, graph=graph)

    if _wanted("granularity"):
        report.granularity = check_granularity(entities)

    return report
|
||||
98
markitect/infospace/checks/redundancy.py
Normal file
98
markitect/infospace/checks/redundancy.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""
|
||||
C1 — Redundancy detection.
|
||||
|
||||
Uses embedding similarity (or, when no embeddings are supplied, overlap
of definition words) to find entity pairs with overlapping meanings that
may be candidates for merging.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from markitect.infospace.models import EntityMeta
|
||||
from markitect.llm.similarity import find_similar_pairs
|
||||
|
||||
|
||||
@dataclass
class RedundancyReport:
    """Outcome of the C1 redundancy check.

    ``similar_pairs`` lists the flagged entity pairs as dicts and
    ``redundancy_ratio`` is the share of entities involved in them.
    """

    similar_pairs: List[dict] = field(default_factory=list)
    redundancy_ratio: float = 0.0
    entity_count: int = 0

    def to_dict(self) -> dict:
        """Serialize the report to a plain dict tagged ``"concern": "C1"``.

        The ratio is rounded to four decimals.
        """
        payload: dict = {"concern": "C1"}
        payload["redundancy_ratio"] = round(self.redundancy_ratio, 4)
        payload["similar_pairs"] = self.similar_pairs
        payload["entity_count"] = self.entity_count
        return payload
|
||||
|
||||
|
||||
def check_redundancy(
    entities: List[EntityMeta],
    embeddings: Optional[Dict[str, list[float]]] = None,
    threshold: float = 0.85,
) -> RedundancyReport:
    """Check for redundant entities.

    Args:
        entities: Entity metadata list.
        embeddings: Pre-computed ``{slug: vector}`` mapping. When given
            (and non-empty), pairs come from ``find_similar_pairs``.
            Otherwise a structural fallback compares the lower-cased
            word sets of entity definitions: shared words divided by the
            size of the smaller set.
        threshold: Similarity threshold for flagging pairs; applies to
            both methods.

    Returns:
        :class:`RedundancyReport` with the flagged pairs and the fraction
        of entities that take part in at least one pair. With fewer than
        two entities the report contains only the entity count.
    """
    total = len(entities)
    if total < 2:
        return RedundancyReport(entity_count=total)

    flagged: list[dict] = []

    if embeddings:
        # Embedding-based similarity.
        flagged = [
            {
                "entity_a": first,
                "entity_b": second,
                "similarity": round(score, 4),
                "method": "embedding",
            }
            for first, second, score in find_similar_pairs(embeddings, threshold=threshold)
        ]
    else:
        # Fallback: structural overlap of definition vocabularies.
        vocab = {
            e.slug: (set(e.definition.lower().split()) if e.definition else set())
            for e in entities
        }
        ordered = sorted(vocab)

        for idx, left in enumerate(ordered):
            left_words = vocab[left]
            if not left_words:
                continue  # an entity without definition words can never match
            for right in ordered[idx + 1:]:
                right_words = vocab[right]
                if not right_words:
                    continue
                shared = len(left_words & right_words)
                score = shared / min(len(left_words), len(right_words))
                if score >= threshold:
                    flagged.append({
                        "entity_a": left,
                        "entity_b": right,
                        "similarity": round(score, 4),
                        "method": "word_overlap",
                    })

    # Fraction of entities involved in at least one flagged pair
    # (total >= 2 here, so the division is always defined).
    involved = {pair[side] for pair in flagged for side in ("entity_a", "entity_b")}
    ratio = len(involved) / total

    return RedundancyReport(
        similar_pairs=flagged,
        redundancy_ratio=ratio,
        entity_count=total,
    )
|
||||
@@ -273,3 +273,61 @@ def viability(config_path: Optional[str]):
|
||||
click.echo(f"Viable: YES ({state.viability_pass_count}/{state.viability_total_count} thresholds met)")
|
||||
else:
|
||||
click.echo(f"Viable: NO ({state.viability_pass_count}/{state.viability_total_count} thresholds met)")
|
||||
|
||||
|
||||
# ── check ───────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
@infospace_commands.command()
@click.option("--config", "config_path", default=None, help="Path to infospace.yaml.")
@click.option(
    "--concern", "concerns", multiple=True,
    type=click.Choice(["redundancy", "coverage", "coherence", "consistency", "granularity"]),
    help="Run specific concern(s). Omit to run all five.",
)
@click.option("--json", "as_json", is_flag=True, help="Output results as JSON.")
def check(config_path: Optional[str], concerns: tuple, as_json: bool):
    """Run collection-level quality checks (C1–C5)."""
    # NOTE: the docstring above is the CLI help text; keep it unchanged.
    cfg, cfg_path = _load_config_or_exit(config_path)

    # The entities directory is resolved relative to the config file.
    entities_dir = cfg_path.parent / cfg.entities_dir
    if not entities_dir.is_dir():
        click.echo("Error: No entities directory found.", err=True)
        raise SystemExit(1)

    entity_list = parse_entity_directory(entities_dir)
    if not entity_list:
        click.echo("No entities to check.")
        return

    from markitect.infospace.checks import run_all_checks

    # No --concern options means "run all five" (signalled by None).
    selected = list(concerns) if concerns else None
    report = run_all_checks(entities=entity_list, checks=selected)

    if as_json:
        import json

        # JSON mode prints the serialized report and nothing else.
        click.echo(json.dumps(report.to_dict(), indent=2))
        return

    click.echo(f"Collection checks — {len(entity_list)} entities\n")
    for concern_name, concern_data in report.to_dict().items():
        label = concern_data.get("concern", concern_name.upper())
        click.echo(f" {label} — {concern_name}")
        for key, value in concern_data.items():
            if key != "concern":  # already shown in the heading line
                click.echo(f" {key}: {value}")
        click.echo()

    # Summary metrics (text mode only).
    summary = report.metrics()
    if summary:
        click.echo("Metrics summary:")
        for key, value in sorted(summary.items()):
            click.echo(f" {key}: {value:.4f}")
|
||||
|
||||
Reference in New Issue
Block a user