Files
markitect-main/markitect/infospace/checks/coherence.py
tegwick 11585e6968 feat(infospace): add collection-level quality checks C1–C5 (S2.4)
Five concern checks: Redundancy (embedding/word overlap), Coverage
(FCA gap analysis), Coherence (graph connectivity), Consistency
(cycle detection), Granularity (Shannon entropy). Orchestrator runs
all or selected checks, CLI `markitect infospace check` command added.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 01:54:22 +01:00

82 lines
2.4 KiB
Python

"""
C3 — Structural coherence.
Uses graph analysis to check that the entity relationship graph is
well-connected and has meaningful community structure.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Dict, List, Optional
from markitect.prompts.dependencies.models import DependencyGraph
@dataclass
class CoherenceReport:
"""Results from coherence analysis."""
connected_components: int = 0
largest_component_size: int = 0
modularity: float = 0.0
community_count: int = 0
cohesion: float = 0.0
coupling: float = 0.0
entity_count: int = 0
def to_dict(self) -> dict:
return {
"concern": "C3",
"connected_components": self.connected_components,
"largest_component_size": self.largest_component_size,
"modularity": round(self.modularity, 4),
"community_count": self.community_count,
"cohesion": round(self.cohesion, 4),
"coupling": round(self.coupling, 4),
"entity_count": self.entity_count,
}
def check_coherence(
graph: Optional[DependencyGraph] = None,
entity_count: int = 0,
) -> CoherenceReport:
"""Check structural coherence of the entity relationship graph.
Args:
graph: The entity relationship graph. If ``None``, returns
a report with zero values.
entity_count: Total number of entities (for context).
Returns:
:class:`CoherenceReport` with connectivity and community metrics.
"""
if graph is None or len(graph.nodes) == 0:
return CoherenceReport(entity_count=entity_count)
try:
from markitect.analysis.graph import (
connected_components,
modularity_score,
detect_communities,
cohesion_coupling,
)
except ImportError:
return CoherenceReport(entity_count=entity_count)
components = connected_components(graph)
communities = detect_communities(graph, seed=42)
mod = modularity_score(graph, communities=communities)
cc = cohesion_coupling(graph, communities=communities)
return CoherenceReport(
connected_components=len(components),
largest_component_size=len(components[0]) if components else 0,
modularity=mod,
community_count=len(communities),
cohesion=cc["cohesion"],
coupling=cc["coupling"],
entity_count=entity_count or len(graph.nodes),
)