Add connected components, betweenness centrality, Louvain community detection, modularity scoring, degree distribution, and cohesion/coupling computation. Wraps DependencyGraph via networkx (optional dependency) for downstream collection-level coherence metrics. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
185 lines
5.4 KiB
Python
185 lines
5.4 KiB
Python
"""
|
|
Graph analysis utilities for collection-level metrics.
|
|
|
|
Provides connected components, centrality, community detection,
|
|
modularity, degree distribution, and cohesion/coupling computation.
|
|
|
|
Requires ``networkx`` (optional dependency)::
|
|
|
|
pip install networkx
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Optional
|
|
|
|
from markitect.prompts.dependencies.models import DependencyGraph
|
|
|
|
|
|
def _require_networkx():
    """Return the ``networkx`` module, importing it on demand.

    The import is deferred to call time so that this module can be imported
    even when the optional ``networkx`` dependency is not installed.

    Raises:
        ImportError: If ``networkx`` cannot be imported. The original
            import error is suppressed (``from None``) so that only the
            installation hint is shown.
    """
    try:
        import networkx
    except ImportError:
        message = (
            "networkx is required for graph analysis. "
            "Install it with: pip install networkx"
        )
        raise ImportError(message) from None
    return networkx
|
|
|
|
|
|
def to_networkx(graph: DependencyGraph):
    """Build a networkx ``DiGraph`` mirroring a :class:`DependencyGraph`.

    All of ``graph.nodes`` are added first (so isolated nodes are kept),
    then one directed edge is added for every (node, successor) pair
    reported by ``graph.get_successors``.

    Each edge carries an ``edge_type`` attribute: the ``.value`` of the
    object returned by ``graph.get_edge_type(node, successor)``, or ``None``
    when that object is falsy.

    Raises:
        ImportError: If ``networkx`` is not installed.
    """
    nx = _require_networkx()
    digraph = nx.DiGraph()
    digraph.add_nodes_from(graph.nodes)
    for source in graph.nodes:
        for target in graph.get_successors(source):
            kind = graph.get_edge_type(source, target)
            label = kind.value if kind else None
            digraph.add_edge(source, target, edge_type=label)
    return digraph
|
|
|
|
|
|
def connected_components(graph: DependencyGraph) -> list[set[str]]:
    """Return the weakly connected components of *graph*.

    Edge direction is ignored when deciding connectivity. The result is a
    list of node sets, one per component, ordered from largest to smallest.

    Raises:
        ImportError: If ``networkx`` is not installed.
    """
    nx = _require_networkx()
    digraph = to_networkx(graph)
    # sorted() is stable, so equally sized components keep networkx's order.
    by_size = sorted(
        nx.weakly_connected_components(digraph),
        key=len,
        reverse=True,
    )
    return [set(members) for members in by_size]
|
|
|
|
|
|
def betweenness_centrality(graph: DependencyGraph) -> dict[str, float]:
    """Return the betweenness centrality of every node in *graph*.

    The graph is converted to a directed networkx graph and passed to
    ``networkx.betweenness_centrality`` with its default arguments.

    Returns:
        A dict mapping node ID to its centrality score.

    Raises:
        ImportError: If ``networkx`` is not installed.
    """
    nx = _require_networkx()
    digraph = to_networkx(graph)
    scores = nx.betweenness_centrality(digraph)
    return scores
|
|
|
|
|
|
def detect_communities(
    graph: DependencyGraph,
    seed: Optional[int] = None,
) -> list[set[str]]:
    """Partition *graph* into communities with the Louvain algorithm.

    Edge direction is discarded: the algorithm runs on the undirected
    projection of the graph.

    Args:
        graph: The dependency graph to analyse.
        seed: Random seed forwarded to ``louvain_communities`` for
            reproducible results.

    Returns:
        A list of node sets, one per community, ordered from largest to
        smallest. An empty list when the graph has no nodes.

    Raises:
        ImportError: If ``networkx`` is not installed.
    """
    nx = _require_networkx()
    undirected = to_networkx(graph).to_undirected()
    # Nothing to partition; skip the Louvain call entirely.
    if not undirected.nodes:
        return []
    by_size = sorted(
        nx.community.louvain_communities(undirected, seed=seed),
        key=len,
        reverse=True,
    )
    return [set(members) for members in by_size]
|
|
|
|
|
|
def modularity_score(
    graph: DependencyGraph,
    communities: Optional[list[set[str]]] = None,
    seed: Optional[int] = None,
) -> float:
    """Return the modularity of a community partition of *graph*.

    The score is computed on the undirected projection of the graph.

    Args:
        graph: The dependency graph.
        communities: Pre-computed communities. When ``None``, they are
            obtained from :func:`detect_communities`.
        seed: Random seed, used only when *communities* is ``None``.

    Returns:
        The value of ``networkx.community.modularity`` for the partition,
        or ``0.0`` when the graph has no edges (checked before any
        community detection takes place).

    Raises:
        ImportError: If ``networkx`` is not installed.
    """
    nx = _require_networkx()
    undirected = to_networkx(graph).to_undirected()
    # Modularity is undefined without edges; report a neutral score instead.
    if not undirected.edges:
        return 0.0
    if communities is not None:
        partition = communities
    else:
        partition = detect_communities(graph, seed=seed)
    return nx.community.modularity(undirected, partition)
|
|
|
|
|
|
def degree_distribution(graph: DependencyGraph) -> dict[str, dict[str, int]]:
    """Report in-degree, out-degree and total degree for every node.

    Returns::

        {"node_id": {"in_degree": 2, "out_degree": 1, "total_degree": 3}, ...}

    ``total_degree`` is the sum of the other two values.

    Raises:
        ImportError: If ``networkx`` is not installed.
    """
    # Called only for its ImportError; the module object is not needed here.
    _require_networkx()
    digraph = to_networkx(graph)

    def _degrees(name):
        incoming = digraph.in_degree(name)
        outgoing = digraph.out_degree(name)
        return {
            "in_degree": incoming,
            "out_degree": outgoing,
            "total_degree": incoming + outgoing,
        }

    return {name: _degrees(name) for name in digraph.nodes}
|
|
|
|
|
|
def cohesion_coupling(
    graph: DependencyGraph,
    communities: Optional[list[set[str]]] = None,
    seed: Optional[int] = None,
) -> dict:
    """Compute cohesion (intra-community edges) and coupling (inter-community edges).

    Every directed edge of the graph is classified as *intra* (both
    endpoints in the same community) or *inter* (endpoints in different
    communities).

    A node that appears in none of the supplied *communities* is treated as
    its own singleton community: an edge touching it counts as *inter*
    unless it is a self-loop. Previously two unassigned endpoints compared
    equal (``None == None``) and the edge was miscounted as *intra*.

    Args:
        graph: The dependency graph.
        communities: Pre-computed communities. If ``None``, detected
            via :func:`detect_communities`. If a node occurs in several
            communities, the last one listed wins.
        seed: Random seed (used only when *communities* is ``None``).

    Returns:
        Dict with keys ``cohesion`` and ``coupling`` (fractions of all edges,
        both ``0.0`` when the graph has no edges), ``intra_edges``,
        ``inter_edges``, ``total_edges``, and ``communities`` (the number of
        communities in the partition used).

    Raises:
        ImportError: If ``networkx`` is not installed.
    """
    _require_networkx()
    G = to_networkx(graph)
    if communities is None:
        communities = detect_communities(graph, seed=seed)

    # Build node -> community index
    node_community: dict[str, int] = {
        node: index
        for index, members in enumerate(communities)
        for node in members
    }

    intra = 0
    inter = 0
    for u, v in G.edges:
        cu = node_community.get(u)
        cv = node_community.get(v)
        # Unassigned nodes (None) must not match each other; only a
        # self-loop keeps such an edge inside a (singleton) community.
        if u == v or (cu is not None and cu == cv):
            intra += 1
        else:
            inter += 1

    total = intra + inter
    return {
        "cohesion": intra / total if total > 0 else 0.0,
        "coupling": inter / total if total > 0 else 0.0,
        "intra_edges": intra,
        "inter_edges": inter,
        "total_edges": total,
        "communities": len(communities),
    }
|