""" Graph analysis utilities for collection-level metrics. Provides connected components, centrality, community detection, modularity, degree distribution, and cohesion/coupling computation. Requires ``networkx`` (optional dependency):: pip install networkx """ from __future__ import annotations from typing import Optional from markitect.prompts.dependencies.models import DependencyGraph def _require_networkx(): """Import and return networkx, raising a clear error if missing.""" try: import networkx as nx return nx except ImportError: raise ImportError( "networkx is required for graph analysis. " "Install it with: pip install networkx" ) from None def to_networkx(graph: DependencyGraph): """Convert a :class:`DependencyGraph` to a networkx ``DiGraph``. Each edge carries an ``edge_type`` attribute (string value of the :class:`EdgeType` enum, or ``None``). """ nx = _require_networkx() G = nx.DiGraph() G.add_nodes_from(graph.nodes) for node in graph.nodes: for succ in graph.get_successors(node): edge_type = graph.get_edge_type(node, succ) G.add_edge( node, succ, edge_type=edge_type.value if edge_type else None, ) return G def connected_components(graph: DependencyGraph) -> list[set[str]]: """Find weakly connected components (edges treated as undirected). Returns a list of node sets, one per component, sorted largest-first. """ nx = _require_networkx() G = to_networkx(graph) components = list(nx.weakly_connected_components(G)) components.sort(key=len, reverse=True) return [set(c) for c in components] def betweenness_centrality(graph: DependencyGraph) -> dict[str, float]: """Compute betweenness centrality for all nodes. Returns a dict mapping node ID to centrality score in [0, 1]. """ nx = _require_networkx() G = to_networkx(graph) return nx.betweenness_centrality(G) def detect_communities( graph: DependencyGraph, seed: Optional[int] = None, ) -> list[set[str]]: """Detect communities using the Louvain algorithm. Operates on an undirected projection of the graph. Returns a list of node sets, one per community, sorted largest-first. Args: graph: The dependency graph to analyse. seed: Random seed for reproducibility (passed to Louvain). """ nx = _require_networkx() G = to_networkx(graph).to_undirected() if len(G.nodes) == 0: return [] communities = list(nx.community.louvain_communities(G, seed=seed)) communities.sort(key=len, reverse=True) return [set(c) for c in communities] def modularity_score( graph: DependencyGraph, communities: Optional[list[set[str]]] = None, seed: Optional[int] = None, ) -> float: """Compute the modularity score for a community partition. Args: graph: The dependency graph. communities: Pre-computed communities. If ``None``, communities are detected via :func:`detect_communities`. seed: Random seed (used only when *communities* is ``None``). Returns: Modularity in [-0.5, 1.0]. Returns 0.0 for graphs with no edges. """ nx = _require_networkx() G = to_networkx(graph).to_undirected() if len(G.edges) == 0: return 0.0 if communities is None: communities = detect_communities(graph, seed=seed) return nx.community.modularity(G, communities) def degree_distribution(graph: DependencyGraph) -> dict[str, dict[str, int]]: """Compute in-degree, out-degree, and total degree for each node. Returns:: {"node_id": {"in_degree": 2, "out_degree": 1, "total_degree": 3}, ...} """ nx = _require_networkx() G = to_networkx(graph) result = {} for node in G.nodes: ind = G.in_degree(node) outd = G.out_degree(node) result[node] = { "in_degree": ind, "out_degree": outd, "total_degree": ind + outd, } return result def cohesion_coupling( graph: DependencyGraph, communities: Optional[list[set[str]]] = None, seed: Optional[int] = None, ) -> dict: """Compute cohesion (intra-community edges) and coupling (inter-community edges). Args: graph: The dependency graph. communities: Pre-computed communities. If ``None``, detected via :func:`detect_communities`. seed: Random seed (used only when *communities* is ``None``). Returns: Dict with keys ``cohesion``, ``coupling`` (ratios in [0, 1]), ``intra_edges``, ``inter_edges``, ``total_edges``, ``communities``. """ _require_networkx() G = to_networkx(graph) if communities is None: communities = detect_communities(graph, seed=seed) # Build node → community index node_community: dict[str, int] = {} for i, comm in enumerate(communities): for node in comm: node_community[node] = i intra = 0 inter = 0 for u, v in G.edges: if node_community.get(u) == node_community.get(v): intra += 1 else: inter += 1 total = intra + inter return { "cohesion": intra / total if total > 0 else 0.0, "coupling": inter / total if total > 0 else 0.0, "intra_edges": intra, "inter_edges": inter, "total_edges": total, "communities": len(communities), }