# markitect-main/markitect/analysis/graph.py

"""
Graph analysis utilities for collection-level metrics.

Provides connected components, centrality, community detection,
modularity, degree distribution, and cohesion/coupling computation.

Requires ``networkx`` (optional dependency)::

    pip install networkx
"""

from __future__ import annotations

from typing import Optional

from markitect.prompts.dependencies.models import DependencyGraph


def _require_networkx():
    """Import and return networkx, raising a clear error if missing."""
    try:
        import networkx as nx
        return nx
    except ImportError:
        raise ImportError(
            "networkx is required for graph analysis. "
            "Install it with: pip install networkx"
        ) from None


def to_networkx(graph: DependencyGraph):
    """Convert a :class:`DependencyGraph` to a networkx ``DiGraph``.

    Every entry of ``graph.nodes`` is added as a node first, so nodes
    without any edges are still present in the result.  One directed edge
    is then added for each ``(node, successor)`` pair reported by
    ``graph.get_successors``.

    Each edge carries an ``edge_type`` attribute: the ``.value`` of the
    object returned by ``graph.get_edge_type`` (string value of the
    :class:`EdgeType` enum), or ``None`` when no edge type is returned.

    Args:
        graph: The dependency graph to convert.

    Returns:
        A ``networkx.DiGraph`` mirroring the nodes and edges of *graph*.

    Raises:
        ImportError: If ``networkx`` is not installed.
    """
    nx = _require_networkx()
    G = nx.DiGraph()
    G.add_nodes_from(graph.nodes)
    for node in graph.nodes:
        for succ in graph.get_successors(node):
            edge_type = graph.get_edge_type(node, succ)
            # Compare against None explicitly: an enum member mixed with
            # ``str``/``int`` follows the mixin's truthiness rules and can be
            # falsy, in which case a plain truthiness test would silently
            # replace its value with None.
            G.add_edge(
                node, succ,
                edge_type=None if edge_type is None else edge_type.value,
            )
    return G


def connected_components(graph: DependencyGraph) -> list[set[str]]:
    """Return the weakly connected components of *graph*.

    Edge direction is ignored when grouping nodes into components.

    Args:
        graph: The dependency graph to analyse.

    Returns:
        One ``set`` of node IDs per component, ordered from the largest
        component to the smallest.
    """
    nx = _require_networkx()
    digraph = to_networkx(graph)
    by_size = sorted(
        nx.weakly_connected_components(digraph),
        key=len,
        reverse=True,
    )
    return [set(members) for members in by_size]


def betweenness_centrality(graph: DependencyGraph) -> dict[str, float]:
    """Return the betweenness centrality of every node in *graph*.

    The work is delegated to ``networkx.betweenness_centrality``, called
    with its default arguments on the directed conversion of *graph*.

    Args:
        graph: The dependency graph to analyse.

    Returns:
        Mapping of node ID to centrality score in [0, 1].
    """
    nx = _require_networkx()
    digraph = to_networkx(graph)
    scores = nx.betweenness_centrality(digraph)
    return scores


def detect_communities(
    graph: DependencyGraph,
    seed: Optional[int] = None,
) -> list[set[str]]:
    """Partition *graph* into communities with the Louvain algorithm.

    The directed graph is first projected to an undirected one, and the
    Louvain method is run on that projection.

    Args:
        graph: The dependency graph to analyse.
        seed: Random seed for reproducibility (passed to Louvain).

    Returns:
        One ``set`` of node IDs per community, ordered from the largest
        community to the smallest.  An empty list for a graph with no
        nodes.
    """
    nx = _require_networkx()
    undirected = to_networkx(graph).to_undirected()
    # Nothing to partition when the graph has no nodes at all.
    if not undirected.nodes:
        return []
    found = nx.community.louvain_communities(undirected, seed=seed)
    return [set(group) for group in sorted(found, key=len, reverse=True)]


def modularity_score(
    graph: DependencyGraph,
    communities: Optional[list[set[str]]] = None,
    seed: Optional[int] = None,
) -> float:
    """Return the modularity of a community partition of *graph*.

    Modularity is evaluated on the undirected projection of the graph.

    Args:
        graph: The dependency graph.
        communities: Pre-computed communities. If ``None``, communities
            are detected via :func:`detect_communities`.
        seed: Random seed (used only when *communities* is ``None``).

    Returns:
        Modularity in [-0.5, 1.0].  Returns 0.0 for graphs with no edges.
    """
    nx = _require_networkx()
    undirected = to_networkx(graph).to_undirected()
    # Short-circuit edgeless graphs with the documented 0.0 result.
    if not undirected.edges:
        return 0.0
    partition = (
        detect_communities(graph, seed=seed)
        if communities is None
        else communities
    )
    return nx.community.modularity(undirected, partition)


def degree_distribution(graph: DependencyGraph) -> dict[str, dict[str, int]]:
    """Report the in-, out-, and total degree of every node in *graph*.

    Degrees are read from the directed networkx conversion of *graph*;
    the total is the sum of in-degree and out-degree.

    Returns::

        {"node_id": {"in_degree": 2, "out_degree": 1, "total_degree": 3}, ...}
    """
    _require_networkx()
    digraph = to_networkx(graph)

    def _degrees(node_id):
        # Per-node record with the three degree figures.
        incoming = digraph.in_degree(node_id)
        outgoing = digraph.out_degree(node_id)
        return {
            "in_degree": incoming,
            "out_degree": outgoing,
            "total_degree": incoming + outgoing,
        }

    return {node_id: _degrees(node_id) for node_id in digraph.nodes}


def cohesion_coupling(
    graph: DependencyGraph,
    communities: Optional[list[set[str]]] = None,
    seed: Optional[int] = None,
) -> dict:
    """Compute cohesion (intra-community edges) and coupling (inter-community edges).

    Every directed edge of the graph is classified as *intra* when both
    endpoints belong to the same community and *inter* otherwise.  A node
    that appears in none of the supplied *communities* is treated as its
    own singleton community, so an edge touching such a node only counts
    as intra when it is a self-loop.  If a node is listed in several
    communities, the last one listed wins.

    Args:
        graph: The dependency graph.
        communities: Pre-computed communities.  If ``None``, detected
            via :func:`detect_communities`.
        seed: Random seed (used only when *communities* is ``None``).

    Returns:
        Dict with keys ``cohesion``, ``coupling`` (ratios in [0, 1]),
        ``intra_edges``, ``inter_edges``, ``total_edges``, ``communities``.
        Both ratios are 0.0 when the graph has no edges.
    """
    _require_networkx()
    G = to_networkx(graph)
    if communities is None:
        communities = detect_communities(graph, seed=seed)

    # Build node → community index
    node_community: dict[str, int] = {
        node: index
        for index, members in enumerate(communities)
        for node in members
    }

    intra = 0
    inter = 0
    for u, v in G.edges:
        comm_u = node_community.get(u)
        comm_v = node_community.get(v)
        if comm_u is None or comm_v is None:
            # An unassigned endpoint must not match another unassigned
            # endpoint merely because both lookups yield None; treat each
            # unassigned node as a singleton community instead.
            same_community = u == v
        else:
            same_community = comm_u == comm_v
        if same_community:
            intra += 1
        else:
            inter += 1

    total = intra + inter
    return {
        "cohesion": intra / total if total > 0 else 0.0,
        "coupling": inter / total if total > 0 else 0.0,
        "intra_edges": intra,
        "inter_edges": inter,
        "total_edges": total,
        "communities": len(communities),
    }