From dc22017b7ccc749442fabe2dc1c1f3499969eee6 Mon Sep 17 00:00:00 2001
From: tegwick
Date: Thu, 19 Feb 2026 01:38:35 +0100
Subject: [PATCH] feat(analysis): add Formal Concept Analysis for coverage gap detection (S1.7)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pure-Python FCA implementation: FormalContext (entity × attribute
binary relation with extent/intent/closure), ConceptLattice via
NextClosure algorithm, find_gap_concepts() for structural coverage
gaps, and find_empty_cells() for cross-tabulation analysis.

Co-Authored-By: Claude Opus 4.6
---
 markitect/analysis/fca.py       | 307 +++++++++++++++++++++++++++++++
 tests/unit/analysis/test_fca.py | 313 ++++++++++++++++++++++++++++++++
 2 files changed, 620 insertions(+)
 create mode 100644 markitect/analysis/fca.py
 create mode 100644 tests/unit/analysis/test_fca.py

diff --git a/markitect/analysis/fca.py b/markitect/analysis/fca.py
new file mode 100644
index 00000000..5fbaa47a
--- /dev/null
+++ b/markitect/analysis/fca.py
@@ -0,0 +1,307 @@
+"""
+Formal Concept Analysis (FCA) for coverage gap detection.
+
+Provides a pure-Python implementation of:
+
+- :class:`FormalContext` — entity × attribute binary relation with
+  extent/intent operations and double-prime closure.
+- :class:`ConceptLattice` — the set of all formal concepts computed
+  via the NextClosure algorithm (Ganter, 1984).
+- :func:`find_gap_concepts` — attribute combinations present in the
+  lattice whose extent is empty, revealing structural coverage gaps.
+
+Sufficient for entity scales of ~100s. For larger contexts a library
+such as ``concepts`` (PyPI) can be substituted.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from typing import Iterable, Optional
+
+
+class FormalContext:
+    """Binary relation between objects and attributes.
+
+    Args:
+        objects: Iterable of object identifiers (e.g. entity slugs).
+        attributes: Iterable of attribute identifiers (e.g. "domain:Production").
+        incidence: Mapping of object → set of attributes it possesses.
+    """
+
+    def __init__(
+        self,
+        objects: Iterable[str],
+        attributes: Iterable[str],
+        incidence: dict[str, set[str]],
+    ):
+        self._objects = sorted(set(objects))
+        self._attributes = sorted(set(attributes))
+        self._obj_set = frozenset(self._objects)
+        self._attr_set = frozenset(self._attributes)
+
+        # Normalise incidence: only keep known attributes
+        self._incidence: dict[str, frozenset[str]] = {}
+        for obj in self._objects:
+            raw = incidence.get(obj, set())
+            self._incidence[obj] = frozenset(raw) & self._attr_set
+
+        # Reverse index: attribute → set of objects that have it
+        self._attr_to_objs: dict[str, frozenset[str]] = {}
+        for attr in self._attributes:
+            self._attr_to_objs[attr] = frozenset(
+                obj for obj in self._objects if attr in self._incidence[obj]
+            )
+
+    @property
+    def objects(self) -> list[str]:
+        """Sorted list of objects."""
+        return list(self._objects)
+
+    @property
+    def attributes(self) -> list[str]:
+        """Sorted list of attributes."""
+        return list(self._attributes)
+
+    @property
+    def object_count(self) -> int:
+        return len(self._objects)
+
+    @property
+    def attribute_count(self) -> int:
+        return len(self._attributes)
+
+    def extent(self, attrs: Iterable[str]) -> frozenset[str]:
+        """Objects possessing **all** given attributes (B' operation)."""
+        attr_set = frozenset(attrs)
+        if not attr_set:
+            return self._obj_set
+        result = self._obj_set
+        for attr in attr_set:
+            result = result & self._attr_to_objs.get(attr, frozenset())
+        return result
+
+    def intent(self, objs: Iterable[str]) -> frozenset[str]:
+        """Attributes shared by **all** given objects (A' operation)."""
+        obj_list = [o for o in objs if o in self._incidence]
+        if not obj_list:
+            return self._attr_set
+        result = self._incidence[obj_list[0]]
+        for obj in obj_list[1:]:
+            result = result & self._incidence[obj]
+        return result
+
+    def closure(self, attrs: Iterable[str]) -> frozenset[str]:
+        """Double-prime closure: B'' = intent(extent(B))."""
+        return self.intent(self.extent(attrs))
+
+    def has_attribute(self, obj: str, attr: str) -> bool:
+        """Check if *obj* has *attr*."""
+        return attr in self._incidence.get(obj, frozenset())
+
+    def density(self) -> float:
+        """Proportion of 1s in the incidence matrix."""
+        total = len(self._objects) * len(self._attributes)
+        if total == 0:
+            return 0.0
+        filled = sum(len(attrs) for attrs in self._incidence.values())
+        return filled / total
+
+    @classmethod
+    def from_dict(cls, entity_attributes: dict[str, set[str]]) -> FormalContext:
+        """Convenience: build context from ``{object: {attr, ...}}``."""
+        objects = list(entity_attributes.keys())
+        all_attrs: set[str] = set()
+        for attrs in entity_attributes.values():
+            all_attrs.update(attrs)
+        return cls(objects, all_attrs, entity_attributes)
+
+
+@dataclass(frozen=True)
+class FormalConcept:
+    """A formal concept (A, B) where A' = B and B' = A."""
+
+    extent: frozenset[str]
+    intent: frozenset[str]
+
+    @property
+    def extent_size(self) -> int:
+        return len(self.extent)
+
+    @property
+    def intent_size(self) -> int:
+        return len(self.intent)
+
+
+@dataclass
+class ConceptLattice:
+    """The set of all formal concepts derived from a :class:`FormalContext`.
+
+    Concepts are ordered by extent inclusion (subconcept ≤ superconcept).
+    """
+
+    concepts: list[FormalConcept] = field(default_factory=list)
+
+    @property
+    def size(self) -> int:
+        """Number of formal concepts in the lattice."""
+        return len(self.concepts)
+
+    @property
+    def top(self) -> Optional[FormalConcept]:
+        """Supremum: concept with largest extent."""
+        if not self.concepts:
+            return None
+        return max(self.concepts, key=lambda c: c.extent_size)
+
+    @property
+    def bottom(self) -> Optional[FormalConcept]:
+        """Infimum: concept with largest intent."""
+        if not self.concepts:
+            return None
+        return max(self.concepts, key=lambda c: c.intent_size)
+
+    @classmethod
+    def from_context(cls, context: FormalContext) -> ConceptLattice:
+        """Compute all formal concepts using the NextClosure algorithm."""
+        attrs = context.attributes  # sorted, fixed order
+        if not attrs:
+            # Degenerate: no attributes → single concept with all objects
+            top = FormalConcept(
+                extent=frozenset(context.objects),
+                intent=frozenset(),
+            )
+            return cls(concepts=[top])
+
+        concepts: list[FormalConcept] = []
+
+        # Start with closure of empty attribute set
+        current = context.closure(frozenset())
+        ext = context.extent(current)
+        concepts.append(FormalConcept(extent=ext, intent=current))
+
+        while current != frozenset(attrs):
+            nxt = _next_closure(current, attrs, context.closure)
+            if nxt is None:
+                break
+            ext = context.extent(nxt)
+            concepts.append(FormalConcept(extent=ext, intent=nxt))
+            current = nxt
+
+        return cls(concepts=concepts)
+
+    def gap_concepts(self) -> list[FormalConcept]:
+        """Formal concepts whose extent is empty."""
+        return [c for c in self.concepts if c.extent_size == 0]
+
+    def concepts_with_extent_size(self, min_size: int = 0, max_size: Optional[int] = None) -> list[FormalConcept]:
+        """Filter concepts by extent size."""
+        result = [c for c in self.concepts if c.extent_size >= min_size]
+        if max_size is not None:
+            result = [c for c in result if c.extent_size <= max_size]
+        return result
+
+    def depth(self) -> int:
+        """Longest chain length in the concept ordering.
+
+        A chain is a sequence of concepts c_1 < c_2 < ... < c_k
+        where < means strict subconcept (extent inclusion).
+        """
+        if not self.concepts:
+            return 0
+
+        # Build DAG: concept i → j if i is direct subconcept of j
+        # Use extent inclusion: i < j iff extent_i ⊂ extent_j
+        n = len(self.concepts)
+        extents = [c.extent for c in self.concepts]
+
+        # Longest path via dynamic programming on sorted order
+        # Sort by extent size ascending (smaller extents = more specific)
+        order = sorted(range(n), key=lambda i: len(extents[i]))
+        longest = [1] * n
+
+        for idx in range(n):
+            i = order[idx]
+            for jdx in range(idx + 1, n):
+                j = order[jdx]
+                if extents[i] < extents[j]:  # strict subset
+                    if longest[j] < longest[i] + 1:
+                        longest[j] = longest[i] + 1
+
+        return max(longest) if longest else 0
+
+
+def find_gap_concepts(
+    context: FormalContext,
+    lattice: Optional[ConceptLattice] = None,
+) -> list[FormalConcept]:
+    """Find formal concepts with empty extent (coverage gaps).
+
+    These represent attribute combinations that are structurally
+    present in the lattice but have no corresponding entities.
+
+    Args:
+        context: The formal context.
+        lattice: Pre-computed lattice. If ``None``, computed from *context*.
+
+    Returns:
+        List of :class:`FormalConcept` with empty extent, sorted by
+        intent size ascending (most general gaps first).
+    """
+    if lattice is None:
+        lattice = ConceptLattice.from_context(context)
+    gaps = lattice.gap_concepts()
+    gaps.sort(key=lambda c: c.intent_size)
+    return gaps
+
+
+def find_empty_cells(
+    context: FormalContext,
+    dimension_a: list[str],
+    dimension_b: list[str],
+) -> list[tuple[str, str]]:
+    """Find empty cells in a two-dimensional cross-tabulation.
+
+    Given two sets of attributes (e.g. domain values and VSM systems),
+    return pairs ``(attr_a, attr_b)`` where no object possesses both.
+
+    This is a simpler alternative to full FCA for two-dimensional
+    coverage analysis.
+    """
+    empty: list[tuple[str, str]] = []
+    for a in sorted(dimension_a):
+        for b in sorted(dimension_b):
+            if not context.extent([a, b]):
+                empty.append((a, b))
+    return empty
+
+
+# ── NextClosure internals ───────────────────────────────────────────
+
+
+def _next_closure(
+    current: frozenset[str],
+    attrs: list[str],
+    closure_fn,
+) -> Optional[frozenset[str]]:
+    """Compute the next closed set in lectic order after *current*.
+
+    Implements Ganter's NextClosure algorithm.
+    """
+    for i in range(len(attrs) - 1, -1, -1):
+        m = attrs[i]
+        if m in current:
+            current = current - {m}
+        else:
+            candidate = current | {m}
+            closed = closure_fn(candidate)
+            # Canonicity test: no attribute before position i
+            # was added by the closure
+            canonical = True
+            for j in range(i):
+                if attrs[j] in closed and attrs[j] not in candidate:
+                    canonical = False
+                    break
+            if canonical:
+                return closed
+    return None
diff --git a/tests/unit/analysis/test_fca.py b/tests/unit/analysis/test_fca.py
new file mode 100644
index 00000000..5ac793f4
--- /dev/null
+++ b/tests/unit/analysis/test_fca.py
@@ -0,0 +1,313 @@
+"""Tests for markitect.analysis.fca."""
+
+import pytest
+
+from markitect.analysis.fca import (
+    FormalContext,
+    FormalConcept,
+    ConceptLattice,
+    find_gap_concepts,
+    find_empty_cells,
+)
+
+
+# ── Test data ────────────────────────────────────────────────────────
+
+
+def _animal_context():
+    """Classic FCA example: animals × properties.
+
+    Context:
+        | animal    | legs | wings | feathers | fur |
+        |-----------|------|-------|----------|-----|
+        | dog       | x    |       |          | x   |
+        | cat       | x    |       |          | x   |
+        | eagle     | x    | x     | x        |     |
+        | sparrow   | x    | x     | x        |     |
+        | penguin   | x    |       | x        |     |
+    """
+    return FormalContext(
+        objects=["dog", "cat", "eagle", "sparrow", "penguin"],
+        attributes=["legs", "wings", "feathers", "fur"],
+        incidence={
+            "dog": {"legs", "fur"},
+            "cat": {"legs", "fur"},
+            "eagle": {"legs", "wings", "feathers"},
+            "sparrow": {"legs", "wings", "feathers"},
+            "penguin": {"legs", "feathers"},
+        },
+    )
+
+
+def _infospace_context():
+    """Simplified infospace-style context: entities × {domain, vsm_system}.
+
+    Entities with domain and VSM classification, including a gap:
+    no entity has both domain:Exchange and vsm:S3.
+    """
+    return FormalContext.from_dict({
+        "division-of-labour": {"domain:Production", "vsm:S1"},
+        "pin-factory": {"domain:Production", "vsm:S1"},
+        "market-extent": {"domain:Exchange", "vsm:S4"},
+        "wage-determination": {"domain:Distribution", "vsm:S3"},
+        "rent-theory": {"domain:Distribution", "vsm:S5"},
+        "capital-accumulation": {"domain:Production", "vsm:S3"},
+    })
+
+
+def _empty_context():
+    """Context with no objects."""
+    return FormalContext([], ["a", "b"], {})
+
+
+def _single_entity():
+    """Context with one object."""
+    return FormalContext(["only"], ["x", "y"], {"only": {"x", "y"}})
+
+
+# ── FormalContext ────────────────────────────────────────────────────
+
+
+class TestFormalContext:
+    def test_objects_sorted(self):
+        ctx = _animal_context()
+        assert ctx.objects == sorted(ctx.objects)
+
+    def test_attributes_sorted(self):
+        ctx = _animal_context()
+        assert ctx.attributes == sorted(ctx.attributes)
+
+    def test_object_count(self):
+        assert _animal_context().object_count == 5
+
+    def test_attribute_count(self):
+        assert _animal_context().attribute_count == 4
+
+    def test_extent_single_attr(self):
+        ctx = _animal_context()
+        assert ctx.extent(["fur"]) == frozenset({"dog", "cat"})
+
+    def test_extent_multiple_attrs(self):
+        ctx = _animal_context()
+        assert ctx.extent(["wings", "feathers"]) == frozenset({"eagle", "sparrow"})
+
+    def test_extent_empty_returns_all(self):
+        ctx = _animal_context()
+        assert ctx.extent([]) == frozenset(ctx.objects)
+
+    def test_extent_no_match(self):
+        ctx = _animal_context()
+        assert ctx.extent(["fur", "feathers"]) == frozenset()
+
+    def test_intent_single_obj(self):
+        ctx = _animal_context()
+        assert ctx.intent(["penguin"]) == frozenset({"legs", "feathers"})
+
+    def test_intent_multiple_objs(self):
+        ctx = _animal_context()
+        # dog and cat share: legs, fur
+        assert ctx.intent(["dog", "cat"]) == frozenset({"legs", "fur"})
+
+    def test_intent_empty_returns_all(self):
+        ctx = _animal_context()
+        assert ctx.intent([]) == frozenset(ctx.attributes)
+
+    def test_closure_is_idempotent(self):
+        ctx = _animal_context()
+        c1 = ctx.closure({"fur"})
+        c2 = ctx.closure(c1)
+        assert c1 == c2
+
+    def test_closure_expands(self):
+        ctx = _animal_context()
+        # fur → {dog, cat} → {legs, fur} (both have legs too)
+        assert ctx.closure({"fur"}) == frozenset({"legs", "fur"})
+
+    def test_has_attribute(self):
+        ctx = _animal_context()
+        assert ctx.has_attribute("dog", "legs") is True
+        assert ctx.has_attribute("dog", "wings") is False
+
+    def test_density(self):
+        ctx = _animal_context()
+        # 5 objects × 4 attributes = 20 cells
+        # dog:2, cat:2, eagle:3, sparrow:3, penguin:2 = 12 filled
+        assert ctx.density() == pytest.approx(12 / 20)
+
+    def test_density_empty(self):
+        assert FormalContext([], [], {}).density() == 0.0
+
+    def test_from_dict(self):
+        ctx = FormalContext.from_dict({
+            "a": {"x", "y"},
+            "b": {"y", "z"},
+        })
+        assert ctx.object_count == 2
+        assert ctx.attribute_count == 3
+
+    def test_unknown_attributes_ignored(self):
+        ctx = FormalContext(
+            ["a"], ["x"], {"a": {"x", "unknown"}}
+        )
+        assert ctx.intent(["a"]) == frozenset({"x"})
+
+
+# ── ConceptLattice ──────────────────────────────────────────────────
+
+
+class TestConceptLattice:
+    def test_animal_concept_count(self):
+        ctx = _animal_context()
+        lattice = ConceptLattice.from_context(ctx)
+        # Known: the animal context produces exactly 5 formal concepts
+        # Top: ({all}, {legs}), Bottom: ({}, {all 4}),
+        # plus intermediate concepts
+        assert lattice.size == 5
+
+    def test_top_has_all_objects(self):
+        ctx = _animal_context()
+        lattice = ConceptLattice.from_context(ctx)
+        top = lattice.top
+        assert top is not None
+        assert top.extent == frozenset(ctx.objects)
+
+    def test_top_intent_is_common_attributes(self):
+        ctx = _animal_context()
+        lattice = ConceptLattice.from_context(ctx)
+        top = lattice.top
+        # All animals have "legs"
+        assert "legs" in top.intent
+
+    def test_bottom_has_all_attributes(self):
+        ctx = _animal_context()
+        lattice = ConceptLattice.from_context(ctx)
+        bottom = lattice.bottom
+        assert bottom is not None
+        assert bottom.intent == frozenset(ctx.attributes)
+
+    def test_bottom_extent_empty_when_no_universal_object(self):
+        ctx = _animal_context()
+        lattice = ConceptLattice.from_context(ctx)
+        bottom = lattice.bottom
+        # No animal has all 4 attributes
+        assert bottom.extent_size == 0
+
+    def test_all_concepts_are_closed(self):
+        ctx = _animal_context()
+        lattice = ConceptLattice.from_context(ctx)
+        for concept in lattice.concepts:
+            # intent should be closed: closure(intent) == intent
+            assert ctx.closure(concept.intent) == concept.intent
+            # extent' should equal intent
+            assert ctx.intent(concept.extent) == concept.intent
+            # intent' should equal extent
+            assert ctx.extent(concept.intent) == concept.extent
+
+    def test_empty_context(self):
+        ctx = _empty_context()
+        lattice = ConceptLattice.from_context(ctx)
+        # No objects → exactly one concept: (∅, all attributes)
+        assert lattice.size == 1
+
+    def test_single_entity(self):
+        ctx = _single_entity()
+        lattice = ConceptLattice.from_context(ctx)
+        # At least 1 concept containing the single entity
+        has_entity = any(
+            "only" in c.extent for c in lattice.concepts
+        )
+        assert has_entity
+
+    def test_no_attributes_produces_one_concept(self):
+        ctx = FormalContext(["a", "b"], [], {})
+        lattice = ConceptLattice.from_context(ctx)
+        assert lattice.size == 1
+        assert lattice.concepts[0].extent == frozenset({"a", "b"})
+
+    def test_depth(self):
+        ctx = _animal_context()
+        lattice = ConceptLattice.from_context(ctx)
+        d = lattice.depth()
+        # At least 2 levels (top → bottom)
+        assert d >= 2
+
+    def test_depth_empty(self):
+        lattice = ConceptLattice(concepts=[])
+        assert lattice.depth() == 0
+
+
+# ── Gap concepts ────────────────────────────────────────────────────
+
+
+class TestGapConcepts:
+    def test_animal_has_gap(self):
+        ctx = _animal_context()
+        gaps = find_gap_concepts(ctx)
+        # {fur, feathers} has no animal → gap concept
+        fur_feathers_gap = any(
+            {"fur", "feathers"} <= c.intent for c in gaps
+        )
+        assert fur_feathers_gap
+
+    def test_gap_extents_are_empty(self):
+        ctx = _animal_context()
+        gaps = find_gap_concepts(ctx)
+        for gap in gaps:
+            assert gap.extent_size == 0
+
+    def test_no_gaps_when_all_combinations_covered(self):
+        # Every attribute combination has at least one object
+        ctx = FormalContext.from_dict({
+            "obj1": {"a", "b"},
+            "obj2": {"a"},
+            "obj3": {"b"},
+        })
+        lattice = ConceptLattice.from_context(ctx)
+        gaps = find_gap_concepts(ctx, lattice)
+        assert len(gaps) == 0
+
+    def test_sorted_by_intent_size(self):
+        ctx = _animal_context()
+        gaps = find_gap_concepts(ctx)
+        sizes = [g.intent_size for g in gaps]
+        assert sizes == sorted(sizes)
+
+    def test_infospace_gap(self):
+        ctx = _infospace_context()
+        gaps = find_gap_concepts(ctx)
+        # domain:Exchange + vsm:S1 has no entity → should appear as gap
+        gap_intents = [g.intent for g in gaps]
+        exchange_s1_covered = any(
+            {"domain:Exchange", "vsm:S1"} <= intent for intent in gap_intents
+        )
+        assert exchange_s1_covered
+
+
+# ── Empty cells (cross-tab) ─────────────────────────────────────────
+
+
+class TestFindEmptyCells:
+    def test_finds_empty_cells(self):
+        ctx = _infospace_context()
+        domains = ["domain:Production", "domain:Distribution", "domain:Exchange"]
+        vsm_systems = ["vsm:S1", "vsm:S3", "vsm:S4", "vsm:S5"]
+        empty = find_empty_cells(ctx, domains, vsm_systems)
+        # domain:Exchange + vsm:S1 should be empty
+        assert ("domain:Exchange", "vsm:S1") in empty
+        # domain:Production + vsm:S1 should NOT be empty (division-of-labour)
+        assert ("domain:Production", "vsm:S1") not in empty
+
+    def test_all_filled_returns_empty_list(self):
+        ctx = FormalContext.from_dict({
+            "a": {"x", "y"},
+            "b": {"x", "z"},
+            "c": {"y", "z"},
+            "d": {"x", "y", "z"},
+        })
+        empty = find_empty_cells(ctx, ["x", "y"], ["z"])
+        assert empty == []
+
+    def test_empty_context_all_cells_empty(self):
+        ctx = FormalContext([], ["a", "b", "c"], {})
+        empty = find_empty_cells(ctx, ["a"], ["b", "c"])
+        assert len(empty) == 2