feat(analysis): add Formal Concept Analysis for coverage gap detection (S1.7)

Pure-Python FCA implementation: FormalContext (entity × attribute
binary relation with extent/intent/closure), ConceptLattice via
NextClosure algorithm, find_gap_concepts() for structural coverage
gaps, and find_empty_cells() for cross-tabulation analysis.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-19 01:38:35 +01:00
parent f8c9ab33f0
commit dc22017b7c
2 changed files with 620 additions and 0 deletions

307
markitect/analysis/fca.py Normal file
View File

@@ -0,0 +1,307 @@
"""
Formal Concept Analysis (FCA) for coverage gap detection.
Provides a pure-Python implementation of:
- :class:`FormalContext` — entity × attribute binary relation with
extent/intent operations and double-prime closure.
- :class:`ConceptLattice` — the set of all formal concepts computed
via the NextClosure algorithm (Ganter, 1984).
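- :func:`find_gap_concepts` — attribute combinations present in the
lattice whose extent is empty, revealing structural coverage gaps.
- :func:`find_empty_cells` — attribute pairs across two dimensions
that no object possesses together (cross-tabulation gaps).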
Sufficient for entity scales of ~100s. For larger contexts a library
such as ``concepts`` (PyPI) can be substituted.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Iterable, Optional
class FormalContext:
    """Objects × attributes incidence table with the FCA derivation operators.

    The constructor de-duplicates and sorts both axes, discards any attribute
    in *incidence* that is not listed in *attributes*, and precomputes an
    attribute → objects index used by :meth:`extent`.

    Args:
        objects: Iterable of object identifiers (e.g. entity slugs).
        attributes: Iterable of attribute identifiers
            (e.g. ``"domain:Production"``).
        incidence: Mapping of object → set of attributes it possesses.
            Objects absent from the mapping are treated as having none.
    """

    def __init__(
        self,
        objects: Iterable[str],
        attributes: Iterable[str],
        incidence: dict[str, set[str]],
    ):
        self._objects = sorted(set(objects))
        self._attributes = sorted(set(attributes))
        self._obj_set = frozenset(self._objects)
        self._attr_set = frozenset(self._attributes)

        # Per-object attribute sets, restricted to the declared attributes.
        self._incidence: dict[str, frozenset[str]] = {
            obj: frozenset(incidence.get(obj, set())) & self._attr_set
            for obj in self._objects
        }

        # Inverted index: for each attribute, the objects carrying it.
        self._attr_to_objs: dict[str, frozenset[str]] = {
            attr: frozenset(
                obj for obj in self._objects if attr in self._incidence[obj]
            )
            for attr in self._attributes
        }

    @property
    def objects(self) -> list[str]:
        """Objects in sorted order (a fresh list on every access)."""
        return list(self._objects)

    @property
    def attributes(self) -> list[str]:
        """Attributes in sorted order (a fresh list on every access)."""
        return list(self._attributes)

    @property
    def object_count(self) -> int:
        """Number of distinct objects."""
        return len(self._objects)

    @property
    def attribute_count(self) -> int:
        """Number of distinct attributes."""
        return len(self._attributes)

    def extent(self, attrs: Iterable[str]) -> frozenset[str]:
        """Return the objects that carry **every** attribute in *attrs* (B').

        An empty *attrs* yields all objects; an attribute unknown to the
        context yields the empty set.
        """
        matching = self._obj_set
        for attr in frozenset(attrs):
            matching = matching.intersection(
                self._attr_to_objs.get(attr, frozenset())
            )
        return matching

    def intent(self, objs: Iterable[str]) -> frozenset[str]:
        """Return the attributes common to **every** object in *objs* (A').

        Objects unknown to the context are skipped.  If nothing remains
        after skipping (including an empty *objs*), all attributes are
        returned.
        """
        attr_sets = [self._incidence[o] for o in objs if o in self._incidence]
        if not attr_sets:
            return self._attr_set
        return frozenset.intersection(*attr_sets)

    def closure(self, attrs: Iterable[str]) -> frozenset[str]:
        """Return the double-prime closure ``B'' = intent(extent(B))``."""
        covered_objects = self.extent(attrs)
        return self.intent(covered_objects)

    def has_attribute(self, obj: str, attr: str) -> bool:
        """Return whether *obj* carries *attr* (``False`` for unknown objects)."""
        return attr in self._incidence.get(obj, frozenset())

    def density(self) -> float:
        """Return the fraction of filled cells in the incidence matrix.

        A context with no objects or no attributes has density ``0.0``.
        """
        cells = len(self._objects) * len(self._attributes)
        if cells == 0:
            return 0.0
        return sum(map(len, self._incidence.values())) / cells

    @classmethod
    def from_dict(cls, entity_attributes: dict[str, set[str]]) -> "FormalContext":
        """Build a context from ``{object: {attr, ...}}``.

        The attribute axis is the union of all attribute sets in the mapping.
        """
        every_attr: set[str] = set().union(*entity_attributes.values())
        return cls(list(entity_attributes), every_attr, entity_attributes)
@dataclass(frozen=True)
class FormalConcept:
    """Immutable ``(extent, intent)`` pair representing a formal concept.

    By convention the pair satisfies ``extent' = intent`` and
    ``intent' = extent``; this class only stores the two sets and does not
    verify that relationship.
    """

    # Objects belonging to the concept.
    extent: frozenset[str]
    # Attributes shared by those objects.
    intent: frozenset[str]

    @property
    def extent_size(self) -> int:
        """Number of objects in the extent."""
        return len(self.extent)

    @property
    def intent_size(self) -> int:
        """Number of attributes in the intent."""
        return len(self.intent)
@dataclass
class ConceptLattice:
    """Collection of the formal concepts of a :class:`FormalContext`.

    The concept order is extent inclusion: a concept is a subconcept of
    another when its extent is a subset of the other's extent.
    """

    concepts: list[FormalConcept] = field(default_factory=list)

    @property
    def size(self) -> int:
        """How many formal concepts the lattice holds."""
        return len(self.concepts)

    @property
    def top(self) -> Optional[FormalConcept]:
        """Supremum: the concept with the largest extent, or ``None`` if empty."""
        return max(self.concepts, key=lambda c: c.extent_size, default=None)

    @property
    def bottom(self) -> Optional[FormalConcept]:
        """Infimum: the concept with the largest intent, or ``None`` if empty."""
        return max(self.concepts, key=lambda c: c.intent_size, default=None)

    @classmethod
    def from_context(cls, context: FormalContext) -> "ConceptLattice":
        """Enumerate every formal concept of *context* with NextClosure.

        Closed intents are generated in lectic order over the context's
        sorted attribute list, starting from the closure of the empty set
        and stopping once the full attribute set has been emitted.
        """
        ordered_attrs = context.attributes  # sorted, fixed order
        if not ordered_attrs:
            # Degenerate case: without attributes there is exactly one
            # concept, holding every object and an empty intent.
            only = FormalConcept(
                extent=frozenset(context.objects),
                intent=frozenset(),
            )
            return cls(concepts=[only])

        full_intent = frozenset(ordered_attrs)

        # The lectically first closed set is the closure of the empty set.
        intent = context.closure(frozenset())
        found: list[FormalConcept] = [
            FormalConcept(extent=context.extent(intent), intent=intent)
        ]

        while intent != full_intent:
            successor = _next_closure(intent, ordered_attrs, context.closure)
            if successor is None:
                break
            found.append(
                FormalConcept(extent=context.extent(successor), intent=successor)
            )
            intent = successor

        return cls(concepts=found)

    def gap_concepts(self) -> list[FormalConcept]:
        """Return the concepts whose extent contains no objects."""
        return [concept for concept in self.concepts if concept.extent_size == 0]

    def concepts_with_extent_size(self, min_size: int = 0, max_size: Optional[int] = None) -> list[FormalConcept]:
        """Return concepts whose extent size lies in ``[min_size, max_size]``.

        ``max_size=None`` means no upper bound.
        """
        def within_bounds(concept: FormalConcept) -> bool:
            if concept.extent_size < min_size:
                return False
            return max_size is None or concept.extent_size <= max_size

        return [concept for concept in self.concepts if within_bounds(concept)]

    def depth(self) -> int:
        """Return the number of concepts on the longest chain.

        A chain is a sequence ``c_1 < c_2 < ... < c_k`` where ``<`` is the
        strict subconcept relation (strict extent inclusion).  An empty
        lattice has depth 0.
        """
        if not self.concepts:
            return 0

        # Ascending extent size guarantees that every strict subset of an
        # extent appears before it, so one left-to-right pass suffices.
        by_size = sorted((c.extent for c in self.concepts), key=len)
        chain_len = [1] * len(by_size)
        for hi, larger in enumerate(by_size):
            for lo in range(hi):
                if by_size[lo] < larger:  # strict subset
                    chain_len[hi] = max(chain_len[hi], chain_len[lo] + 1)
        return max(chain_len)
def find_gap_concepts(
    context: FormalContext,
    lattice: Optional[ConceptLattice] = None,
) -> list[FormalConcept]:
    """Return the formal concepts whose extent is empty (coverage gaps).

    An empty extent means that no object possesses every attribute in the
    concept's intent.

    NOTE: ``FormalContext.intent`` maps the empty object set to the full
    attribute set, so a lattice produced by ``ConceptLattice.from_context``
    contains at most one such concept — the bottom concept, whose intent
    is every attribute.  For pairwise gaps between two attribute
    dimensions use :func:`find_empty_cells`.

    Args:
        context: The formal context.  Only consulted when *lattice* is
            ``None``.
        lattice: Pre-computed lattice.  If ``None``, computed from *context*.

    Returns:
        A new list of :class:`FormalConcept` with empty extent, sorted by
        intent size ascending (smallest, i.e. most general, intents first).
    """
    if lattice is None:
        lattice = ConceptLattice.from_context(context)
    # sorted() is stable, so concepts with equal intent size keep the
    # order in which the lattice lists them.
    return sorted(lattice.gap_concepts(), key=lambda concept: concept.intent_size)
def find_empty_cells(
    context: FormalContext,
    dimension_a: list[str],
    dimension_b: list[str],
) -> list[tuple[str, str]]:
    """Find empty cells in a two-dimensional cross-tabulation.

    Given two sets of attributes (e.g. domain values and VSM systems),
    report every pair ``(attr_a, attr_b)`` that no object possesses
    together.  This is a simpler alternative to full FCA for
    two-dimensional coverage analysis.

    Args:
        context: The formal context to query via ``context.extent``.
        dimension_a: Attributes forming the first axis.
        dimension_b: Attributes forming the second axis.

    Returns:
        Empty pairs ordered by sorted ``attr_a``, then sorted ``attr_b``.
        Duplicate entries in either dimension produce duplicate pairs.
    """
    # Sort each axis once; the second axis is reused for every row.
    rows = sorted(dimension_a)
    cols = sorted(dimension_b)
    return [
        (a, b)
        for a in rows
        for b in cols
        if not context.extent([a, b])
    ]
# ── NextClosure internals ───────────────────────────────────────────
def _next_closure(
current: frozenset[str],
attrs: list[str],
closure_fn,
) -> Optional[frozenset[str]]:
"""Compute the next closed set in lectic order after *current*.
Implements Ganter's NextClosure algorithm.
"""
for i in range(len(attrs) - 1, -1, -1):
m = attrs[i]
if m in current:
current = current - {m}
else:
candidate = current | {m}
closed = closure_fn(candidate)
# Canonicity test: no attribute before position i
# was added by the closure
canonical = True
for j in range(i):
if attrs[j] in closed and attrs[j] not in candidate:
canonical = False
break
if canonical:
return closed
return None

View File

@@ -0,0 +1,313 @@
"""Tests for markitect.analysis.fca."""
import pytest
from markitect.analysis.fca import (
FormalContext,
FormalConcept,
ConceptLattice,
find_gap_concepts,
find_empty_cells,
)
# ── Test data ────────────────────────────────────────────────────────
def _animal_context():
    """Build the classic animals × properties example context.

    Context:
    | animal    | legs | wings | feathers | fur |
    |-----------|------|-------|----------|-----|
    | dog       | x    |       |          | x   |
    | cat       | x    |       |          | x   |
    | eagle     | x    | x     | x        |     |
    | sparrow   | x    | x     | x        |     |
    | penguin   | x    |       | x        |     |
    """
    # One entry per table row; insertion order supplies the object list.
    properties_by_animal = {
        "dog": {"legs", "fur"},
        "cat": {"legs", "fur"},
        "eagle": {"legs", "wings", "feathers"},
        "sparrow": {"legs", "wings", "feathers"},
        "penguin": {"legs", "feathers"},
    }
    return FormalContext(
        objects=list(properties_by_animal),
        attributes=["legs", "wings", "feathers", "fur"],
        incidence=properties_by_animal,
    )
def _infospace_context():
    """Build a small infospace-style context: entities × {domain, vsm_system}.

    Each entity carries one domain tag and one VSM tag.  The data contains
    deliberate gaps; for instance, no entity combines ``domain:Exchange``
    with ``vsm:S3``.
    """
    classification = {
        "division-of-labour": {"domain:Production", "vsm:S1"},
        "pin-factory": {"domain:Production", "vsm:S1"},
        "market-extent": {"domain:Exchange", "vsm:S4"},
        "wage-determination": {"domain:Distribution", "vsm:S3"},
        "rent-theory": {"domain:Distribution", "vsm:S5"},
        "capital-accumulation": {"domain:Production", "vsm:S3"},
    }
    return FormalContext.from_dict(classification)
def _empty_context():
    """Build a context that declares attributes ``a`` and ``b`` but no objects."""
    return FormalContext(objects=[], attributes=["a", "b"], incidence={})
def _single_entity():
    """Build a context whose single object ``only`` has both attributes."""
    return FormalContext(
        objects=["only"],
        attributes=["x", "y"],
        incidence={"only": {"x", "y"}},
    )
# ── FormalContext ────────────────────────────────────────────────────
class TestFormalContext:
    """Bookkeeping and derivation operators of :class:`FormalContext`."""

    def test_objects_sorted(self):
        context = _animal_context()
        assert context.objects == sorted(context.objects)

    def test_attributes_sorted(self):
        context = _animal_context()
        assert context.attributes == sorted(context.attributes)

    def test_object_count(self):
        context = _animal_context()
        assert context.object_count == 5

    def test_attribute_count(self):
        context = _animal_context()
        assert context.attribute_count == 4

    def test_extent_single_attr(self):
        furry = _animal_context().extent(["fur"])
        assert furry == frozenset({"dog", "cat"})

    def test_extent_multiple_attrs(self):
        flyers = _animal_context().extent(["wings", "feathers"])
        assert flyers == frozenset({"eagle", "sparrow"})

    def test_extent_empty_returns_all(self):
        context = _animal_context()
        assert context.extent([]) == frozenset(context.objects)

    def test_extent_no_match(self):
        # No animal in the table is both furry and feathered.
        assert _animal_context().extent(["fur", "feathers"]) == frozenset()

    def test_intent_single_obj(self):
        penguin_attrs = _animal_context().intent(["penguin"])
        assert penguin_attrs == frozenset({"legs", "feathers"})

    def test_intent_multiple_objs(self):
        # dog and cat have identical rows: legs and fur.
        shared = _animal_context().intent(["dog", "cat"])
        assert shared == frozenset({"legs", "fur"})

    def test_intent_empty_returns_all(self):
        context = _animal_context()
        assert context.intent([]) == frozenset(context.attributes)

    def test_closure_is_idempotent(self):
        context = _animal_context()
        once = context.closure({"fur"})
        twice = context.closure(once)
        assert once == twice

    def test_closure_expands(self):
        # fur → {dog, cat} → {legs, fur}: every furry animal also has legs.
        closed = _animal_context().closure({"fur"})
        assert closed == frozenset({"legs", "fur"})

    def test_has_attribute(self):
        context = _animal_context()
        assert context.has_attribute("dog", "legs") is True
        assert context.has_attribute("dog", "wings") is False

    def test_density(self):
        # 5 objects × 4 attributes = 20 cells, of which
        # dog:2 + cat:2 + eagle:3 + sparrow:3 + penguin:2 = 12 are filled.
        assert _animal_context().density() == pytest.approx(12 / 20)

    def test_density_empty(self):
        blank = FormalContext([], [], {})
        assert blank.density() == 0.0

    def test_from_dict(self):
        rows = {
            "a": {"x", "y"},
            "b": {"y", "z"},
        }
        context = FormalContext.from_dict(rows)
        assert context.object_count == 2
        assert context.attribute_count == 3

    def test_unknown_attributes_ignored(self):
        # "unknown" is not a declared attribute, so it must be dropped.
        context = FormalContext(["a"], ["x"], {"a": {"x", "unknown"}})
        assert context.intent(["a"]) == frozenset({"x"})
# ── ConceptLattice ──────────────────────────────────────────────────
class TestConceptLattice:
    """Lattice construction and structural queries of :class:`ConceptLattice`."""

    def test_animal_concept_count(self):
        ctx = _animal_context()
        lattice = ConceptLattice.from_context(ctx)
        # The animal context has exactly 5 closed intents:
        #   {legs}                          ← all five animals (top)
        #   {legs, fur}                     ← dog, cat
        #   {legs, feathers}                ← eagle, sparrow, penguin
        #   {legs, feathers, wings}         ← eagle, sparrow
        #   {legs, feathers, wings, fur}    ← no animal (bottom)
        assert lattice.size == 5

    def test_top_has_all_objects(self):
        ctx = _animal_context()
        lattice = ConceptLattice.from_context(ctx)
        top = lattice.top
        assert top is not None
        assert top.extent == frozenset(ctx.objects)

    def test_top_intent_is_common_attributes(self):
        ctx = _animal_context()
        lattice = ConceptLattice.from_context(ctx)
        top = lattice.top
        # All animals have "legs"
        assert "legs" in top.intent

    def test_bottom_has_all_attributes(self):
        ctx = _animal_context()
        lattice = ConceptLattice.from_context(ctx)
        bottom = lattice.bottom
        assert bottom is not None
        assert bottom.intent == frozenset(ctx.attributes)

    def test_bottom_extent_empty_when_no_universal_object(self):
        ctx = _animal_context()
        lattice = ConceptLattice.from_context(ctx)
        bottom = lattice.bottom
        # No animal has all 4 attributes
        assert bottom.extent_size == 0

    def test_all_concepts_are_closed(self):
        ctx = _animal_context()
        lattice = ConceptLattice.from_context(ctx)
        for concept in lattice.concepts:
            # intent should be closed: closure(intent) == intent
            assert ctx.closure(concept.intent) == concept.intent
            # extent' should equal intent
            assert ctx.intent(concept.extent) == concept.intent
            # intent' should equal extent
            assert ctx.extent(concept.intent) == concept.extent

    def test_empty_context(self):
        ctx = _empty_context()
        lattice = ConceptLattice.from_context(ctx)
        # With no objects the closure of the empty attribute set is already
        # the full attribute set, so the only concept is (∅, {a, b}).
        assert lattice.size == 1
        assert lattice.concepts[0].extent == frozenset()
        assert lattice.concepts[0].intent == frozenset(ctx.attributes)

    def test_single_entity(self):
        ctx = _single_entity()
        lattice = ConceptLattice.from_context(ctx)
        # At least 1 concept containing the single entity
        has_entity = any(
            "only" in c.extent for c in lattice.concepts
        )
        assert has_entity

    def test_no_attributes_produces_one_concept(self):
        ctx = FormalContext(["a", "b"], [], {})
        lattice = ConceptLattice.from_context(ctx)
        assert lattice.size == 1
        assert lattice.concepts[0].extent == frozenset({"a", "b"})

    def test_depth(self):
        ctx = _animal_context()
        lattice = ConceptLattice.from_context(ctx)
        # Longest chain of extents:
        #   ∅ ⊂ {eagle, sparrow} ⊂ {eagle, sparrow, penguin} ⊂ all animals
        assert lattice.depth() == 4

    def test_depth_empty(self):
        lattice = ConceptLattice(concepts=[])
        assert lattice.depth() == 0
# ── Gap concepts ────────────────────────────────────────────────────
class TestGapConcepts:
    """Behaviour of :func:`find_gap_concepts`."""

    def test_animal_has_gap(self):
        gaps = find_gap_concepts(_animal_context())
        # No animal is both furry and feathered, so some empty-extent
        # concept must have an intent containing both attributes.
        wanted = {"fur", "feathers"}
        assert any(wanted <= gap.intent for gap in gaps)

    def test_gap_extents_are_empty(self):
        for gap in find_gap_concepts(_animal_context()):
            assert gap.extent_size == 0

    def test_no_gaps_when_all_combinations_covered(self):
        # obj1 carries every attribute, so no concept can have an empty extent.
        covered = FormalContext.from_dict({
            "obj1": {"a", "b"},
            "obj2": {"a"},
            "obj3": {"b"},
        })
        precomputed = ConceptLattice.from_context(covered)
        assert len(find_gap_concepts(covered, precomputed)) == 0

    def test_sorted_by_intent_size(self):
        gaps = find_gap_concepts(_animal_context())
        intent_sizes = [gap.intent_size for gap in gaps]
        assert intent_sizes == sorted(intent_sizes)

    def test_infospace_gap(self):
        gaps = find_gap_concepts(_infospace_context())
        # No entity is tagged with both domain:Exchange and vsm:S1, so the
        # pair should be contained in the intent of some gap concept.
        pair = {"domain:Exchange", "vsm:S1"}
        intents = [gap.intent for gap in gaps]
        assert any(pair <= intent for intent in intents)
# ── Empty cells (cross-tab) ─────────────────────────────────────────
class TestFindEmptyCells:
    """Behaviour of :func:`find_empty_cells` on two attribute dimensions."""

    def test_finds_empty_cells(self):
        domain_axis = ["domain:Production", "domain:Distribution", "domain:Exchange"]
        vsm_axis = ["vsm:S1", "vsm:S3", "vsm:S4", "vsm:S5"]
        cells = find_empty_cells(_infospace_context(), domain_axis, vsm_axis)
        # Nothing is tagged with both domain:Exchange and vsm:S1 ...
        assert ("domain:Exchange", "vsm:S1") in cells
        # ... whereas division-of-labour fills domain:Production × vsm:S1.
        assert ("domain:Production", "vsm:S1") not in cells

    def test_all_filled_returns_empty_list(self):
        rows = {
            "a": {"x", "y"},
            "b": {"x", "z"},
            "c": {"y", "z"},
            "d": {"x", "y", "z"},
        }
        context = FormalContext.from_dict(rows)
        assert find_empty_cells(context, ["x", "y"], ["z"]) == []

    def test_empty_context_all_cells_empty(self):
        # Without objects every cell of the 1 × 2 grid is empty.
        context = FormalContext([], ["a", "b", "c"], {})
        cells = find_empty_cells(context, ["a"], ["b", "c"])
        assert len(cells) == 2