generated from coulomb/repo-seed
session-memory Phase 1: Detect pipeline (T04-T07)
- detect/signals.py: pure extractors over digests (retry storm, repeated errors, budget overrun vs corpus p90, abandoned, clean pass, recovery)
- detect/cluster.py: deterministic clustering into candidate Patterns with evidence (sessions/repos/flavors/cost impact) + cross-flavor flagging
- detect/__main__.py: python -m session_memory.detect, ranked report (cross-flavor first) + --json; persists candidates to Tier 2 patterns table
- core/store.py: list_digests + save_patterns
- tests for signals, cluster, detect entrypoint

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
78
session_memory/detect/cluster.py
Normal file
78
session_memory/detect/cluster.py
Normal file
@@ -0,0 +1,78 @@
|
||||
"""Pattern clusterer + evidence (PRD §5, §6.2; T05/T06).
|
||||
|
||||
Groups recurring :class:`Signal`s into candidate ``Pattern`` records. Clustering
|
||||
is deterministic and keyed on ``(polarity, signal-type, locus)`` — enough to
|
||||
surface "the same thing keeps happening" without embeddings (a later option).
|
||||
|
||||
Each candidate carries evidence (FR-D3): supporting sessions, frequency, affected
|
||||
repos, affected **flavors**, and an estimated cost-impact score. Candidates whose
|
||||
evidence spans more than one flavor are flagged ``cross_flavor`` (FR-D4) — the
|
||||
highest-value reuse targets.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import collections
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from .signals import PROBLEM, Signal
|
||||
|
||||
|
||||
@dataclass
class Pattern:
    """Candidate pattern: one cluster of recurring signals plus its evidence.

    Attributes:
        key: Stable cluster key.
        polarity: ``problem`` or ``success``.
        signal_type: Signal type shared by the cluster.
        locus: Locus shared by the cluster.
        frequency: Number of supporting signals.
        sessions: Supporting sessions.
        repos: Affected repos.
        flavors: Affected flavors.
        cross_flavor: Whether the evidence spans more than one flavor.
        cost_impact: Frequency-weighted magnitude.
        score: Ranking score (impact x frequency).
        title: Human-readable label for reports.
    """

    key: str
    polarity: str
    signal_type: str
    locus: str
    frequency: int
    sessions: list[str] = field(default_factory=list)
    repos: list[str] = field(default_factory=list)
    flavors: list[str] = field(default_factory=list)
    cross_flavor: bool = False
    cost_impact: float = 0.0
    score: float = 0.0
    title: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the pattern as a plain ``field name -> value`` dict.

        The conversion goes through :func:`dataclasses.asdict`, so the list
        fields in the result are copies rather than the instance's own lists.
        """
        as_mapping: dict[str, Any] = asdict(self)
        return as_mapping
|
||||
|
||||
|
||||
def _key(s: Signal) -> str:
    """Build the cluster key for a signal, formatted ``polarity:type:locus``."""
    polarity, kind, locus = s.polarity, s.type, s.locus
    return f"{polarity}:{kind}:{locus}"
|
||||
|
||||
|
||||
def _title(polarity: str, signal_type: str, n_flavors: int) -> str:
    """Compose the display title for a pattern.

    The title reads ``[cross-flavor ]<problem|success>: <signal type>``; the
    ``cross-flavor`` prefix appears only when ``n_flavors`` exceeds one, and
    underscores in ``signal_type`` are shown as spaces.
    """
    if n_flavors > 1:
        scope = "cross-flavor "
    else:
        scope = ""

    # Any polarity other than PROBLEM is labelled as a success.
    if polarity == PROBLEM:
        verb = "problem"
    else:
        verb = "success"

    readable_type = signal_type.replace("_", " ")
    return f"{scope}{verb}: {readable_type}"
|
||||
|
||||
|
||||
def cluster(signals: list[Signal], *, min_frequency: int = 2) -> list[Pattern]:
    """Group signals into candidate patterns; keep clusters >= min_frequency.

    Signals are bucketed by ``_key`` (polarity, type, locus). Each bucket with
    at least ``min_frequency`` members becomes one :class:`Pattern` carrying
    its evidence: unique sorted sessions, repos and flavors, the summed
    magnitude as ``cost_impact``, and a ranking ``score``.

    Args:
        signals: Signals to cluster. Each must expose ``polarity``, ``type``,
            ``locus``, ``session_uid``, ``repo``, ``flavor`` and ``magnitude``.
        min_frequency: Minimum number of signals a cluster needs to be kept.

    Returns:
        Patterns ordered cross-flavor first, then by descending score, then by
        cluster key so equal-score patterns come out in the same order no
        matter how the input signals were ordered.
    """
    groups: dict[str, list[Signal]] = collections.defaultdict(list)
    for s in signals:
        groups[_key(s)].append(s)

    patterns: list[Pattern] = []
    for key, members in groups.items():
        if len(members) < min_frequency:
            continue

        first = members[0]
        frequency = len(members)
        sessions = sorted({m.session_uid for m in members})
        repos = sorted({m.repo for m in members if m.repo})
        # Skip falsy flavors the same way falsy repos are skipped: a missing
        # flavor must not count as a second flavor (which would wrongly flag
        # the pattern cross-flavor and boost its score).
        # NOTE(review): assumes an unknown flavor is None/"" -- confirm
        # against Signal.
        flavors = sorted({m.flavor for m in members if m.flavor})
        cross_flavor = len(flavors) > 1
        cost_impact = round(sum(m.magnitude for m in members), 3)

        # rank: impact x frequency, with a boost for cross-flavor reuse value
        boost = 1.5 if cross_flavor else 1.0
        score = round(cost_impact * frequency * boost, 3)

        patterns.append(
            Pattern(
                key=key,
                polarity=first.polarity,
                signal_type=first.type,
                locus=first.locus,
                frequency=frequency,
                sessions=sessions,
                repos=repos,
                flavors=flavors,
                cross_flavor=cross_flavor,
                cost_impact=cost_impact,
                score=score,
                title=_title(first.polarity, first.type, len(flavors)),
            )
        )

    # cross-flavor first, then by score; the key breaks ties deterministically
    patterns.sort(key=lambda p: (not p.cross_flavor, -p.score, p.key))
    return patterns
|
||||
Reference in New Issue
Block a user