chore(consistency): sync task status from DB [auto]

Updated by fix-consistency on 2026-05-15:
  - update .custodian-brief.md for repo-scoping
This commit is contained in:
2026-05-15 21:14:21 +02:00
parent f38ed6847c
commit 084159e51c
42 changed files with 5 additions and 5 deletions

View File

@@ -0,0 +1,11 @@
from repo_registry.semantic.embeddings import (
EmbeddingProvider,
HashingEmbeddingProvider,
cosine_similarity,
)
# Public names of this package; each one is imported above from
# repo_registry.semantic.embeddings and re-exported here.
__all__ = [
"EmbeddingProvider",
"HashingEmbeddingProvider",
"cosine_similarity",
]

View File

@@ -0,0 +1,58 @@
from __future__ import annotations
import hashlib
import math
import re
from typing import Protocol
class EmbeddingProvider(Protocol):
    """Structural interface for objects that turn text into vectors.

    A conforming object exposes a ``name`` string attribute and an
    ``embed`` method that maps a string to a list of floats.
    """

    name: str

    def embed(self, text: str) -> list[float]:
        """Return a deterministic vector for the supplied text."""
        ...
class HashingEmbeddingProvider:
    """Offline test provider using hashed token buckets.

    This is intentionally simple: it gives tests and local development a stable
    semantic path without depending on an external model service.

    Every token of the input is hashed with SHA-256. The digest picks one
    bucket of the output vector and a sign (+1 or -1) that is added to that
    bucket; the summed vector is then scaled to unit length.
    """

    name = "hashing-v1"

    def __init__(self, dimensions: int = 64) -> None:
        """Create a provider that emits vectors of ``dimensions`` floats.

        Args:
            dimensions: Length of every vector returned by ``embed``.

        Raises:
            ValueError: If ``dimensions`` is smaller than 1.
        """
        # Reject unusable sizes up front: without this check a size of 0 only
        # fails later inside embed() with ZeroDivisionError (modulo by zero)
        # and a negative size with IndexError on the empty vector.
        if dimensions < 1:
            raise ValueError(
                f"dimensions must be a positive integer, got {dimensions!r}"
            )
        self.dimensions = dimensions

    def embed(self, text: str) -> list[float]:
        """Return the unit-length hashed bucket vector for ``text``.

        Args:
            text: Input to tokenize and hash.

        Returns:
            A list of ``self.dimensions`` floats. The list is all zeros when
            the text yields no tokens or when the signed contributions cancel
            out exactly; otherwise its Euclidean norm is 1 (up to rounding).
        """
        vector = [0.0] * self.dimensions
        for token in _tokens(text):
            digest = hashlib.sha256(token.encode("utf-8")).digest()
            # Only the first two digest bytes select the bucket, so at most
            # 65536 distinct buckets can ever be hit.
            index = int.from_bytes(digest[:2], "big") % self.dimensions
            # The parity of the third byte decides the sign of the update.
            sign = 1.0 if digest[2] % 2 == 0 else -1.0
            vector[index] += sign
        norm = math.sqrt(sum(value * value for value in vector))
        if norm == 0:
            # Nothing to scale; avoid dividing by zero.
            return vector
        return [value / norm for value in vector]
def cosine_similarity(left: list[float], right: list[float]) -> float:
if not left or not right or len(left) != len(right):
return 0.0
return sum(a * b for a, b in zip(left, right, strict=True))
def _tokens(text: str) -> list[str]:
    """Return the stemmed, lower-cased alphanumeric words found in ``text``.

    The text is lower-cased, every maximal run of ASCII letters and digits is
    taken as one word, and each word is passed through ``_stem``. Order and
    duplicates are preserved.
    """
    words = re.findall(r"[A-Za-z0-9]+", text.lower())
    return [_stem(word) for word in words]
def _stem(token: str) -> str:
    """Strip one common English suffix from ``token`` when enough remains.

    The suffixes "ing", "ed", "es" and "s" are tried in that order. The first
    one that matches is removed, but only if the token is more than three
    characters longer than the suffix, so at least four characters are left.
    A token with no removable suffix is returned unchanged.
    """
    size = len(token)
    for ending in ("ing", "ed", "es", "s"):
        cut = len(ending)
        if token.endswith(ending) and size > cut + 3:
            return token[: size - cut]
    return token