Files
repo-scoping/src/repo_registry/storage/sqlite.py

2198 lines
78 KiB
Python

from __future__ import annotations
import json
import sqlite3
from pathlib import Path
from repo_registry.core.models import (
Ability,
AbilitySummary,
AnalysisRun,
CandidateAbility,
CandidateCapability,
CandidateEvidence,
CandidateFeature,
CandidateGraph,
Capability,
CapabilitySummary,
ContentChunk,
Evidence,
Feature,
ObservedFact,
Repository,
RepositoryAbilityMap,
RepositorySnapshot,
ReviewDecision,
SearchResult,
SourceReference,
confidence_label,
)
from repo_registry.content_indexing.extractor import ContentChunkCandidate
from repo_registry.candidate_graph.generator import CandidateAbilityDraft
from repo_registry.repo_scanning.scanner import FactCandidate, ScanResult
class NotFoundError(ValueError):
pass
class RegistryStore:
def __init__(self, database_path: str | Path) -> None:
self.database_path = str(database_path)
def initialize(self) -> None:
migration_path = Path(__file__).parents[3] / "migrations" / "0001_initial.sql"
with self.connect() as connection:
connection.executescript(migration_path.read_text(encoding="utf-8"))
self._ensure_content_chunks_table(connection)
self._ensure_approved_source_ref_columns(connection)
def connect(self) -> sqlite3.Connection:
connection = sqlite3.connect(self.database_path)
connection.row_factory = sqlite3.Row
connection.execute("PRAGMA foreign_keys = ON")
return connection
def _ensure_approved_source_ref_columns(
self,
connection: sqlite3.Connection,
) -> None:
for table in ("approved_features", "approved_evidence"):
columns = {
row["name"]
for row in connection.execute(f"PRAGMA table_info({table})").fetchall()
}
if "source_refs" not in columns:
connection.execute(
f"ALTER TABLE {table} ADD COLUMN source_refs TEXT NOT NULL DEFAULT '[]'"
)
def _ensure_content_chunks_table(self, connection: sqlite3.Connection) -> None:
connection.execute(
"""
CREATE TABLE IF NOT EXISTS content_chunks (
id INTEGER PRIMARY KEY AUTOINCREMENT,
repository_id INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
analysis_run_id INTEGER NOT NULL REFERENCES analysis_runs(id) ON DELETE CASCADE,
snapshot_id INTEGER REFERENCES repository_snapshots(id) ON DELETE CASCADE,
path TEXT NOT NULL,
kind TEXT NOT NULL,
start_line INTEGER NOT NULL,
end_line INTEGER NOT NULL,
text TEXT NOT NULL,
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
)
"""
)
connection.execute(
"CREATE INDEX IF NOT EXISTS idx_content_chunks_repository ON content_chunks(repository_id)"
)
connection.execute(
"CREATE INDEX IF NOT EXISTS idx_content_chunks_run ON content_chunks(analysis_run_id)"
)
def create_repository(
self,
*,
name: str,
url: str,
description: str | None,
branch: str,
) -> Repository:
with self.connect() as connection:
cursor = connection.execute(
"""
INSERT INTO repositories (name, url, description, branch)
VALUES (?, ?, ?, ?)
""",
(name, url, description, branch),
)
repository_id = int(cursor.lastrowid)
return self.get_repository(repository_id)
def update_repository(
self,
repository_id: int,
*,
name: str | None = None,
description: str | None = None,
branch: str | None = None,
) -> Repository:
self.get_repository(repository_id)
assignments: list[str] = []
values: list[str | int | None] = []
if name is not None:
assignments.append("name = ?")
values.append(name)
if description is not None:
assignments.append("description = ?")
values.append(description)
if branch is not None:
assignments.append("branch = ?")
values.append(branch)
if not assignments:
return self.get_repository(repository_id)
values.append(repository_id)
with self.connect() as connection:
cursor = connection.execute(
f"""
UPDATE repositories
SET {", ".join(assignments)}, updated_at = CURRENT_TIMESTAMP
WHERE id = ?
""",
values,
)
if cursor.rowcount == 0:
raise NotFoundError(f"repository {repository_id} was not found")
return self.get_repository(repository_id)
def delete_repository(self, repository_id: int) -> None:
with self.connect() as connection:
cursor = connection.execute(
"DELETE FROM repositories WHERE id = ?",
(repository_id,),
)
if cursor.rowcount == 0:
raise NotFoundError(f"repository {repository_id} was not found")
def update_repository_status(self, repository_id: int, status: str) -> None:
with self.connect() as connection:
cursor = connection.execute(
"""
UPDATE repositories
SET status = ?, updated_at = CURRENT_TIMESTAMP
WHERE id = ?
""",
(status, repository_id),
)
if cursor.rowcount == 0:
raise NotFoundError(f"repository {repository_id} was not found")
def list_repositories(self) -> list[Repository]:
with self.connect() as connection:
rows = connection.execute(
"""
SELECT id, name, url, description, branch, status
FROM repositories
ORDER BY created_at DESC, id DESC
"""
).fetchall()
return [self._repository_from_row(row) for row in rows]
def get_repository(self, repository_id: int) -> Repository:
with self.connect() as connection:
row = connection.execute(
"""
SELECT id, name, url, description, branch, status
FROM repositories
WHERE id = ?
""",
(repository_id,),
).fetchone()
if row is None:
raise NotFoundError(f"repository {repository_id} was not found")
return self._repository_from_row(row)
def create_analysis_run(self, repository_id: int) -> AnalysisRun:
self.get_repository(repository_id)
with self.connect() as connection:
cursor = connection.execute(
"""
INSERT INTO analysis_runs (repository_id, status)
VALUES (?, 'running')
""",
(repository_id,),
)
analysis_run_id = int(cursor.lastrowid)
return self.get_analysis_run(repository_id, analysis_run_id)
def complete_analysis_run(
self,
repository_id: int,
analysis_run_id: int,
scan_result: ScanResult,
) -> AnalysisRun:
with self.connect() as connection:
snapshot_cursor = connection.execute(
"""
INSERT INTO repository_snapshots
(repository_id, commit_hash, branch, source_path, file_count)
VALUES (?, ?, ?, ?, ?)
""",
(
repository_id,
scan_result.commit_hash,
scan_result.branch,
scan_result.source_path,
scan_result.file_count,
),
)
snapshot_id = int(snapshot_cursor.lastrowid)
self._insert_facts(
connection,
repository_id=repository_id,
analysis_run_id=analysis_run_id,
snapshot_id=snapshot_id,
facts=scan_result.facts,
)
connection.execute(
"""
UPDATE analysis_runs
SET status = 'completed',
snapshot_id = ?,
completed_at = CURRENT_TIMESTAMP,
error_message = NULL
WHERE id = ? AND repository_id = ?
""",
(snapshot_id, analysis_run_id, repository_id),
)
connection.execute(
"""
UPDATE repositories
SET status = 'analyzed', updated_at = CURRENT_TIMESTAMP
WHERE id = ?
""",
(repository_id,),
)
return self.get_analysis_run(repository_id, analysis_run_id)
def replace_candidate_graph(
self,
repository_id: int,
analysis_run_id: int,
abilities: list[CandidateAbilityDraft],
) -> None:
with self.connect() as connection:
connection.execute(
"DELETE FROM candidate_abilities WHERE analysis_run_id = ?",
(analysis_run_id,),
)
for ability in abilities:
ability_cursor = connection.execute(
"""
INSERT INTO candidate_abilities
(repository_id, analysis_run_id, name, description, confidence, source_refs)
VALUES (?, ?, ?, ?, ?, ?)
""",
(
repository_id,
analysis_run_id,
ability.name,
ability.description,
ability.confidence,
self._source_refs_to_json(ability.source_refs),
),
)
ability_id = int(ability_cursor.lastrowid)
for capability in ability.capabilities:
capability_cursor = connection.execute(
"""
INSERT INTO candidate_capabilities
(repository_id, analysis_run_id, ability_id, name, description,
inputs, outputs, confidence, source_refs)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
repository_id,
analysis_run_id,
ability_id,
capability.name,
capability.description,
json.dumps(capability.inputs),
json.dumps(capability.outputs),
capability.confidence,
self._source_refs_to_json(capability.source_refs),
),
)
capability_id = int(capability_cursor.lastrowid)
for feature in capability.features:
connection.execute(
"""
INSERT INTO candidate_features
(repository_id, analysis_run_id, capability_id, name, type,
location, confidence, source_refs)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""",
(
repository_id,
analysis_run_id,
capability_id,
feature.name,
feature.type,
feature.location,
feature.confidence,
self._source_refs_to_json(feature.source_refs),
),
)
for evidence in capability.evidence:
connection.execute(
"""
INSERT INTO candidate_evidence
(repository_id, analysis_run_id, capability_id, type,
reference, strength, source_refs)
VALUES (?, ?, ?, ?, ?, ?, ?)
""",
(
repository_id,
analysis_run_id,
capability_id,
evidence.type,
evidence.reference,
evidence.strength,
self._source_refs_to_json(evidence.source_refs),
),
)
def get_candidate_graph(
self,
repository_id: int,
analysis_run_id: int,
) -> CandidateGraph:
repository = self.get_repository(repository_id)
analysis_run = self.get_analysis_run(repository_id, analysis_run_id)
with self.connect() as connection:
ability_rows = connection.execute(
"""
SELECT id, name, description, confidence, status, source_refs
FROM candidate_abilities
WHERE repository_id = ? AND analysis_run_id = ?
ORDER BY id
""",
(repository_id, analysis_run_id),
).fetchall()
capability_rows = connection.execute(
"""
SELECT id, ability_id, name, description, inputs, outputs,
confidence, status, source_refs
FROM candidate_capabilities
WHERE repository_id = ? AND analysis_run_id = ?
ORDER BY id
""",
(repository_id, analysis_run_id),
).fetchall()
feature_rows = connection.execute(
"""
SELECT id, capability_id, name, type, location, confidence,
status, source_refs
FROM candidate_features
WHERE repository_id = ? AND analysis_run_id = ?
ORDER BY id
""",
(repository_id, analysis_run_id),
).fetchall()
evidence_rows = connection.execute(
"""
SELECT id, capability_id, type, reference, strength, status, source_refs
FROM candidate_evidence
WHERE repository_id = ? AND analysis_run_id = ?
ORDER BY id
""",
(repository_id, analysis_run_id),
).fetchall()
features_by_capability: dict[int, list[CandidateFeature]] = {}
for row in feature_rows:
features_by_capability.setdefault(row["capability_id"], []).append(
CandidateFeature(
id=row["id"],
name=row["name"],
type=row["type"],
location=row["location"],
confidence=row["confidence"],
status=row["status"],
source_refs=self._source_refs_from_json(row["source_refs"]),
confidence_label=confidence_label(row["confidence"]),
)
)
evidence_by_capability: dict[int, list[CandidateEvidence]] = {}
for row in evidence_rows:
evidence_by_capability.setdefault(row["capability_id"], []).append(
CandidateEvidence(
id=row["id"],
type=row["type"],
reference=row["reference"],
strength=row["strength"],
status=row["status"],
source_refs=self._source_refs_from_json(row["source_refs"]),
)
)
capabilities_by_ability: dict[int, list[CandidateCapability]] = {}
for row in capability_rows:
capabilities_by_ability.setdefault(row["ability_id"], []).append(
CandidateCapability(
id=row["id"],
name=row["name"],
description=row["description"],
inputs=json.loads(row["inputs"]),
outputs=json.loads(row["outputs"]),
confidence=row["confidence"],
status=row["status"],
source_refs=self._source_refs_from_json(row["source_refs"]),
confidence_label=confidence_label(row["confidence"]),
features=features_by_capability.get(row["id"], []),
evidence=evidence_by_capability.get(row["id"], []),
)
)
abilities = [
CandidateAbility(
id=row["id"],
name=row["name"],
description=row["description"],
confidence=row["confidence"],
status=row["status"],
source_refs=self._source_refs_from_json(row["source_refs"]),
confidence_label=confidence_label(row["confidence"]),
capabilities=capabilities_by_ability.get(row["id"], []),
)
for row in ability_rows
]
return CandidateGraph(
repository=repository,
analysis_run=analysis_run,
abilities=abilities,
)
def mark_candidate_graph_status(
self,
repository_id: int,
analysis_run_id: int,
status: str,
) -> None:
with self.connect() as connection:
for table in (
"candidate_abilities",
"candidate_capabilities",
"candidate_features",
"candidate_evidence",
):
connection.execute(
f"""
UPDATE {table}
SET status = ?
WHERE repository_id = ? AND analysis_run_id = ?
""",
(status, repository_id, analysis_run_id),
)
def reject_candidate_ability(
self,
repository_id: int,
analysis_run_id: int,
candidate_ability_id: int,
) -> None:
with self.connect() as connection:
ability_cursor = connection.execute(
"""
UPDATE candidate_abilities
SET status = 'rejected'
WHERE id = ?
AND repository_id = ?
AND analysis_run_id = ?
AND status = 'candidate'
""",
(candidate_ability_id, repository_id, analysis_run_id),
)
if ability_cursor.rowcount == 0:
raise NotFoundError(
"candidate ability "
f"{candidate_ability_id} was not found for repository "
f"{repository_id} analysis run {analysis_run_id}"
)
capability_rows = connection.execute(
"""
SELECT id FROM candidate_capabilities
WHERE ability_id = ? AND repository_id = ? AND analysis_run_id = ?
""",
(candidate_ability_id, repository_id, analysis_run_id),
).fetchall()
capability_ids = [row["id"] for row in capability_rows]
connection.execute(
"""
UPDATE candidate_capabilities
SET status = 'rejected'
WHERE ability_id = ? AND repository_id = ? AND analysis_run_id = ?
""",
(candidate_ability_id, repository_id, analysis_run_id),
)
for capability_id in capability_ids:
connection.execute(
"""
UPDATE candidate_features
SET status = 'rejected'
WHERE capability_id = ? AND repository_id = ? AND analysis_run_id = ?
""",
(capability_id, repository_id, analysis_run_id),
)
connection.execute(
"""
UPDATE candidate_evidence
SET status = 'rejected'
WHERE capability_id = ? AND repository_id = ? AND analysis_run_id = ?
""",
(capability_id, repository_id, analysis_run_id),
)
def reject_candidate_capability(
self,
repository_id: int,
analysis_run_id: int,
candidate_capability_id: int,
) -> None:
with self.connect() as connection:
cursor = connection.execute(
"""
UPDATE candidate_capabilities
SET status = 'rejected'
WHERE id = ?
AND repository_id = ?
AND analysis_run_id = ?
AND status = 'candidate'
""",
(candidate_capability_id, repository_id, analysis_run_id),
)
if cursor.rowcount == 0:
raise NotFoundError(
"candidate capability "
f"{candidate_capability_id} was not found for repository "
f"{repository_id} analysis run {analysis_run_id}"
)
connection.execute(
"""
UPDATE candidate_features
SET status = 'rejected'
WHERE capability_id = ? AND repository_id = ? AND analysis_run_id = ?
""",
(candidate_capability_id, repository_id, analysis_run_id),
)
connection.execute(
"""
UPDATE candidate_evidence
SET status = 'rejected'
WHERE capability_id = ? AND repository_id = ? AND analysis_run_id = ?
""",
(candidate_capability_id, repository_id, analysis_run_id),
)
def reject_candidate_feature(
self,
repository_id: int,
analysis_run_id: int,
candidate_feature_id: int,
) -> None:
self._reject_candidate_leaf(
table="candidate_features",
label="candidate feature",
repository_id=repository_id,
analysis_run_id=analysis_run_id,
candidate_id=candidate_feature_id,
)
def reject_candidate_evidence(
self,
repository_id: int,
analysis_run_id: int,
candidate_evidence_id: int,
) -> None:
self._reject_candidate_leaf(
table="candidate_evidence",
label="candidate evidence",
repository_id=repository_id,
analysis_run_id=analysis_run_id,
candidate_id=candidate_evidence_id,
)
def update_candidate_ability(
self,
repository_id: int,
analysis_run_id: int,
candidate_ability_id: int,
*,
name: str,
description: str,
confidence: float,
) -> None:
with self.connect() as connection:
cursor = connection.execute(
"""
UPDATE candidate_abilities
SET name = ?, description = ?, confidence = ?
WHERE id = ? AND repository_id = ? AND analysis_run_id = ?
""",
(
name,
description,
confidence,
candidate_ability_id,
repository_id,
analysis_run_id,
),
)
if cursor.rowcount == 0:
raise NotFoundError(
"candidate ability "
f"{candidate_ability_id} was not found for repository "
f"{repository_id} analysis run {analysis_run_id}"
)
def update_candidate_capability(
self,
repository_id: int,
analysis_run_id: int,
candidate_capability_id: int,
*,
name: str,
description: str,
confidence: float,
) -> None:
with self.connect() as connection:
cursor = connection.execute(
"""
UPDATE candidate_capabilities
SET name = ?, description = ?, confidence = ?
WHERE id = ? AND repository_id = ? AND analysis_run_id = ?
""",
(
name,
description,
confidence,
candidate_capability_id,
repository_id,
analysis_run_id,
),
)
if cursor.rowcount == 0:
raise NotFoundError(
"candidate capability "
f"{candidate_capability_id} was not found for repository "
f"{repository_id} analysis run {analysis_run_id}"
)
def relink_candidate_capability(
self,
repository_id: int,
analysis_run_id: int,
candidate_capability_id: int,
target_ability_id: int,
) -> None:
self._ensure_candidate_row(
table="candidate_abilities",
label="target candidate ability",
repository_id=repository_id,
analysis_run_id=analysis_run_id,
candidate_id=target_ability_id,
)
with self.connect() as connection:
cursor = connection.execute(
"""
UPDATE candidate_capabilities
SET ability_id = ?
WHERE id = ? AND repository_id = ? AND analysis_run_id = ?
""",
(
target_ability_id,
candidate_capability_id,
repository_id,
analysis_run_id,
),
)
if cursor.rowcount == 0:
raise NotFoundError(
"candidate capability "
f"{candidate_capability_id} was not found for repository "
f"{repository_id} analysis run {analysis_run_id}"
)
def relink_candidate_feature(
self,
repository_id: int,
analysis_run_id: int,
candidate_feature_id: int,
target_capability_id: int,
) -> None:
self._relink_candidate_leaf(
table="candidate_features",
label="candidate feature",
repository_id=repository_id,
analysis_run_id=analysis_run_id,
candidate_id=candidate_feature_id,
target_capability_id=target_capability_id,
)
def relink_candidate_evidence(
self,
repository_id: int,
analysis_run_id: int,
candidate_evidence_id: int,
target_capability_id: int,
) -> None:
self._relink_candidate_leaf(
table="candidate_evidence",
label="candidate evidence",
repository_id=repository_id,
analysis_run_id=analysis_run_id,
candidate_id=candidate_evidence_id,
target_capability_id=target_capability_id,
)
def merge_candidate_ability(
self,
repository_id: int,
analysis_run_id: int,
source_ability_id: int,
target_ability_id: int,
) -> None:
if source_ability_id == target_ability_id:
raise ValueError("source and target candidate ability must be different")
self._ensure_candidate_row(
table="candidate_abilities",
label="target candidate ability",
repository_id=repository_id,
analysis_run_id=analysis_run_id,
candidate_id=target_ability_id,
)
with self.connect() as connection:
cursor = connection.execute(
"""
UPDATE candidate_abilities
SET status = 'merged'
WHERE id = ?
AND repository_id = ?
AND analysis_run_id = ?
AND status = 'candidate'
""",
(source_ability_id, repository_id, analysis_run_id),
)
if cursor.rowcount == 0:
raise NotFoundError(
"source candidate ability "
f"{source_ability_id} was not found for repository "
f"{repository_id} analysis run {analysis_run_id}"
)
connection.execute(
"""
UPDATE candidate_capabilities
SET ability_id = ?
WHERE ability_id = ? AND repository_id = ? AND analysis_run_id = ?
""",
(target_ability_id, source_ability_id, repository_id, analysis_run_id),
)
def merge_candidate_capability(
self,
repository_id: int,
analysis_run_id: int,
source_capability_id: int,
target_capability_id: int,
) -> None:
if source_capability_id == target_capability_id:
raise ValueError("source and target candidate capability must be different")
self._ensure_candidate_row(
table="candidate_capabilities",
label="target candidate capability",
repository_id=repository_id,
analysis_run_id=analysis_run_id,
candidate_id=target_capability_id,
)
with self.connect() as connection:
cursor = connection.execute(
"""
UPDATE candidate_capabilities
SET status = 'merged'
WHERE id = ?
AND repository_id = ?
AND analysis_run_id = ?
AND status = 'candidate'
""",
(source_capability_id, repository_id, analysis_run_id),
)
if cursor.rowcount == 0:
raise NotFoundError(
"source candidate capability "
f"{source_capability_id} was not found for repository "
f"{repository_id} analysis run {analysis_run_id}"
)
for table in ("candidate_features", "candidate_evidence"):
connection.execute(
f"""
UPDATE {table}
SET capability_id = ?
WHERE capability_id = ?
AND repository_id = ?
AND analysis_run_id = ?
""",
(
target_capability_id,
source_capability_id,
repository_id,
analysis_run_id,
),
)
def merge_candidate_feature(
self,
repository_id: int,
analysis_run_id: int,
source_feature_id: int,
target_feature_id: int,
) -> None:
self._merge_candidate_leaf(
table="candidate_features",
label="candidate feature",
repository_id=repository_id,
analysis_run_id=analysis_run_id,
source_id=source_feature_id,
target_id=target_feature_id,
)
def merge_candidate_evidence(
self,
repository_id: int,
analysis_run_id: int,
source_evidence_id: int,
target_evidence_id: int,
) -> None:
self._merge_candidate_leaf(
table="candidate_evidence",
label="candidate evidence",
repository_id=repository_id,
analysis_run_id=analysis_run_id,
source_id=source_evidence_id,
target_id=target_evidence_id,
)
def _ensure_candidate_row(
self,
*,
table: str,
label: str,
repository_id: int,
analysis_run_id: int,
candidate_id: int,
) -> None:
with self.connect() as connection:
row = connection.execute(
f"""
SELECT id FROM {table}
WHERE id = ? AND repository_id = ? AND analysis_run_id = ?
""",
(candidate_id, repository_id, analysis_run_id),
).fetchone()
if row is None:
raise NotFoundError(
f"{label} {candidate_id} was not found for repository "
f"{repository_id} analysis run {analysis_run_id}"
)
def _relink_candidate_leaf(
self,
*,
table: str,
label: str,
repository_id: int,
analysis_run_id: int,
candidate_id: int,
target_capability_id: int,
) -> None:
self._ensure_candidate_row(
table="candidate_capabilities",
label="target candidate capability",
repository_id=repository_id,
analysis_run_id=analysis_run_id,
candidate_id=target_capability_id,
)
with self.connect() as connection:
cursor = connection.execute(
f"""
UPDATE {table}
SET capability_id = ?
WHERE id = ? AND repository_id = ? AND analysis_run_id = ?
""",
(
target_capability_id,
candidate_id,
repository_id,
analysis_run_id,
),
)
if cursor.rowcount == 0:
raise NotFoundError(
f"{label} {candidate_id} was not found for repository "
f"{repository_id} analysis run {analysis_run_id}"
)
def _merge_candidate_leaf(
self,
*,
table: str,
label: str,
repository_id: int,
analysis_run_id: int,
source_id: int,
target_id: int,
) -> None:
if source_id == target_id:
raise ValueError(f"source and target {label} must be different")
self._ensure_candidate_row(
table=table,
label=f"target {label}",
repository_id=repository_id,
analysis_run_id=analysis_run_id,
candidate_id=target_id,
)
with self.connect() as connection:
cursor = connection.execute(
f"""
UPDATE {table}
SET status = 'merged'
WHERE id = ?
AND repository_id = ?
AND analysis_run_id = ?
AND status = 'candidate'
""",
(source_id, repository_id, analysis_run_id),
)
if cursor.rowcount == 0:
raise NotFoundError(
f"source {label} {source_id} was not found for repository "
f"{repository_id} analysis run {analysis_run_id}"
)
def _reject_candidate_leaf(
self,
*,
table: str,
label: str,
repository_id: int,
analysis_run_id: int,
candidate_id: int,
) -> None:
with self.connect() as connection:
cursor = connection.execute(
f"""
UPDATE {table}
SET status = 'rejected'
WHERE id = ?
AND repository_id = ?
AND analysis_run_id = ?
AND status = 'candidate'
""",
(candidate_id, repository_id, analysis_run_id),
)
if cursor.rowcount == 0:
raise NotFoundError(
f"{label} {candidate_id} was not found for repository "
f"{repository_id} analysis run {analysis_run_id}"
)
def create_review_decision(
self,
repository_id: int,
analysis_run_id: int,
*,
action: str,
notes: str = "",
) -> int:
with self.connect() as connection:
cursor = connection.execute(
"""
INSERT INTO review_decisions
(repository_id, analysis_run_id, action, notes)
VALUES (?, ?, ?, ?)
""",
(repository_id, analysis_run_id, action, notes),
)
return int(cursor.lastrowid)
def list_review_decisions(
self,
repository_id: int,
analysis_run_id: int | None = None,
) -> list[ReviewDecision]:
self.get_repository(repository_id)
params: tuple[int, ...]
where = "WHERE repository_id = ?"
params = (repository_id,)
if analysis_run_id is not None:
where += " AND analysis_run_id = ?"
params = (repository_id, analysis_run_id)
with self.connect() as connection:
rows = connection.execute(
f"""
SELECT id, repository_id, analysis_run_id, action, notes, created_at
FROM review_decisions
{where}
ORDER BY created_at DESC, id DESC
""",
params,
).fetchall()
return [
ReviewDecision(
id=row["id"],
repository_id=row["repository_id"],
analysis_run_id=row["analysis_run_id"],
action=row["action"],
notes=row["notes"],
created_at=row["created_at"],
)
for row in rows
]
def fail_analysis_run(
self,
repository_id: int,
analysis_run_id: int,
error_message: str,
) -> AnalysisRun:
with self.connect() as connection:
cursor = connection.execute(
"""
UPDATE analysis_runs
SET status = 'failed',
completed_at = CURRENT_TIMESTAMP,
error_message = ?
WHERE id = ? AND repository_id = ?
""",
(error_message, analysis_run_id, repository_id),
)
connection.execute(
"""
UPDATE repositories
SET status = 'analysis_failed', updated_at = CURRENT_TIMESTAMP
WHERE id = ?
""",
(repository_id,),
)
if cursor.rowcount == 0:
raise NotFoundError(
f"analysis run {analysis_run_id} was not found for repository {repository_id}"
)
return self.get_analysis_run(repository_id, analysis_run_id)
def get_analysis_run(self, repository_id: int, analysis_run_id: int) -> AnalysisRun:
with self.connect() as connection:
row = connection.execute(
"""
SELECT id, repository_id, snapshot_id, status, started_at,
completed_at, error_message, scanner_version
FROM analysis_runs
WHERE id = ? AND repository_id = ?
""",
(analysis_run_id, repository_id),
).fetchone()
if row is None:
raise NotFoundError(
f"analysis run {analysis_run_id} was not found for repository {repository_id}"
)
return self._analysis_run_from_row(row)
def list_analysis_runs(self, repository_id: int) -> list[AnalysisRun]:
self.get_repository(repository_id)
with self.connect() as connection:
rows = connection.execute(
"""
SELECT id, repository_id, snapshot_id, status, started_at,
completed_at, error_message, scanner_version
FROM analysis_runs
WHERE repository_id = ?
ORDER BY started_at DESC, id DESC
""",
(repository_id,),
).fetchall()
return [self._analysis_run_from_row(row) for row in rows]
def list_abilities(self) -> list[AbilitySummary]:
with self.connect() as connection:
rows = connection.execute(
"""
SELECT a.id, a.repository_id, r.name AS repository_name,
a.name, a.description, a.confidence
FROM approved_abilities a
JOIN repositories r ON r.id = a.repository_id
ORDER BY r.name ASC, a.name ASC, a.id ASC
"""
).fetchall()
return [
AbilitySummary(
id=row["id"],
repository_id=row["repository_id"],
repository_name=row["repository_name"],
name=row["name"],
description=row["description"],
confidence=row["confidence"],
confidence_label=confidence_label(row["confidence"]),
)
for row in rows
]
def list_capabilities(self) -> list[CapabilitySummary]:
with self.connect() as connection:
rows = connection.execute(
"""
SELECT c.id, c.repository_id, r.name AS repository_name,
c.ability_id, a.name AS ability_name,
c.name, c.description, c.confidence
FROM approved_capabilities c
JOIN approved_abilities a ON a.id = c.ability_id
JOIN repositories r ON r.id = c.repository_id
ORDER BY r.name ASC, a.name ASC, c.name ASC, c.id ASC
"""
).fetchall()
return [
CapabilitySummary(
id=row["id"],
repository_id=row["repository_id"],
repository_name=row["repository_name"],
ability_id=row["ability_id"],
ability_name=row["ability_name"],
name=row["name"],
description=row["description"],
confidence=row["confidence"],
confidence_label=confidence_label(row["confidence"]),
)
for row in rows
]
def get_snapshot(self, snapshot_id: int) -> RepositorySnapshot:
with self.connect() as connection:
row = connection.execute(
"""
SELECT id, repository_id, commit_hash, branch, source_path, file_count
FROM repository_snapshots
WHERE id = ?
""",
(snapshot_id,),
).fetchone()
if row is None:
raise NotFoundError(f"snapshot {snapshot_id} was not found")
return self._snapshot_from_row(row)
def list_observed_facts(
self,
repository_id: int,
analysis_run_id: int | None = None,
) -> list[ObservedFact]:
self.get_repository(repository_id)
params: tuple[int, ...]
where = "WHERE repository_id = ?"
params = (repository_id,)
if analysis_run_id is not None:
where += " AND analysis_run_id = ?"
params = (repository_id, analysis_run_id)
with self.connect() as connection:
rows = connection.execute(
f"""
SELECT id, repository_id, analysis_run_id, snapshot_id, kind,
path, name, value, metadata
FROM observed_facts
{where}
ORDER BY kind ASC, path ASC, name ASC, id ASC
""",
params,
).fetchall()
return [self._observed_fact_from_row(row) for row in rows]
def replace_content_chunks(
self,
repository_id: int,
analysis_run_id: int,
snapshot_id: int | None,
chunks: list[ContentChunkCandidate],
) -> None:
with self.connect() as connection:
connection.execute(
"DELETE FROM content_chunks WHERE analysis_run_id = ?",
(analysis_run_id,),
)
connection.executemany(
"""
INSERT INTO content_chunks
(repository_id, analysis_run_id, snapshot_id, path, kind,
start_line, end_line, text)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""",
[
(
repository_id,
analysis_run_id,
snapshot_id,
chunk.path,
chunk.kind,
chunk.start_line,
chunk.end_line,
chunk.text,
)
for chunk in chunks
],
)
def list_content_chunks(
self,
repository_id: int,
analysis_run_id: int | None = None,
) -> list[ContentChunk]:
self.get_repository(repository_id)
params: tuple[int, ...]
where = "WHERE repository_id = ?"
params = (repository_id,)
if analysis_run_id is not None:
where += " AND analysis_run_id = ?"
params = (repository_id, analysis_run_id)
with self.connect() as connection:
rows = connection.execute(
f"""
SELECT id, repository_id, analysis_run_id, snapshot_id, path, kind,
start_line, end_line, text
FROM content_chunks
{where}
ORDER BY path ASC, start_line ASC, id ASC
""",
params,
).fetchall()
return [self._content_chunk_from_row(row) for row in rows]
def create_ability(
self,
repository_id: int,
*,
name: str,
description: str,
confidence: float,
) -> int:
with self.connect() as connection:
cursor = connection.execute(
"""
INSERT INTO approved_abilities
(repository_id, name, description, confidence)
VALUES (?, ?, ?, ?)
""",
(repository_id, name, description, confidence),
)
return int(cursor.lastrowid)
def ensure_ability(self, repository_id: int, ability_id: int) -> None:
with self.connect() as connection:
row = connection.execute(
"""
SELECT id FROM approved_abilities
WHERE id = ? AND repository_id = ?
""",
(ability_id, repository_id),
).fetchone()
if row is None:
raise NotFoundError(
f"ability {ability_id} was not found for repository {repository_id}"
)
def update_ability(
self,
repository_id: int,
ability_id: int,
*,
name: str | None = None,
description: str | None = None,
confidence: float | None = None,
) -> None:
self._update_approved_row(
table="approved_abilities",
label="ability",
repository_id=repository_id,
row_id=ability_id,
values={
"name": name,
"description": description,
"confidence": confidence,
},
)
def delete_ability(self, repository_id: int, ability_id: int) -> None:
self._delete_approved_row(
table="approved_abilities",
label="ability",
repository_id=repository_id,
row_id=ability_id,
)
def create_capability(
self,
repository_id: int,
ability_id: int,
*,
name: str,
description: str,
inputs: list[str],
outputs: list[str],
confidence: float,
) -> int:
with self.connect() as connection:
cursor = connection.execute(
"""
INSERT INTO approved_capabilities
(repository_id, ability_id, name, description, inputs, outputs, confidence)
VALUES (?, ?, ?, ?, ?, ?, ?)
""",
(
repository_id,
ability_id,
name,
description,
json.dumps(inputs),
json.dumps(outputs),
confidence,
),
)
return int(cursor.lastrowid)
def ensure_capability(self, repository_id: int, capability_id: int) -> None:
with self.connect() as connection:
row = connection.execute(
"""
SELECT id FROM approved_capabilities
WHERE id = ? AND repository_id = ?
""",
(capability_id, repository_id),
).fetchone()
if row is None:
raise NotFoundError(
f"capability {capability_id} was not found for repository {repository_id}"
)
def update_capability(
self,
repository_id: int,
capability_id: int,
*,
name: str | None = None,
description: str | None = None,
inputs: list[str] | None = None,
outputs: list[str] | None = None,
confidence: float | None = None,
) -> None:
self._update_approved_row(
table="approved_capabilities",
label="capability",
repository_id=repository_id,
row_id=capability_id,
values={
"name": name,
"description": description,
"inputs": json.dumps(inputs) if inputs is not None else None,
"outputs": json.dumps(outputs) if outputs is not None else None,
"confidence": confidence,
},
)
def delete_capability(self, repository_id: int, capability_id: int) -> None:
self._delete_approved_row(
table="approved_capabilities",
label="capability",
repository_id=repository_id,
row_id=capability_id,
)
def create_feature(
self,
repository_id: int,
capability_id: int,
*,
name: str,
type: str,
location: str,
confidence: float,
source_refs: list[SourceReference] | None = None,
) -> int:
with self.connect() as connection:
cursor = connection.execute(
"""
INSERT INTO approved_features
(repository_id, capability_id, name, type, location, confidence, source_refs)
VALUES (?, ?, ?, ?, ?, ?, ?)
""",
(
repository_id,
capability_id,
name,
type,
location,
confidence,
self._source_refs_to_json(source_refs or []),
),
)
return int(cursor.lastrowid)
def update_feature(
self,
repository_id: int,
feature_id: int,
*,
name: str | None = None,
type: str | None = None,
location: str | None = None,
confidence: float | None = None,
) -> None:
self._update_approved_row(
table="approved_features",
label="feature",
repository_id=repository_id,
row_id=feature_id,
values={
"name": name,
"type": type,
"location": location,
"confidence": confidence,
},
)
def delete_feature(self, repository_id: int, feature_id: int) -> None:
self._delete_approved_row(
table="approved_features",
label="feature",
repository_id=repository_id,
row_id=feature_id,
)
def create_evidence(
self,
repository_id: int,
capability_id: int,
*,
type: str,
reference: str,
strength: str,
source_refs: list[SourceReference] | None = None,
) -> int:
with self.connect() as connection:
cursor = connection.execute(
"""
INSERT INTO approved_evidence
(repository_id, capability_id, type, reference, strength, source_refs)
VALUES (?, ?, ?, ?, ?, ?)
""",
(
repository_id,
capability_id,
type,
reference,
strength,
self._source_refs_to_json(source_refs or []),
),
)
return int(cursor.lastrowid)
def update_evidence(
self,
repository_id: int,
evidence_id: int,
*,
type: str | None = None,
reference: str | None = None,
strength: str | None = None,
) -> None:
self._update_approved_row(
table="approved_evidence",
label="evidence",
repository_id=repository_id,
row_id=evidence_id,
values={
"type": type,
"reference": reference,
"strength": strength,
},
)
def delete_evidence(self, repository_id: int, evidence_id: int) -> None:
self._delete_approved_row(
table="approved_evidence",
label="evidence",
repository_id=repository_id,
row_id=evidence_id,
)
def replace_approved_from_candidate_graph(
self,
repository_id: int,
graph: CandidateGraph,
) -> None:
if graph.repository.id != repository_id:
raise NotFoundError(
f"candidate graph for repository {graph.repository.id} does not match "
f"repository {repository_id}"
)
with self.connect() as connection:
connection.execute(
"DELETE FROM approved_abilities WHERE repository_id = ?",
(repository_id,),
)
for ability in graph.abilities:
if ability.status not in {"candidate", "approved"}:
continue
ability_cursor = connection.execute(
"""
INSERT INTO approved_abilities
(repository_id, name, description, confidence)
VALUES (?, ?, ?, ?)
""",
(
repository_id,
ability.name,
ability.description,
ability.confidence,
),
)
approved_ability_id = int(ability_cursor.lastrowid)
for capability in ability.capabilities:
if capability.status not in {"candidate", "approved"}:
continue
capability_cursor = connection.execute(
"""
INSERT INTO approved_capabilities
(repository_id, ability_id, name, description, inputs, outputs,
confidence)
VALUES (?, ?, ?, ?, ?, ?, ?)
""",
(
repository_id,
approved_ability_id,
capability.name,
capability.description,
json.dumps(capability.inputs),
json.dumps(capability.outputs),
capability.confidence,
),
)
approved_capability_id = int(capability_cursor.lastrowid)
for feature in capability.features:
if feature.status not in {"candidate", "approved"}:
continue
connection.execute(
"""
INSERT INTO approved_features
(repository_id, capability_id, name, type, location,
confidence, source_refs)
VALUES (?, ?, ?, ?, ?, ?, ?)
""",
(
repository_id,
approved_capability_id,
feature.name,
feature.type,
feature.location,
feature.confidence,
self._source_refs_to_json(feature.source_refs),
),
)
for evidence in capability.evidence:
if evidence.status not in {"candidate", "approved"}:
continue
connection.execute(
"""
INSERT INTO approved_evidence
(repository_id, capability_id, type, reference, strength,
source_refs)
VALUES (?, ?, ?, ?, ?, ?)
""",
(
repository_id,
approved_capability_id,
evidence.type,
evidence.reference,
evidence.strength,
self._source_refs_to_json(evidence.source_refs),
),
)
def get_ability_map(self, repository_id: int) -> RepositoryAbilityMap:
repository = self.get_repository(repository_id)
with self.connect() as connection:
ability_rows = connection.execute(
"""
SELECT id, name, description, confidence
FROM approved_abilities
WHERE repository_id = ?
ORDER BY id
""",
(repository_id,),
).fetchall()
capability_rows = connection.execute(
"""
SELECT id, ability_id, name, description, inputs, outputs, confidence
FROM approved_capabilities
WHERE repository_id = ?
ORDER BY id
""",
(repository_id,),
).fetchall()
feature_rows = connection.execute(
"""
SELECT id, capability_id, name, type, location, confidence, source_refs
FROM approved_features
WHERE repository_id = ?
ORDER BY id
""",
(repository_id,),
).fetchall()
evidence_rows = connection.execute(
"""
SELECT id, capability_id, type, reference, strength, source_refs
FROM approved_evidence
WHERE repository_id = ?
ORDER BY id
""",
(repository_id,),
).fetchall()
features_by_capability: dict[int, list[Feature]] = {}
for row in feature_rows:
features_by_capability.setdefault(row["capability_id"], []).append(
Feature(
id=row["id"],
name=row["name"],
type=row["type"],
location=row["location"],
confidence=row["confidence"],
confidence_label=confidence_label(row["confidence"]),
source_refs=self._source_refs_from_json(row["source_refs"]),
)
)
evidence_by_capability: dict[int, list[Evidence]] = {}
for row in evidence_rows:
evidence_by_capability.setdefault(row["capability_id"], []).append(
Evidence(
id=row["id"],
type=row["type"],
reference=row["reference"],
strength=row["strength"],
source_refs=self._source_refs_from_json(row["source_refs"]),
)
)
capabilities_by_ability: dict[int, list[Capability]] = {}
for row in capability_rows:
capabilities_by_ability.setdefault(row["ability_id"], []).append(
Capability(
id=row["id"],
name=row["name"],
description=row["description"],
inputs=json.loads(row["inputs"]),
outputs=json.loads(row["outputs"]),
confidence=row["confidence"],
confidence_label=confidence_label(row["confidence"]),
features=features_by_capability.get(row["id"], []),
evidence=evidence_by_capability.get(row["id"], []),
)
)
abilities = [
Ability(
id=row["id"],
name=row["name"],
description=row["description"],
confidence=row["confidence"],
confidence_label=confidence_label(row["confidence"]),
capabilities=capabilities_by_ability.get(row["id"], []),
)
for row in ability_rows
]
return RepositoryAbilityMap(repository=repository, abilities=abilities)
def search(
self,
query: str,
*,
status: str | None = None,
language: str | None = None,
framework: str | None = None,
ability: str | None = None,
capability: str | None = None,
) -> list[SearchResult]:
term = query.strip()
needle = f"%{term}%"
if not term:
return []
with self.connect() as connection:
repository_ids = self._search_filter_repository_ids(
connection,
status=status,
language=language,
framework=framework,
ability=ability,
capability=capability,
)
if repository_ids is not None and not repository_ids:
return []
repository_filter, repository_params = self._repository_filter_sql(
repository_ids,
)
repository_rows = connection.execute(
f"""
SELECT r.id AS repository_id, r.name AS repository_name,
r.description
FROM repositories r
WHERE (r.name LIKE ? OR COALESCE(r.description, '') LIKE ?)
{repository_filter}
""",
(needle, needle, *repository_params),
).fetchall()
ability_rows = connection.execute(
f"""
SELECT r.id AS repository_id, r.name AS repository_name,
a.id AS ability_id, a.name AS ability_name,
a.description AS ability_description, a.confidence
FROM approved_abilities a
JOIN repositories r ON r.id = a.repository_id
WHERE (a.name LIKE ? OR a.description LIKE ?)
{repository_filter}
""",
(needle, needle, *repository_params),
).fetchall()
capability_rows = connection.execute(
f"""
SELECT r.id AS repository_id, r.name AS repository_name,
a.id AS ability_id, a.name AS ability_name,
c.id AS capability_id, c.name AS capability_name,
c.description AS capability_description, c.confidence
FROM approved_capabilities c
JOIN approved_abilities a ON a.id = c.ability_id
JOIN repositories r ON r.id = c.repository_id
WHERE (c.name LIKE ? OR c.description LIKE ?)
{repository_filter}
""",
(needle, needle, *repository_params),
).fetchall()
feature_rows = connection.execute(
f"""
SELECT r.id AS repository_id, r.name AS repository_name,
a.id AS ability_id, a.name AS ability_name,
c.id AS capability_id, c.name AS capability_name,
f.name AS feature_name, f.type AS feature_type,
f.location, f.confidence
FROM approved_features f
JOIN approved_capabilities c ON c.id = f.capability_id
JOIN approved_abilities a ON a.id = c.ability_id
JOIN repositories r ON r.id = f.repository_id
WHERE (f.name LIKE ? OR f.type LIKE ? OR f.location LIKE ?)
{repository_filter}
""",
(needle, needle, needle, *repository_params),
).fetchall()
evidence_rows = connection.execute(
f"""
SELECT r.id AS repository_id, r.name AS repository_name,
a.id AS ability_id, a.name AS ability_name,
c.id AS capability_id, c.name AS capability_name,
e.type AS evidence_type, e.reference, e.strength
FROM approved_evidence e
JOIN approved_capabilities c ON c.id = e.capability_id
JOIN approved_abilities a ON a.id = c.ability_id
JOIN repositories r ON r.id = e.repository_id
WHERE (e.type LIKE ? OR e.reference LIKE ? OR e.strength LIKE ?)
{repository_filter}
""",
(needle, needle, needle, *repository_params),
).fetchall()
results: list[SearchResult] = []
for row in repository_rows:
matched_field = (
"name" if self._matches(row["repository_name"], term) else "description"
)
results.append(
SearchResult(
repository_id=row["repository_id"],
repository_name=row["repository_name"],
match_type="repository",
match_name=row["repository_name"],
confidence=1.0,
confidence_label=confidence_label(1.0),
match_description=row["description"] or "",
matched_field=matched_field,
)
)
for row in ability_rows:
matched_field = (
"name" if self._matches(row["ability_name"], term) else "description"
)
results.append(
SearchResult(
repository_id=row["repository_id"],
repository_name=row["repository_name"],
match_type="ability",
match_name=row["ability_name"],
confidence=row["confidence"],
confidence_label=confidence_label(row["confidence"]),
match_description=row["ability_description"],
matched_field=matched_field,
ability_id=row["ability_id"],
ability_name=row["ability_name"],
)
)
for row in capability_rows:
matched_field = (
"name"
if self._matches(row["capability_name"], term)
else "description"
)
results.append(
SearchResult(
repository_id=row["repository_id"],
repository_name=row["repository_name"],
match_type="capability",
match_name=row["capability_name"],
confidence=row["confidence"],
confidence_label=confidence_label(row["confidence"]),
match_description=row["capability_description"],
matched_field=matched_field,
ability_id=row["ability_id"],
ability_name=row["ability_name"],
capability_id=row["capability_id"],
capability_name=row["capability_name"],
)
)
for row in feature_rows:
matched_field = self._first_matched_field(
term,
{
"name": row["feature_name"],
"type": row["feature_type"],
"location": row["location"],
},
)
results.append(
SearchResult(
repository_id=row["repository_id"],
repository_name=row["repository_name"],
match_type="feature",
match_name=row["feature_name"],
confidence=row["confidence"],
confidence_label=confidence_label(row["confidence"]),
match_description=row["feature_type"],
matched_field=matched_field,
ability_id=row["ability_id"],
ability_name=row["ability_name"],
capability_id=row["capability_id"],
capability_name=row["capability_name"],
source_reference=row["location"],
)
)
for row in evidence_rows:
matched_field = self._first_matched_field(
term,
{
"type": row["evidence_type"],
"reference": row["reference"],
"strength": row["strength"],
},
)
results.append(
SearchResult(
repository_id=row["repository_id"],
repository_name=row["repository_name"],
match_type="evidence",
match_name=row["reference"],
confidence=self._evidence_confidence(row["strength"]),
confidence_label=confidence_label(
self._evidence_confidence(row["strength"])
),
match_description=row["evidence_type"],
matched_field=matched_field,
ability_id=row["ability_id"],
ability_name=row["ability_name"],
capability_id=row["capability_id"],
capability_name=row["capability_name"],
evidence_level=row["strength"],
source_reference=row["reference"],
)
)
return sorted(
results,
key=lambda result: (
-result.confidence,
result.repository_name.lower(),
result.match_type,
result.match_name.lower(),
),
)
def _matches(self, value: str | None, term: str) -> bool:
return term.lower() in (value or "").lower()
def _first_matched_field(self, term: str, values: dict[str, str | None]) -> str:
for field, value in values.items():
if self._matches(value, term):
return field
return ""
def _evidence_confidence(self, strength: str) -> float:
return {"strong": 0.9, "medium": 0.6, "weak": 0.3}.get(strength, 0.5)
def _search_filter_repository_ids(
self,
connection: sqlite3.Connection,
*,
status: str | None,
language: str | None,
framework: str | None,
ability: str | None,
capability: str | None,
) -> list[int] | None:
filters: list[set[int]] = []
if status:
rows = connection.execute(
"SELECT id FROM repositories WHERE status = ?",
(status,),
).fetchall()
filters.append({row["id"] for row in rows})
if language:
rows = connection.execute(
"""
SELECT DISTINCT repository_id
FROM observed_facts
WHERE kind = 'language' AND name LIKE ?
""",
(language,),
).fetchall()
filters.append({row["repository_id"] for row in rows})
if framework:
rows = connection.execute(
"""
SELECT DISTINCT repository_id
FROM observed_facts
WHERE kind = 'framework' AND name LIKE ?
""",
(framework,),
).fetchall()
filters.append({row["repository_id"] for row in rows})
if ability:
rows = connection.execute(
"""
SELECT DISTINCT repository_id
FROM approved_abilities
WHERE name LIKE ? OR description LIKE ?
""",
(f"%{ability}%", f"%{ability}%"),
).fetchall()
filters.append({row["repository_id"] for row in rows})
if capability:
rows = connection.execute(
"""
SELECT DISTINCT repository_id
FROM approved_capabilities
WHERE name LIKE ? OR description LIKE ?
""",
(f"%{capability}%", f"%{capability}%"),
).fetchall()
filters.append({row["repository_id"] for row in rows})
if not filters:
return None
repository_ids = set.intersection(*filters)
return sorted(repository_ids)
def _repository_filter_sql(
self,
repository_ids: list[int] | None,
) -> tuple[str, tuple[int, ...]]:
if repository_ids is None:
return "", ()
placeholders = ", ".join("?" for _ in repository_ids)
return f"AND r.id IN ({placeholders})", tuple(repository_ids)
def _insert_facts(
self,
connection: sqlite3.Connection,
*,
repository_id: int,
analysis_run_id: int,
snapshot_id: int,
facts: list[FactCandidate],
) -> None:
connection.executemany(
"""
INSERT INTO observed_facts
(repository_id, analysis_run_id, snapshot_id, kind, path, name, value, metadata)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""",
[
(
repository_id,
analysis_run_id,
snapshot_id,
fact.kind,
fact.path,
fact.name,
fact.value,
json.dumps(fact.metadata),
)
for fact in facts
],
)
def _update_approved_row(
self,
*,
table: str,
label: str,
repository_id: int,
row_id: int,
values: dict[str, str | float | None],
) -> None:
assignments: list[str] = []
params: list[str | float | int] = []
for column, value in values.items():
if value is None:
continue
assignments.append(f"{column} = ?")
params.append(value)
if not assignments:
self._ensure_approved_row(
table=table,
label=label,
repository_id=repository_id,
row_id=row_id,
)
return
params.extend([row_id, repository_id])
with self.connect() as connection:
cursor = connection.execute(
f"""
UPDATE {table}
SET {", ".join(assignments)}
WHERE id = ? AND repository_id = ?
""",
params,
)
if cursor.rowcount == 0:
raise NotFoundError(
f"{label} {row_id} was not found for repository {repository_id}"
)
def _delete_approved_row(
self,
*,
table: str,
label: str,
repository_id: int,
row_id: int,
) -> None:
with self.connect() as connection:
cursor = connection.execute(
f"DELETE FROM {table} WHERE id = ? AND repository_id = ?",
(row_id, repository_id),
)
if cursor.rowcount == 0:
raise NotFoundError(
f"{label} {row_id} was not found for repository {repository_id}"
)
def _ensure_approved_row(
self,
*,
table: str,
label: str,
repository_id: int,
row_id: int,
) -> None:
with self.connect() as connection:
row = connection.execute(
f"SELECT id FROM {table} WHERE id = ? AND repository_id = ?",
(row_id, repository_id),
).fetchone()
if row is None:
raise NotFoundError(
f"{label} {row_id} was not found for repository {repository_id}"
)
def _source_refs_to_json(self, source_refs: list[SourceReference]) -> str:
return json.dumps(
[
{
"fact_id": source_ref.fact_id,
"path": source_ref.path,
"kind": source_ref.kind,
"name": source_ref.name,
"line": source_ref.line,
}
for source_ref in source_refs
]
)
def _source_refs_from_json(self, value: str) -> list[SourceReference]:
return [
SourceReference(
fact_id=item.get("fact_id"),
path=item.get("path", ""),
kind=item.get("kind", ""),
name=item.get("name", ""),
line=item.get("line"),
)
for item in json.loads(value)
]
@staticmethod
def _repository_from_row(row: sqlite3.Row) -> Repository:
return Repository(
id=row["id"],
name=row["name"],
url=row["url"],
description=row["description"],
branch=row["branch"],
status=row["status"],
)
@staticmethod
def _snapshot_from_row(row: sqlite3.Row) -> RepositorySnapshot:
return RepositorySnapshot(
id=row["id"],
repository_id=row["repository_id"],
commit_hash=row["commit_hash"],
branch=row["branch"],
source_path=row["source_path"],
file_count=row["file_count"],
)
@staticmethod
def _analysis_run_from_row(row: sqlite3.Row) -> AnalysisRun:
return AnalysisRun(
id=row["id"],
repository_id=row["repository_id"],
snapshot_id=row["snapshot_id"],
status=row["status"],
started_at=row["started_at"],
completed_at=row["completed_at"],
error_message=row["error_message"],
scanner_version=row["scanner_version"],
)
@staticmethod
def _observed_fact_from_row(row: sqlite3.Row) -> ObservedFact:
return ObservedFact(
id=row["id"],
repository_id=row["repository_id"],
analysis_run_id=row["analysis_run_id"],
snapshot_id=row["snapshot_id"],
kind=row["kind"],
path=row["path"],
name=row["name"],
value=row["value"],
metadata=json.loads(row["metadata"]),
)
@staticmethod
def _content_chunk_from_row(row: sqlite3.Row) -> ContentChunk:
return ContentChunk(
id=row["id"],
repository_id=row["repository_id"],
analysis_run_id=row["analysis_run_id"],
snapshot_id=row["snapshot_id"],
path=row["path"],
kind=row["kind"],
start_line=row["start_line"],
end_line=row["end_line"],
text=row["text"],
)