generated from coulomb/repo-seed
Milestone 2’s core deterministic scanner path
This commit is contained in:
@@ -3,17 +3,20 @@ from __future__ import annotations
|
||||
import json
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from repo_registry.core.models import (
|
||||
Ability,
|
||||
AnalysisRun,
|
||||
Capability,
|
||||
Evidence,
|
||||
Feature,
|
||||
ObservedFact,
|
||||
Repository,
|
||||
RepositoryAbilityMap,
|
||||
RepositorySnapshot,
|
||||
SearchResult,
|
||||
)
|
||||
from repo_registry.repo_scanning.scanner import FactCandidate, ScanResult
|
||||
|
||||
|
||||
class NotFoundError(ValueError):
|
||||
@@ -54,6 +57,19 @@ class RegistryStore:
|
||||
repository_id = int(cursor.lastrowid)
|
||||
return self.get_repository(repository_id)
|
||||
|
||||
def update_repository_status(self, repository_id: int, status: str) -> None:
    """Store a new status on one repository row.

    The row's ``updated_at`` column is set to ``CURRENT_TIMESTAMP`` in the
    same statement.

    Args:
        repository_id: Value matched against ``repositories.id``.
        status: Status text to write.

    Raises:
        NotFoundError: If the UPDATE touched no row.
    """
    statement = """
        UPDATE repositories
        SET status = ?, updated_at = CURRENT_TIMESTAMP
        WHERE id = ?
        """
    with self.connect() as connection:
        affected = connection.execute(statement, (status, repository_id)).rowcount
        # Zero affected rows means the id matched nothing.
        if affected == 0:
            raise NotFoundError(f"repository {repository_id} was not found")
|
||||
|
||||
def list_repositories(self) -> list[Repository]:
|
||||
with self.connect() as connection:
|
||||
rows = connection.execute(
|
||||
@@ -79,6 +95,172 @@ class RegistryStore:
|
||||
raise NotFoundError(f"repository {repository_id} was not found")
|
||||
return self._repository_from_row(row)
|
||||
|
||||
def create_analysis_run(self, repository_id: int) -> AnalysisRun:
    """Open a new analysis run for a repository.

    Inserts an ``analysis_runs`` row whose status is ``'running'`` and
    returns that run as read back through ``get_analysis_run``.

    Args:
        repository_id: Repository the run belongs to.

    Returns:
        The freshly inserted run.
    """
    # Called for its lookup only; the result is discarded. Presumably this
    # raises NotFoundError for an unknown repository -- confirm in
    # get_repository.
    self.get_repository(repository_id)

    insert_sql = """
        INSERT INTO analysis_runs (repository_id, status)
        VALUES (?, 'running')
        """
    with self.connect() as connection:
        inserted = connection.execute(insert_sql, (repository_id,))
        new_run_id = int(inserted.lastrowid)

    # Re-read after the connection block has exited.
    return self.get_analysis_run(repository_id, new_run_id)
|
||||
|
||||
def complete_analysis_run(
    self,
    repository_id: int,
    analysis_run_id: int,
    scan_result: ScanResult,
) -> AnalysisRun:
    """Persist a finished scan and mark its analysis run as completed.

    Within a single ``self.connect()`` block this method:

    1. verifies that the analysis run exists for the repository,
    2. inserts a ``repository_snapshots`` row from ``scan_result``,
    3. stores ``scan_result.facts`` through ``_insert_facts``,
    4. sets the run to ``'completed'`` with the new snapshot id, a
       completion timestamp and a cleared ``error_message``,
    5. sets the repository status to ``'analyzed'``.

    Args:
        repository_id: Repository that was scanned.
        analysis_run_id: Run to complete; must belong to ``repository_id``.
        scan_result: Scanner output; its ``commit_hash``, ``branch``,
            ``source_path``, ``file_count`` and ``facts`` are read.

    Returns:
        The run as read back through ``get_analysis_run``.

    Raises:
        NotFoundError: If no such run exists for the repository. This is
            raised before any row is written.
    """
    with self.connect() as connection:
        # Validate first: without this check an unknown run id would still
        # leave behind a snapshot, its facts and an 'analyzed' repository
        # status, and only fail in the final read-back.
        existing_run = connection.execute(
            """
            SELECT 1
            FROM analysis_runs
            WHERE id = ? AND repository_id = ?
            """,
            (analysis_run_id, repository_id),
        ).fetchone()
        if existing_run is None:
            raise NotFoundError(
                f"analysis run {analysis_run_id} was not found for repository {repository_id}"
            )

        snapshot_cursor = connection.execute(
            """
            INSERT INTO repository_snapshots
            (repository_id, commit_hash, branch, source_path, file_count)
            VALUES (?, ?, ?, ?, ?)
            """,
            (
                repository_id,
                scan_result.commit_hash,
                scan_result.branch,
                scan_result.source_path,
                scan_result.file_count,
            ),
        )
        snapshot_id = int(snapshot_cursor.lastrowid)

        # Facts are written on the same connection as the snapshot.
        self._insert_facts(
            connection,
            repository_id=repository_id,
            analysis_run_id=analysis_run_id,
            snapshot_id=snapshot_id,
            facts=scan_result.facts,
        )

        connection.execute(
            """
            UPDATE analysis_runs
            SET status = 'completed',
            snapshot_id = ?,
            completed_at = CURRENT_TIMESTAMP,
            error_message = NULL
            WHERE id = ? AND repository_id = ?
            """,
            (snapshot_id, analysis_run_id, repository_id),
        )
        connection.execute(
            """
            UPDATE repositories
            SET status = 'analyzed', updated_at = CURRENT_TIMESTAMP
            WHERE id = ?
            """,
            (repository_id,),
        )

    # Re-read after the connection block has exited.
    return self.get_analysis_run(repository_id, analysis_run_id)
|
||||
|
||||
def fail_analysis_run(
    self,
    repository_id: int,
    analysis_run_id: int,
    error_message: str,
) -> AnalysisRun:
    """Mark an analysis run as failed and record why.

    Sets the run's status to ``'failed'`` with a completion timestamp and
    the given ``error_message``, then sets the repository status to
    ``'analysis_failed'``.

    Args:
        repository_id: Repository the run belongs to.
        analysis_run_id: Run to mark as failed.
        error_message: Text stored in ``analysis_runs.error_message``.

    Returns:
        The run as read back through ``get_analysis_run``.

    Raises:
        NotFoundError: If no such run exists for the repository. The
            repository row is left untouched in that case.
    """
    with self.connect() as connection:
        cursor = connection.execute(
            """
            UPDATE analysis_runs
            SET status = 'failed',
            completed_at = CURRENT_TIMESTAMP,
            error_message = ?
            WHERE id = ? AND repository_id = ?
            """,
            (error_message, analysis_run_id, repository_id),
        )
        # Check before touching the repository row, so a bad run id never
        # flips the repository to 'analysis_failed' regardless of how the
        # connection handles the exception.
        if cursor.rowcount == 0:
            raise NotFoundError(
                f"analysis run {analysis_run_id} was not found for repository {repository_id}"
            )
        connection.execute(
            """
            UPDATE repositories
            SET status = 'analysis_failed', updated_at = CURRENT_TIMESTAMP
            WHERE id = ?
            """,
            (repository_id,),
        )

    # Re-read after the connection block has exited.
    return self.get_analysis_run(repository_id, analysis_run_id)
|
||||
|
||||
def get_analysis_run(self, repository_id: int, analysis_run_id: int) -> AnalysisRun:
    """Fetch one analysis run, scoped to its repository.

    Args:
        repository_id: Repository the run must belong to.
        analysis_run_id: Id of the run to load.

    Returns:
        The matching row converted by ``_analysis_run_from_row``.

    Raises:
        NotFoundError: If no row matches both ids.
    """
    lookup_sql = """
        SELECT id, repository_id, snapshot_id, status, started_at,
        completed_at, error_message, scanner_version
        FROM analysis_runs
        WHERE id = ? AND repository_id = ?
        """
    with self.connect() as connection:
        record = connection.execute(
            lookup_sql, (analysis_run_id, repository_id)
        ).fetchone()

    if record is not None:
        return self._analysis_run_from_row(record)
    raise NotFoundError(
        f"analysis run {analysis_run_id} was not found for repository {repository_id}"
    )
|
||||
|
||||
def list_analysis_runs(self, repository_id: int) -> list[AnalysisRun]:
    """Return every analysis run of a repository, newest first.

    Rows are ordered by ``started_at`` descending, with ``id`` descending
    as the tie-breaker.

    Args:
        repository_id: Repository whose runs are listed.

    Returns:
        One entry per row, each converted by ``_analysis_run_from_row``.
    """
    # Called for its lookup only; the result is discarded. Presumably this
    # raises NotFoundError for an unknown repository -- confirm in
    # get_repository.
    self.get_repository(repository_id)

    listing_sql = """
        SELECT id, repository_id, snapshot_id, status, started_at,
        completed_at, error_message, scanner_version
        FROM analysis_runs
        WHERE repository_id = ?
        ORDER BY started_at DESC, id DESC
        """
    with self.connect() as connection:
        records = connection.execute(listing_sql, (repository_id,)).fetchall()

    runs = []
    for record in records:
        runs.append(self._analysis_run_from_row(record))
    return runs
|
||||
|
||||
def get_snapshot(self, snapshot_id: int) -> RepositorySnapshot:
    """Fetch one repository snapshot by its id.

    Args:
        snapshot_id: Value matched against ``repository_snapshots.id``.

    Returns:
        The matching row converted by ``_snapshot_from_row``.

    Raises:
        NotFoundError: If no snapshot has that id.
    """
    lookup_sql = """
        SELECT id, repository_id, commit_hash, branch, source_path, file_count
        FROM repository_snapshots
        WHERE id = ?
        """
    with self.connect() as connection:
        record = connection.execute(lookup_sql, (snapshot_id,)).fetchone()

    if record is not None:
        return self._snapshot_from_row(record)
    raise NotFoundError(f"snapshot {snapshot_id} was not found")
|
||||
|
||||
def list_observed_facts(
    self,
    repository_id: int,
    analysis_run_id: int | None = None,
) -> list[ObservedFact]:
    """List the observed facts stored for a repository.

    Results are ordered by ``kind``, ``path``, ``name`` and ``id``, all
    ascending.

    Args:
        repository_id: Repository whose facts are listed.
        analysis_run_id: When given, only facts recorded for that run are
            returned; when ``None``, facts from every run are returned.

    Returns:
        One entry per row, each converted by ``_observed_fact_from_row``.
    """
    # Called for its lookup only; the result is discarded. Presumably this
    # raises NotFoundError for an unknown repository -- confirm in
    # get_repository.
    self.get_repository(repository_id)

    # Only fixed clause text is assembled here; every value is bound as a
    # parameter.
    conditions = ["repository_id = ?"]
    bound: list[int] = [repository_id]
    if analysis_run_id is not None:
        conditions.append("analysis_run_id = ?")
        bound.append(analysis_run_id)
    where = "WHERE " + " AND ".join(conditions)
    params: tuple[int, ...] = tuple(bound)

    with self.connect() as connection:
        records = connection.execute(
            f"""
            SELECT id, repository_id, analysis_run_id, snapshot_id, kind,
            path, name, value, metadata
            FROM observed_facts
            {where}
            ORDER BY kind ASC, path ASC, name ASC, id ASC
            """,
            params,
        ).fetchall()

    facts = []
    for record in records:
        facts.append(self._observed_fact_from_row(record))
    return facts
|
||||
|
||||
def create_ability(
|
||||
self,
|
||||
repository_id: int,
|
||||
@@ -326,6 +508,36 @@ class RegistryStore:
|
||||
for row in rows
|
||||
]
|
||||
|
||||
def _insert_facts(
|
||||
self,
|
||||
connection: sqlite3.Connection,
|
||||
*,
|
||||
repository_id: int,
|
||||
analysis_run_id: int,
|
||||
snapshot_id: int,
|
||||
facts: list[FactCandidate],
|
||||
) -> None:
|
||||
connection.executemany(
|
||||
"""
|
||||
INSERT INTO observed_facts
|
||||
(repository_id, analysis_run_id, snapshot_id, kind, path, name, value, metadata)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
[
|
||||
(
|
||||
repository_id,
|
||||
analysis_run_id,
|
||||
snapshot_id,
|
||||
fact.kind,
|
||||
fact.path,
|
||||
fact.name,
|
||||
fact.value,
|
||||
json.dumps(fact.metadata),
|
||||
)
|
||||
for fact in facts
|
||||
],
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _repository_from_row(row: sqlite3.Row) -> Repository:
|
||||
return Repository(
|
||||
@@ -336,3 +548,41 @@ class RegistryStore:
|
||||
branch=row["branch"],
|
||||
status=row["status"],
|
||||
)
|
||||
|
||||
@staticmethod
def _snapshot_from_row(row: sqlite3.Row) -> RepositorySnapshot:
    """Build a ``RepositorySnapshot`` from one database row.

    Each listed column is read from ``row`` by name and passed to the
    constructor under the same keyword.
    """
    columns = (
        "id",
        "repository_id",
        "commit_hash",
        "branch",
        "source_path",
        "file_count",
    )
    return RepositorySnapshot(**{column: row[column] for column in columns})
|
||||
|
||||
@staticmethod
def _analysis_run_from_row(row: sqlite3.Row) -> AnalysisRun:
    """Build an ``AnalysisRun`` from one database row.

    Each listed column is read from ``row`` by name and passed to the
    constructor under the same keyword.
    """
    columns = (
        "id",
        "repository_id",
        "snapshot_id",
        "status",
        "started_at",
        "completed_at",
        "error_message",
        "scanner_version",
    )
    return AnalysisRun(**{column: row[column] for column in columns})
|
||||
|
||||
@staticmethod
def _observed_fact_from_row(row: sqlite3.Row) -> ObservedFact:
    """Build an ``ObservedFact`` from one database row.

    Every column is passed through unchanged except ``metadata``, which is
    decoded from JSON text first.
    """
    passthrough_columns = (
        "id",
        "repository_id",
        "analysis_run_id",
        "snapshot_id",
        "kind",
        "path",
        "name",
        "value",
    )
    fields = {column: row[column] for column in passthrough_columns}
    fields["metadata"] = json.loads(row["metadata"])
    return ObservedFact(**fields)
|
||||
|
||||
Reference in New Issue
Block a user