generated from coulomb/repo-seed
first content-indexing slice
This commit is contained in:
@@ -15,6 +15,7 @@ from repo_registry.core.models import (
|
||||
CandidateGraph,
|
||||
Capability,
|
||||
CapabilitySummary,
|
||||
ContentChunk,
|
||||
Evidence,
|
||||
Feature,
|
||||
ObservedFact,
|
||||
@@ -25,6 +26,7 @@ from repo_registry.core.models import (
|
||||
SearchResult,
|
||||
SourceReference,
|
||||
)
|
||||
from repo_registry.content_indexing.extractor import ContentChunkCandidate
|
||||
from repo_registry.candidate_graph.generator import CandidateAbilityDraft
|
||||
from repo_registry.repo_scanning.scanner import FactCandidate, ScanResult
|
||||
|
||||
@@ -41,6 +43,7 @@ class RegistryStore:
|
||||
migration_path = Path(__file__).parents[3] / "migrations" / "0001_initial.sql"
|
||||
with self.connect() as connection:
|
||||
connection.executescript(migration_path.read_text(encoding="utf-8"))
|
||||
self._ensure_content_chunks_table(connection)
|
||||
self._ensure_approved_source_ref_columns(connection)
|
||||
|
||||
def connect(self) -> sqlite3.Connection:
|
||||
@@ -63,6 +66,30 @@ class RegistryStore:
|
||||
f"ALTER TABLE {table} ADD COLUMN source_refs TEXT NOT NULL DEFAULT '[]'"
|
||||
)
|
||||
|
||||
def _ensure_content_chunks_table(self, connection: sqlite3.Connection) -> None:
|
||||
connection.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS content_chunks (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
repository_id INTEGER NOT NULL REFERENCES repositories(id) ON DELETE CASCADE,
|
||||
analysis_run_id INTEGER NOT NULL REFERENCES analysis_runs(id) ON DELETE CASCADE,
|
||||
snapshot_id INTEGER REFERENCES repository_snapshots(id) ON DELETE CASCADE,
|
||||
path TEXT NOT NULL,
|
||||
kind TEXT NOT NULL,
|
||||
start_line INTEGER NOT NULL,
|
||||
end_line INTEGER NOT NULL,
|
||||
text TEXT NOT NULL,
|
||||
created_at TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||
)
|
||||
"""
|
||||
)
|
||||
connection.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_content_chunks_repository ON content_chunks(repository_id)"
|
||||
)
|
||||
connection.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_content_chunks_run ON content_chunks(analysis_run_id)"
|
||||
)
|
||||
|
||||
def create_repository(
|
||||
self,
|
||||
*,
|
||||
@@ -1163,6 +1190,65 @@ class RegistryStore:
|
||||
).fetchall()
|
||||
return [self._observed_fact_from_row(row) for row in rows]
|
||||
|
||||
def replace_content_chunks(
    self,
    repository_id: int,
    analysis_run_id: int,
    snapshot_id: int | None,
    chunks: list[ContentChunkCandidate],
) -> None:
    """Replace the stored content chunks of one analysis run with ``chunks``.

    All rows whose ``analysis_run_id`` matches are deleted, then one row is
    inserted per candidate. Both statements run inside the same
    ``with self.connect()`` block.
    NOTE(review): presumably that block is a single transaction; this
    depends on what ``connect()`` returns - confirm.

    Args:
        repository_id: Repository id stored on every inserted row.
        analysis_run_id: Run whose existing chunks are removed and which
            the new rows are attached to.
        snapshot_id: Snapshot id stored on every inserted row, or ``None``.
        chunks: Candidates to insert; each must expose ``path``, ``kind``,
            ``start_line``, ``end_line`` and ``text``.
    """
    delete_sql = "DELETE FROM content_chunks WHERE analysis_run_id = ?"
    insert_sql = """
        INSERT INTO content_chunks
            (repository_id, analysis_run_id, snapshot_id, path, kind,
             start_line, end_line, text)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
    """
    with self.connect() as connection:
        connection.execute(delete_sql, (analysis_run_id,))
        # The same ids are repeated on every row; only the chunk fields vary.
        new_rows = [
            (
                repository_id,
                analysis_run_id,
                snapshot_id,
                candidate.path,
                candidate.kind,
                candidate.start_line,
                candidate.end_line,
                candidate.text,
            )
            for candidate in chunks
        ]
        connection.executemany(insert_sql, new_rows)
|
||||
|
||||
def list_content_chunks(
    self,
    repository_id: int,
    analysis_run_id: int | None = None,
) -> list[ContentChunk]:
    """Return the stored content chunks of a repository in a stable order.

    ``self.get_repository(repository_id)`` is called first and its result
    discarded.
    NOTE(review): presumably this rejects unknown repositories - confirm
    against ``get_repository``.

    Args:
        repository_id: Repository whose chunks are listed.
        analysis_run_id: When given, only chunks of that run are returned.

    Returns:
        Chunks ordered by path, then start line, then row id.
    """
    self.get_repository(repository_id)

    params: tuple[int, ...]
    if analysis_run_id is None:
        where = "WHERE repository_id = ?"
        params = (repository_id,)
    else:
        where = "WHERE repository_id = ? AND analysis_run_id = ?"
        params = (repository_id, analysis_run_id)

    # Only the fixed WHERE fragments above are interpolated; every value
    # is passed as a bound parameter.
    query = f"""
        SELECT id, repository_id, analysis_run_id, snapshot_id, path, kind,
               start_line, end_line, text
        FROM content_chunks
        {where}
        ORDER BY path ASC, start_line ASC, id ASC
    """
    with self.connect() as connection:
        rows = connection.execute(query, params).fetchall()
    return list(map(self._content_chunk_from_row, rows))
|
||||
|
||||
def create_ability(
|
||||
self,
|
||||
repository_id: int,
|
||||
@@ -1986,3 +2072,17 @@ class RegistryStore:
|
||||
value=row["value"],
|
||||
metadata=json.loads(row["metadata"]),
|
||||
)
|
||||
|
||||
@staticmethod
def _content_chunk_from_row(row: sqlite3.Row) -> ContentChunk:
    """Build a ``ContentChunk`` from a row of the ``content_chunks`` table.

    Args:
        row: Row addressable by column name; it must provide every column
            listed in ``columns`` below.

    Returns:
        A ``ContentChunk`` whose fields carry the same-named column values.
    """
    # Column names and ContentChunk keyword names are identical.
    columns = (
        "id",
        "repository_id",
        "analysis_run_id",
        "snapshot_id",
        "path",
        "kind",
        "start_line",
        "end_line",
        "text",
    )
    return ContentChunk(**{column: row[column] for column in columns})
|
||||
|
||||
Reference in New Issue
Block a user