First steps toward better repository scanning: store per-chunk JSON metadata in content_chunks

This commit is contained in:
2026-05-02 00:11:55 +02:00
parent 2c427d253c
commit 89c4081001
9 changed files with 270 additions and 35 deletions

View File

@@ -180,6 +180,14 @@ class RegistryStore:
)
"""
)
columns = {
row["name"]
for row in connection.execute("PRAGMA table_info(content_chunks)").fetchall()
}
if "metadata" not in columns:
connection.execute(
"ALTER TABLE content_chunks ADD COLUMN metadata TEXT NOT NULL DEFAULT '{}'"
)
connection.execute(
"CREATE INDEX IF NOT EXISTS idx_content_chunks_repository ON content_chunks(repository_id)"
)
@@ -1675,8 +1683,8 @@ class RegistryStore:
"""
INSERT INTO content_chunks
(repository_id, analysis_run_id, snapshot_id, path, kind,
start_line, end_line, text)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
start_line, end_line, text, metadata)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
[
(
@@ -1688,6 +1696,7 @@ class RegistryStore:
chunk.start_line,
chunk.end_line,
chunk.text,
json.dumps(chunk.metadata),
)
for chunk in chunks
],
@@ -1709,7 +1718,7 @@ class RegistryStore:
rows = connection.execute(
f"""
SELECT id, repository_id, analysis_run_id, snapshot_id, path, kind,
start_line, end_line, text
start_line, end_line, text, metadata
FROM content_chunks
{where}
ORDER BY path ASC, start_line ASC, id ASC
@@ -2842,6 +2851,7 @@ class RegistryStore:
start_line=row["start_line"],
end_line=row["end_line"],
text=row["text"],
metadata=json.loads(row["metadata"]),
)
@staticmethod