Optional JSONPath query/extract support, FTS5 section/block search, and mkt cache query and search. The local SQLite backend now supports parsed snapshot persistence, incremental refresh, cached querying, and ranked full-text search.

This commit is contained in:
2026-05-04 10:32:06 +02:00
parent 36ff4cedab
commit 0015c8a385
11 changed files with 540 additions and 22 deletions

View File

@@ -36,6 +36,7 @@ from markitect_tool.backend.local_store import (
DEFAULT_LOCAL_INDEX_PATH,
LOCAL_INDEX_SCHEMA_VERSION,
LocalIndexBuildResult,
LocalSearchResult,
LocalSnapshotStore,
local_index_path_for,
)
@@ -70,6 +71,7 @@ __all__ = [
"DEFAULT_LOCAL_INDEX_PATH",
"LOCAL_INDEX_SCHEMA_VERSION",
"LocalIndexBuildResult",
"LocalSearchResult",
"LocalSnapshotStore",
"local_index_path_for",
]

View File

@@ -56,6 +56,24 @@ class LocalIndexBuildResult:
return data
@dataclass(frozen=True)
class LocalSearchResult:
"""One FTS search match from the local index."""
path: str
snapshot_id: str
unit_kind: str
unit_index: int
heading: str | None
text: str
rank: float
line_start: int | None = None
line_end: int | None = None
def to_dict(self) -> dict[str, Any]:
return {key: value for key, value in asdict(self).items() if value is not None}
class LocalSnapshotStore:
"""SQLite-backed local snapshot store for parsed Markdown documents."""
@@ -217,6 +235,7 @@ class LocalSnapshotStore:
return
with self._connect() as conn:
_create_schema(conn)
conn.execute("delete from search_units where path = ?", (path,))
conn.execute("delete from blocks where path = ?", (path,))
conn.execute("delete from sections where path = ?", (path,))
conn.execute("delete from headings where path = ?", (path,))
@@ -236,6 +255,45 @@ class LocalSnapshotStore:
raise KeyError(f"No indexed document `{path}`")
return json.loads(row["document_json"])
def search(self, query: str, *, limit: int = 20) -> list[LocalSearchResult]:
    """Run an FTS5 match over the indexed section and block text.

    Args:
        query: FTS5 match expression; must contain at least one
            non-whitespace character.
        limit: Value bound to the SQL ``limit`` clause.

    Returns:
        One ``LocalSearchResult`` per matching row, ordered by ascending
        ``bm25`` score. An empty list when the index file does not exist.

    Raises:
        ValueError: If ``query`` is blank, or if SQLite raises
            ``OperationalError`` while executing the match.
    """
    if query.strip() == "":
        raise ValueError("Search query cannot be empty")
    if not self.path.exists():
        # No index file on disk, so there is nothing to search.
        return []

    statement = """
        select s.path, s.snapshot_id, s.unit_kind, s.unit_index,
        s.heading, s.text, s.line_start, s.line_end,
        bm25(search_units) as rank
        from search_units s
        where search_units match ?
        order by rank
        limit ?
        """
    with self._connect() as conn:
        # Create the schema first so the FTS table exists before it is queried.
        _create_schema(conn)
        try:
            matched_rows = conn.execute(statement, (query, limit)).fetchall()
        except sqlite3.OperationalError as exc:
            # Operational errors raised here are reported to the caller as a bad query.
            raise ValueError(f"Invalid FTS query `{query}`: {exc}") from exc

        hits: list[LocalSearchResult] = []
        for record in matched_rows:
            hits.append(
                LocalSearchResult(
                    path=record["path"],
                    snapshot_id=record["snapshot_id"],
                    unit_kind=record["unit_kind"],
                    unit_index=record["unit_index"],
                    heading=record["heading"],
                    text=record["text"],
                    rank=float(record["rank"]),
                    line_start=record["line_start"],
                    line_end=record["line_end"],
                )
            )
        return hits
def build(
self,
paths: list[str | Path],
@@ -382,6 +440,16 @@ def _create_schema(conn: sqlite3.Connection) -> None:
target_snapshot_id text,
metadata_json text not null default '{}'
);
create virtual table if not exists search_units using fts5(
path unindexed,
snapshot_id unindexed,
unit_kind unindexed,
unit_index unindexed,
heading,
text,
line_start unindexed,
line_end unindexed
);
create index if not exists idx_sources_content_hash on sources(content_hash);
create index if not exists idx_sources_snapshot_id on sources(snapshot_id);
create index if not exists idx_sources_parser on sources(parser, parser_version);
@@ -402,6 +470,7 @@ def _replace_document_units(
conn.execute("delete from blocks where path = ?", (path,))
conn.execute("delete from sections where path = ?", (path,))
conn.execute("delete from headings where path = ?", (path,))
conn.execute("delete from search_units where path = ?", (path,))
for idx, heading in enumerate(document.get("headings", [])):
conn.execute(
"""
@@ -441,6 +510,22 @@ def _replace_document_units(
line_end,
),
)
conn.execute(
"""
insert into search_units(
path, snapshot_id, unit_kind, unit_index, heading, text, line_start, line_end
) values (?, ?, 'section', ?, ?, ?, ?, ?)
""",
(
path,
snapshot_id,
idx,
str(heading["text"]),
text,
line_start,
line_end,
),
)
for idx, block in enumerate(document.get("blocks", [])):
conn.execute(
"""
@@ -459,6 +544,22 @@ def _replace_document_units(
block.get("heading_level"),
),
)
conn.execute(
"""
insert into search_units(
path, snapshot_id, unit_kind, unit_index, heading, text, line_start, line_end
) values (?, ?, 'block', ?, ?, ?, ?, ?)
""",
(
path,
snapshot_id,
idx,
None,
str(block.get("text", "")),
block.get("line_start"),
block.get("line_end"),
),
)
def _load_dependencies(conn: sqlite3.Connection) -> dict[str, list[DependencyEdge]]: