generated from coulomb/repo-seed
Extensible canonical internal processing refactoring
This commit is contained in:
@@ -5,8 +5,15 @@ from markitect_tool.query.engine import (
|
||||
QueryMatch,
|
||||
extract_document,
|
||||
extract_document_jsonpath,
|
||||
extract_document_with_engine,
|
||||
query_document,
|
||||
query_document_jsonpath,
|
||||
query_document_with_engine,
|
||||
)
|
||||
from markitect_tool.query.registry import (
|
||||
QueryEngine,
|
||||
QueryEngineRegistry,
|
||||
default_query_engine_registry,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
@@ -14,6 +21,11 @@ __all__ = [
|
||||
"QueryMatch",
|
||||
"extract_document",
|
||||
"extract_document_jsonpath",
|
||||
"extract_document_with_engine",
|
||||
"query_document",
|
||||
"query_document_jsonpath",
|
||||
"query_document_with_engine",
|
||||
"QueryEngine",
|
||||
"QueryEngineRegistry",
|
||||
"default_query_engine_registry",
|
||||
]
|
||||
|
||||
@@ -44,6 +44,29 @@ class _Selector:
|
||||
def query_document(document: Document, selector: str) -> list[QueryMatch]:
    """Run a compact Markitect selector against a parsed document.

    Convenience wrapper that forwards to ``query_document_with_engine``
    with the engine fixed to ``"selector"``.

    Args:
        document: The parsed document to search.
        selector: Selector expression understood by the selector engine.

    Returns:
        The matches produced by the selector engine.
    """
    matches = query_document_with_engine(document, selector, engine="selector")
    return matches
|
||||
|
||||
|
||||
def query_document_with_engine(
    document: Document,
    selector: str,
    *,
    engine: str = "selector",
) -> list[QueryMatch]:
    """Dispatch a query to the engine registered under ``engine``.

    Args:
        document: The parsed document to search.
        selector: Query text, interpreted by the chosen engine.
        engine: Id of the registered engine to use.

    Returns:
        Whatever the selected engine's ``query`` callable returns.

    Raises:
        InvalidQueryError: If looking up ``engine`` in the default registry
            raises ``ValueError``; the original error is chained as the cause.
    """
    # Imported at call time: the registry module imports from this module,
    # so a module-level import here would be circular.
    from markitect_tool.query.registry import default_query_engine_registry

    try:
        resolved = default_query_engine_registry().get(engine)
    except ValueError as lookup_error:
        raise InvalidQueryError(str(lookup_error)) from lookup_error
    else:
        return resolved.query(document, selector)
|
||||
|
||||
|
||||
def _query_document_selector(document: Document, selector: str) -> list[QueryMatch]:
|
||||
"""Query a parsed document with the built-in selector engine."""
|
||||
|
||||
parsed = _parse_selector(selector)
|
||||
if parsed.target in {"document", "$", "."}:
|
||||
return [QueryMatch(kind="document", path="$", value=document.to_dict())]
|
||||
@@ -67,6 +90,12 @@ def query_document_jsonpath(document: Document, expression: str) -> list[QueryMa
|
||||
remains dependency-light. Install ``markitect-tool[query]`` to enable it.
|
||||
"""
|
||||
|
||||
return query_document_with_engine(document, expression, engine="jsonpath")
|
||||
|
||||
|
||||
def _query_document_jsonpath(document: Document, expression: str) -> list[QueryMatch]:
|
||||
"""Implementation for the registered optional JSONPath engine."""
|
||||
|
||||
try:
|
||||
from jsonpath_ng.ext import parse as parse_jsonpath
|
||||
except ImportError as exc: # pragma: no cover - branch depends on env deps
|
||||
@@ -110,14 +139,29 @@ def extract_document(document: Document, selector: str) -> list[str]:
|
||||
return extracted
|
||||
|
||||
|
||||
def extract_document_with_engine(
    document: Document,
    selector: str,
    *,
    engine: str = "selector",
) -> list[str]:
    """Collect the textual form of each match produced by a query engine.

    For every match, ``match.text`` is used when it is not ``None``.
    Otherwise a ``str`` value is taken as-is and an ``int``/``float``/``bool``
    value is converted with ``str()``. Matches with any other value type
    contribute nothing.

    Args:
        document: The parsed document to search.
        selector: Query text, interpreted by the chosen engine.
        engine: Id of the registered engine to use.

    Returns:
        The extracted strings, in match order.
    """
    texts: list[str] = []
    for match in query_document_with_engine(document, selector, engine=engine):
        # Explicit text always wins over the raw value.
        if match.text is not None:
            texts.append(match.text)
            continue
        value = match.value
        if isinstance(value, str):
            texts.append(value)
        elif isinstance(value, int | float | bool):
            texts.append(str(value))
    return texts
|
||||
|
||||
|
||||
def extract_document_jsonpath(document: Document, expression: str) -> list[str]:
    """Extract textual JSONPath matches from a parsed document.

    Delegates to ``extract_document_with_engine`` with ``engine="jsonpath"``
    so JSONPath extraction follows the same text/scalar rules as every other
    engine. The earlier hand-rolled loop returned before this delegation
    could run and silently dropped matches that carried only a scalar value.

    Args:
        document: The parsed document to search.
        expression: JSONPath expression to evaluate.

    Returns:
        The extracted strings, in match order.
    """
    return extract_document_with_engine(document, expression, engine="jsonpath")
|
||||
|
||||
|
||||
def _parse_selector(selector: str) -> _Selector:
|
||||
|
||||
105
src/markitect_tool/query/registry.py
Normal file
105
src/markitect_tool/query/registry.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""Query engine registry adapters."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Callable
|
||||
|
||||
from markitect_tool.core import Document
|
||||
from markitect_tool.extension import (
|
||||
ExtensionDescriptor,
|
||||
ExtensionRegistry,
|
||||
OptionalDependency,
|
||||
ProcessingCapability,
|
||||
)
|
||||
from markitect_tool.query.engine import QueryMatch
|
||||
|
||||
|
||||
# Call signature of a query engine implementation: takes the parsed document
# and the query text, returns the list of matches.
QueryCallable = Callable[[Document, str], list[QueryMatch]]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class QueryEngine:
    """Immutable pairing of an extension descriptor with its query callable."""

    # Metadata describing the engine; ``descriptor.id`` is the registry key.
    descriptor: ExtensionDescriptor
    # Implementation invoked as ``query(document, selector)``.
    query: QueryCallable
|
||||
|
||||
|
||||
class QueryEngineRegistry:
    """Collection of query engines addressable by their descriptor id."""

    def __init__(self, engines: list[QueryEngine] | None = None) -> None:
        """Create a registry, registering ``engines`` in the order given."""
        self._engines: dict[str, QueryEngine] = {}
        if engines:
            for candidate in engines:
                self.register(candidate)

    def register(self, engine: QueryEngine) -> None:
        """Add ``engine`` under its descriptor id.

        Raises:
            ValueError: If an engine with the same id is already registered.
        """
        key = engine.descriptor.id
        if key in self._engines:
            raise ValueError(f"Duplicate query engine `{key}`")
        self._engines[key] = engine

    def get(self, engine_id: str) -> QueryEngine:
        """Return the engine registered as ``engine_id``.

        Raises:
            ValueError: If no engine is registered under that id; the
                underlying ``KeyError`` is chained as the cause.
        """
        try:
            found = self._engines[engine_id]
        except KeyError as missing:
            raise ValueError(f"Unknown query engine `{engine_id}`") from missing
        return found

    def list(self) -> list[QueryEngine]:
        """Return all registered engines ordered by id."""
        # Ids are unique dict keys, so sorting on the id alone is a total order.
        by_id = sorted(self._engines.items(), key=lambda item: item[0])
        return [registered for _, registered in by_id]

    def extension_registry(self) -> ExtensionRegistry:
        """Build an ``ExtensionRegistry`` from the engines' descriptors."""
        descriptors = (entry.descriptor for entry in self.list())
        return ExtensionRegistry(descriptors)
|
||||
|
||||
|
||||
def default_query_engine_registry() -> QueryEngineRegistry:
    """Build a registry holding the built-in ``selector`` and ``jsonpath`` engines.

    A new ``QueryEngineRegistry`` is constructed on every call.
    """
    # Imported at call time; the engine implementations live in the module
    # that this module already imports ``QueryMatch`` from.
    from markitect_tool.query.engine import (
        _query_document_jsonpath,
        _query_document_selector,
    )

    selector_engine = QueryEngine(
        descriptor=ExtensionDescriptor(
            id="selector",
            kind="query-engine",
            summary="Compact Markitect selector engine.",
            capabilities=[ProcessingCapability(id="ast", kind="read")],
            input_contract="Document + selector",
            output_contract="QueryMatch[]",
            diagnostics_namespace="query",
            provenance_prefix="query.selector",
            cli={"commands": ["mkt query", "mkt extract", "mkt cache query"]},
            docs=["docs/query-extraction.md"],
        ),
        query=_query_document_selector,
    )

    jsonpath_engine = QueryEngine(
        descriptor=ExtensionDescriptor(
            id="jsonpath",
            kind="query-engine",
            summary="Optional JSONPath engine over Document.to_dict().",
            capabilities=[ProcessingCapability(id="ast", kind="read")],
            optional_dependencies=[
                OptionalDependency(
                    name="jsonpath_ng",
                    package="jsonpath-ng",
                    extra="query",
                    required=True,
                    purpose="Evaluate JSONPath expressions.",
                )
            ],
            input_contract="Document + JSONPath expression",
            output_contract="QueryMatch[]",
            diagnostics_namespace="query.jsonpath",
            provenance_prefix="query.jsonpath",
            cli={"commands": ["mkt query --engine jsonpath", "mkt extract --engine jsonpath"]},
            docs=["docs/query-extraction.md", "docs/local-index-backend.md"],
        ),
        query=_query_document_jsonpath,
    )

    return QueryEngineRegistry([selector_engine, jsonpath_engine])
|
||||
Reference in New Issue
Block a user