generated from coulomb/repo-seed
Extensible canonical internal processing refactoring
This commit is contained in:
64
src/markitect_tool/cli/extensions.py
Normal file
64
src/markitect_tool/cli/extensions.py
Normal file
@@ -0,0 +1,64 @@
|
||||
"""CLI extension specifications derived from internal extension descriptors."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
|
||||
from markitect_tool.extension import ExtensionDescriptor, ExtensionRegistry
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CliCommandSpec:
    """Inspectable command affordance declared by an extension.

    One spec pairs a single CLI command string with the id, kind and summary
    of the extension descriptor that declared it.
    """

    command: str
    extension_id: str
    kind: str
    summary: str | None = None
    # Free-form extra data; ``command_specs_from_extension`` fills it with the
    # descriptor's ``cli`` entries other than "commands".
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the spec as a plain dict.

        Entries whose value is ``None``, ``{}`` or ``[]`` are omitted; empty
        strings are kept.
        """
        data = {
            "command": self.command,
            "extension_id": self.extension_id,
            "kind": self.kind,
            "summary": self.summary,
            "metadata": self.metadata,
        }
        return {
            key: value
            for key, value in data.items()
            if value not in (None, {}, [])
        }
|
||||
|
||||
|
||||
def command_specs_from_extension(descriptor: ExtensionDescriptor) -> list[CliCommandSpec]:
    """Return CLI command specs declared by one extension descriptor.

    The descriptor's ``cli`` mapping is read for a ``"commands"`` entry, which
    may be a single string or an iterable of commands. Every other ``cli``
    entry is attached to each resulting spec as ``metadata``.

    Returns an empty list when the descriptor declares no commands.
    """

    cli = descriptor.cli
    # ``or []`` also covers an explicit ``"commands": None`` (or empty value),
    # which would otherwise fail when iterated below.
    raw_commands = cli.get("commands") or []
    if isinstance(raw_commands, str):
        # A bare string is one command, not a sequence of characters.
        raw_commands = [raw_commands]

    # The non-command entries are the same for every spec; filter them once.
    shared_metadata = {key: value for key, value in cli.items() if key != "commands"}
    return [
        CliCommandSpec(
            command=str(command),
            extension_id=descriptor.id,
            kind=descriptor.kind,
            summary=descriptor.summary,
            # Each spec gets its own dict so specs never share a mutable object.
            metadata=dict(shared_metadata),
        )
        for command in raw_commands
    ]
|
||||
|
||||
|
||||
def collect_cli_command_specs(registry: ExtensionRegistry) -> list[CliCommandSpec]:
    """Collect CLI affordances from a registry of extension descriptors.

    Gathers the command specs of every descriptor returned by
    ``registry.list()`` and returns them sorted by ``(command, extension_id)``.
    """

    specs = [
        spec
        for descriptor in registry.list()
        for spec in command_specs_from_extension(descriptor)
    ]
    specs.sort(key=lambda spec: (spec.command, spec.extension_id))
    return specs
|
||||
56
src/markitect_tool/extension/__init__.py
Normal file
56
src/markitect_tool/extension/__init__.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""Internal extension framework primitives."""
|
||||
|
||||
from markitect_tool.extension.processing import (
|
||||
ProcessingCapability,
|
||||
ProcessingContext,
|
||||
ProcessingDiagnostic,
|
||||
ProcessingProvenance,
|
||||
ProcessingRequest,
|
||||
ProcessingResult,
|
||||
ProcessingTrace,
|
||||
)
|
||||
from markitect_tool.extension.execution import (
|
||||
AfterCallback,
|
||||
BeforeCallback,
|
||||
ExtensionExecutor,
|
||||
ExtensionLifecycle,
|
||||
ExtensionRunner,
|
||||
)
|
||||
from markitect_tool.extension.registry import (
|
||||
ExtensionDependencyCheck,
|
||||
ExtensionDescriptor,
|
||||
ExtensionRegistry,
|
||||
ExtensionRegistryError,
|
||||
OptionalDependency,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"ProcessingCapability",
|
||||
"ProcessingContext",
|
||||
"ProcessingDiagnostic",
|
||||
"ProcessingProvenance",
|
||||
"ProcessingRequest",
|
||||
"ProcessingResult",
|
||||
"ProcessingTrace",
|
||||
"ExtensionDependencyCheck",
|
||||
"ExtensionDescriptor",
|
||||
"ExtensionRegistry",
|
||||
"ExtensionRegistryError",
|
||||
"OptionalDependency",
|
||||
"AfterCallback",
|
||||
"BeforeCallback",
|
||||
"ExtensionExecutor",
|
||||
"ExtensionLifecycle",
|
||||
"ExtensionRunner",
|
||||
]
|
||||
|
||||
|
||||
def builtin_extension_registry() -> ExtensionRegistry:
    """Return built-in extension descriptors without import-cycle pressure."""

    # Imported at call time rather than at package import time, so importing
    # this package does not load ``builtins`` (and what it imports) eagerly.
    from markitect_tool.extension.builtins import builtin_extension_registry as _registry

    return _registry()
|
||||
|
||||
|
||||
__all__.append("builtin_extension_registry")
|
||||
92
src/markitect_tool/extension/builtins.py
Normal file
92
src/markitect_tool/extension/builtins.py
Normal file
@@ -0,0 +1,92 @@
|
||||
"""Built-in internal extension descriptors."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from markitect_tool.extension.registry import ExtensionDescriptor, ExtensionRegistry
|
||||
from markitect_tool.extension.processing import ProcessingCapability
|
||||
from markitect_tool.query import default_query_engine_registry
|
||||
|
||||
|
||||
def builtin_extension_registry() -> ExtensionRegistry:
    """Return descriptors for built-in Markitect extensions.

    Starts from the extension registry derived from the default query engine
    registry, then registers the processor descriptors and the local SQLite
    backend descriptor on top of it.
    """

    registry = default_query_engine_registry().extension_registry()
    extra_descriptors = [*_processor_descriptors(), _local_sqlite_backend_descriptor()]
    for descriptor in extra_descriptors:
        registry.register(descriptor)
    return registry
|
||||
|
||||
|
||||
def _processor_descriptors() -> list[ExtensionDescriptor]:
    """Return the descriptors of the built-in fenced-block processors.

    Covers ``processor.identity``, ``processor.uppercase`` and
    ``processor.include``. No descriptor sets a ``factory``; they describe the
    processors for inspection only.
    """

    return [
        ExtensionDescriptor(
            id="processor.identity",
            kind="processor",
            summary="Return fenced block content unchanged.",
            capabilities=[
                ProcessingCapability(id="processor", kind="execute"),
                ProcessingCapability(id="deterministic", kind="execution"),
            ],
            input_contract="ProcessorRequest",
            output_contract="ProcessorResult",
            diagnostics_namespace="processor",
            provenance_prefix="processor.identity",
            cli={"commands": ["mkt process"]},
            docs=["docs/processors.md"],
        ),
        ExtensionDescriptor(
            id="processor.uppercase",
            kind="processor",
            summary="Uppercase fenced block content deterministically.",
            capabilities=[
                ProcessingCapability(id="processor", kind="execute"),
                ProcessingCapability(id="deterministic", kind="execution"),
            ],
            input_contract="ProcessorRequest",
            output_contract="ProcessorResult",
            diagnostics_namespace="processor",
            provenance_prefix="processor.uppercase",
            cli={"commands": ["mkt process"]},
            docs=["docs/processors.md"],
        ),
        ExtensionDescriptor(
            id="processor.include",
            kind="processor",
            summary="Resolve a content reference into fenced block output.",
            capabilities=[
                ProcessingCapability(id="processor", kind="execute"),
                ProcessingCapability(id="references", kind="read"),
                ProcessingCapability(id="filesystem", kind="read"),
            ],
            # The only processor here that declares a safety profile.
            safety={"reads_files": True, "writes_files": False, "network": False},
            input_contract="ProcessorRequest",
            output_contract="ProcessorResult",
            diagnostics_namespace="processor",
            provenance_prefix="processor.include",
            cli={"commands": ["mkt process"]},
            docs=["docs/processors.md", "docs/content-references.md"],
        ),
    ]
|
||||
|
||||
|
||||
def _local_sqlite_backend_descriptor() -> ExtensionDescriptor:
    """Return the descriptor of the built-in ``backend.local-sqlite`` backend.

    The descriptor sets no ``factory``; it describes the backend for
    inspection only.
    """

    return ExtensionDescriptor(
        id="backend.local-sqlite",
        kind="backend",
        summary="Local SQLite snapshot, metadata, JSON, and FTS5 index backend.",
        capabilities=[
            ProcessingCapability(id="snapshots", kind="backend"),
            ProcessingCapability(id="ast", kind="backend"),
            ProcessingCapability(id="json", kind="backend"),
            ProcessingCapability(id="fts", kind="backend"),
            ProcessingCapability(id="sql", kind="backend"),
            ProcessingCapability(id="provenance", kind="backend"),
        ],
        safety={"reads_files": True, "writes_local_cache": True, "network": False},
        input_contract="Markdown files/directories",
        output_contract="SQLite snapshot/index store",
        diagnostics_namespace="backend.local_sqlite",
        provenance_prefix="local_snapshot_store",
        cli={"commands": ["mkt cache init", "mkt cache index", "mkt cache query", "mkt search"]},
        docs=["docs/local-index-backend.md", "docs/backend-fabric.md"],
        examples=["examples/backends/local-sqlite-backend.md"],
    )
|
||||
98
src/markitect_tool/extension/execution.py
Normal file
98
src/markitect_tool/extension/execution.py
Normal file
@@ -0,0 +1,98 @@
|
||||
"""Execution lifecycle for internal extensions."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Callable
|
||||
|
||||
from markitect_tool.extension.processing import (
|
||||
ProcessingRequest,
|
||||
ProcessingResult,
|
||||
ProcessingTrace,
|
||||
)
|
||||
from markitect_tool.extension.registry import (
|
||||
ExtensionDescriptor,
|
||||
ExtensionRegistry,
|
||||
ExtensionRegistryError,
|
||||
)
|
||||
|
||||
|
||||
ExtensionRunner = Callable[[ProcessingRequest], ProcessingResult]
|
||||
BeforeCallback = Callable[[ExtensionDescriptor, ProcessingRequest], None]
|
||||
AfterCallback = Callable[[ExtensionDescriptor, ProcessingRequest, ProcessingResult], None]
|
||||
|
||||
|
||||
@dataclass
class ExtensionLifecycle:
    """Explicit callbacks around extension execution.

    Holds four callback lists, each kept in registration order. Every ``on_*``
    method appends to one list and returns the callback unchanged, so it can
    be called directly or used as a decorator.
    """

    before: list[BeforeCallback] = field(default_factory=list)
    after_success: list[AfterCallback] = field(default_factory=list)
    after_failure: list[AfterCallback] = field(default_factory=list)
    after: list[AfterCallback] = field(default_factory=list)

    def on_before(self, callback: BeforeCallback) -> BeforeCallback:
        """Append *callback* to the ``before`` list and return it."""
        self.before.append(callback)
        return callback

    def on_success(self, callback: AfterCallback) -> AfterCallback:
        """Append *callback* to the ``after_success`` list and return it."""
        self.after_success.append(callback)
        return callback

    def on_failure(self, callback: AfterCallback) -> AfterCallback:
        """Append *callback* to the ``after_failure`` list and return it."""
        self.after_failure.append(callback)
        return callback

    def on_after(self, callback: AfterCallback) -> AfterCallback:
        """Append *callback* to the ``after`` list and return it."""
        self.after.append(callback)
        return callback
|
||||
|
||||
|
||||
class ExtensionExecutor:
    """Execute registered extensions with deterministic lifecycle callbacks."""

    def __init__(
        self,
        registry: ExtensionRegistry,
        *,
        lifecycle: ExtensionLifecycle | None = None,
    ) -> None:
        """Bind the executor to *registry*; a fresh lifecycle is used when none is given."""
        self.registry = registry
        self.lifecycle = ExtensionLifecycle() if lifecycle is None else lifecycle

    def execute(self, extension_id: str, request: ProcessingRequest) -> ProcessingResult:
        """Run one registered extension and return its result.

        Order of operations:

        1. Look up the descriptor and check its dependencies. When a required
           dependency is missing, return an ``extension.missing_dependency``
           error result at once; nothing is instantiated and no callback runs.
        2. Instantiate the runner through the descriptor's factory.
        3. Call every ``before`` callback, then the runner.
        4. Append an ``extension.executed`` trace event to the result.
        5. Call ``after_success`` or ``after_failure`` callbacks depending on
           ``result.valid``, then every ``after`` callback.

        Raises:
            ExtensionRegistryError: If the factory result is not callable or
                the runner returns something other than a ``ProcessingResult``.
                Exceptions raised by the registry lookup, the factory, the
                runner or a callback are not caught here.
        """
        descriptor = self.registry.get(extension_id)
        dependency_check = self.registry.check_dependencies(extension_id)
        if not dependency_check.compatible:
            return ProcessingResult.from_error(
                code="extension.missing_dependency",
                message=f"Extension `{extension_id}` is missing required dependencies.",
                details=dependency_check.to_dict(),
            )

        runner = descriptor.instantiate()
        if not callable(runner):
            raise ExtensionRegistryError(f"Extension `{extension_id}` factory did not return a callable")

        for callback in self.lifecycle.before:
            callback(descriptor, request)

        result = runner(request)
        if not isinstance(result, ProcessingResult):
            raise ExtensionRegistryError(
                f"Extension `{extension_id}` returned {type(result).__name__}, expected ProcessingResult"
            )

        # Callbacks observe the result that already carries the execution trace.
        result = _with_trace(result, ProcessingTrace(event="extension.executed", metadata={"id": extension_id}))
        outcome_callbacks = self.lifecycle.after_success if result.valid else self.lifecycle.after_failure
        for callback in outcome_callbacks:
            callback(descriptor, request, result)
        for callback in self.lifecycle.after:
            callback(descriptor, request, result)
        return result
|
||||
|
||||
|
||||
def _with_trace(result: ProcessingResult, trace: ProcessingTrace) -> ProcessingResult:
    """Return a copy of *result* with *trace* appended to its trace list.

    *result* itself is left untouched; the copy gets a new trace list.
    """
    # Local import: only this helper needs ``replace``.
    from dataclasses import replace

    # ``replace`` carries every other field over unchanged, so fields added to
    # the result type later are not silently dropped here.
    return replace(result, trace=[*result.trace, trace])
|
||||
184
src/markitect_tool/extension/processing.py
Normal file
184
src/markitect_tool/extension/processing.py
Normal file
@@ -0,0 +1,184 @@
|
||||
"""Canonical processing envelopes for internal extensions."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from markitect_tool.diagnostics import Diagnostic, SourceLocation, has_error
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ProcessingCapability:
    """A declared capability or permission needed by an extension."""

    id: str
    kind: str = "feature"
    required: bool = True
    description: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a dict with empty values removed by ``_drop_empty``."""
        return _drop_empty(asdict(self))
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ProcessingProvenance:
    """Cross-extension provenance envelope."""

    operation: str
    source_path: str | None = None
    snapshot_id: str | None = None
    content_hash: str | None = None
    dependencies: list[str] = field(default_factory=list)
    backend_id: str | None = None
    provider_id: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a dict with empty values removed by ``_drop_empty``."""
        return _drop_empty(asdict(self))
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ProcessingTrace:
    """One optional execution trace event."""

    event: str
    message: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a dict with empty values removed by ``_drop_empty``."""
        return _drop_empty(asdict(self))
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ProcessingContext:
    """Shared execution context available to extension implementations."""

    root: Path = Path(".")
    source_path: Path | None = None
    namespaces: dict[str, str] = field(default_factory=dict)
    variables: dict[str, Any] = field(default_factory=dict)
    policy: dict[str, Any] = field(default_factory=dict)
    # Excluded from ``repr`` and from equality comparison.
    backend_handles: dict[str, Any] = field(default_factory=dict, repr=False, compare=False)
    caller: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the context with empty values removed.

        Paths are rendered as strings, and ``backend_handles`` is reduced to
        the sorted list of its keys; the handle objects are not included.
        """
        data = {
            "root": str(self.root),
            "source_path": str(self.source_path) if self.source_path else None,
            "namespaces": self.namespaces,
            "variables": self.variables,
            "policy": self.policy,
            "backend_handles": sorted(self.backend_handles),
            "caller": self.caller,
            "metadata": self.metadata,
        }
        return _drop_empty(data)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ProcessingRequest:
    """Canonical request passed to an internal extension."""

    operation: str
    input: Any
    context: ProcessingContext = field(default_factory=ProcessingContext)
    options: dict[str, Any] = field(default_factory=dict)
    scope: str | None = None
    capabilities: list[ProcessingCapability] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def cache_key(self) -> str:
        """Return ``"processing:"`` plus a SHA-256 hex digest of the request.

        The digest covers operation, input, options, scope, capabilities and
        metadata, serialized as key-sorted JSON. ``context`` is not part of
        the key.
        """
        payload = {
            "operation": self.operation,
            "input": self.input,
            "options": self.options,
            "scope": self.scope,
            "capabilities": [capability.to_dict() for capability in self.capabilities],
            "metadata": self.metadata,
        }
        # ``default=str`` stringifies values JSON cannot encode.
        # NOTE(review): a value whose ``str()`` differs between runs would give
        # an unstable key; confirm that callers only pass stable values.
        serialized = json.dumps(payload, sort_keys=True, ensure_ascii=False, default=str)
        return "processing:" + hashlib.sha256(serialized.encode("utf-8")).hexdigest()

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the request, including its ``cache_key``.

        Empty values are removed by ``_drop_empty``.
        """
        data = {
            "operation": self.operation,
            "input": self.input,
            "context": self.context.to_dict(),
            "options": self.options,
            "scope": self.scope,
            "capabilities": [capability.to_dict() for capability in self.capabilities],
            "metadata": self.metadata,
            "cache_key": self.cache_key,
        }
        return _drop_empty(data)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ProcessingResult:
    """Canonical result returned by an internal extension."""

    output: Any = None
    diagnostics: list[Diagnostic] = field(default_factory=list)
    provenance: list[ProcessingProvenance] = field(default_factory=list)
    dependencies: list[str] = field(default_factory=list)
    trace: list[ProcessingTrace] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def valid(self) -> bool:
        """Return ``True`` unless ``has_error`` reports an error among the diagnostics."""
        return not has_error(self.diagnostics)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the result, including ``valid``.

        Diagnostics, provenance and trace entries are converted through their
        own ``to_dict``; empty values are removed by ``_drop_empty``.
        """
        data = {
            "valid": self.valid,
            "output": self.output,
            "diagnostics": [diagnostic.to_dict() for diagnostic in self.diagnostics],
            "provenance": [event.to_dict() for event in self.provenance],
            "dependencies": self.dependencies,
            "trace": [event.to_dict() for event in self.trace],
            "metadata": self.metadata,
        }
        return _drop_empty(data)

    @classmethod
    def from_error(
        cls,
        *,
        code: str,
        message: str,
        source_path: str | None = None,
        line: int | None = None,
        details: dict[str, Any] | None = None,
    ) -> "ProcessingResult":
        """Build a result that carries a single error diagnostic and no output.

        A source location is attached only when *source_path* or *line* is
        truthy; *details* defaults to an empty dict.
        """
        location = SourceLocation(path=source_path, line=line) if source_path or line else None
        error = Diagnostic(
            severity="error",
            code=code,
            message=message,
            source=location,
            details=details or {},
        )
        return cls(diagnostics=[error])
|
||||
|
||||
|
||||
ProcessingDiagnostic = Diagnostic
|
||||
|
||||
|
||||
def _drop_empty(data: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of *data* without ``None``, ``[]``, ``{}`` and ``""`` values.

    Other falsy values such as ``False``, ``0`` and ``()`` are kept.
    """

    def _is_empty(value: Any) -> bool:
        # Emptiness is decided by type, not by ``value in (None, [], {}, "")``:
        # the membership test runs ``==`` against arbitrary payloads, which
        # fails for objects whose equality raises or is not a plain boolean.
        return value is None or (isinstance(value, (list, dict, str)) and not value)

    return {key: value for key, value in data.items() if not _is_empty(value)}
|
||||
193
src/markitect_tool/extension/registry.py
Normal file
193
src/markitect_tool/extension/registry.py
Normal file
@@ -0,0 +1,193 @@
|
||||
"""Extension descriptors and registries."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from typing import Any, Callable, Iterable
|
||||
|
||||
from markitect_tool.extension.processing import ProcessingCapability
|
||||
|
||||
|
||||
ExtensionFactory = Callable[[], Any]
|
||||
|
||||
|
||||
class ExtensionRegistryError(ValueError):
    """Raised when extension descriptors or registries are invalid."""
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class OptionalDependency:
    """An optional runtime dependency declared by an extension."""

    # Compared against available module names by ``ExtensionRegistry.check_dependencies``.
    name: str
    package: str | None = None
    extra: str | None = None
    # When true, a missing dependency is reported under ``missing`` by
    # ``check_dependencies``; otherwise under ``optional_missing``.
    required: bool = False
    purpose: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a dict with empty values removed by ``_drop_empty``."""
        return _drop_empty(asdict(self))
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ExtensionDescriptor:
    """Inspectable descriptor for one internal extension."""

    id: str
    kind: str
    version: str = "1"
    summary: str | None = None
    # Excluded from equality and ``repr``; also left out of ``to_dict``.
    factory: ExtensionFactory | None = field(default=None, compare=False, repr=False)
    capabilities: list[ProcessingCapability] = field(default_factory=list)
    optional_dependencies: list[OptionalDependency] = field(default_factory=list)
    safety: dict[str, Any] = field(default_factory=dict)
    input_contract: str | None = None
    output_contract: str | None = None
    diagnostics_namespace: str | None = None
    provenance_prefix: str | None = None
    cli: dict[str, Any] = field(default_factory=dict)
    docs: list[str] = field(default_factory=list)
    examples: list[str] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Reject descriptors whose id or kind is empty or whitespace-only.

        Raises:
            ExtensionRegistryError: If ``id`` or ``kind`` is blank.
        """
        if not self.id.strip():
            raise ExtensionRegistryError("Extension id cannot be empty")
        if not self.kind.strip():
            raise ExtensionRegistryError("Extension kind cannot be empty")

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the descriptor with empty values removed.

        Capabilities and optional dependencies are converted through their
        own ``to_dict``; ``factory`` is not included.
        """
        data = {
            "id": self.id,
            "kind": self.kind,
            "version": self.version,
            "summary": self.summary,
            "capabilities": [capability.to_dict() for capability in self.capabilities],
            "optional_dependencies": [
                dependency.to_dict() for dependency in self.optional_dependencies
            ],
            "safety": self.safety,
            "input_contract": self.input_contract,
            "output_contract": self.output_contract,
            "diagnostics_namespace": self.diagnostics_namespace,
            "provenance_prefix": self.provenance_prefix,
            "cli": self.cli,
            "docs": self.docs,
            "examples": self.examples,
            "metadata": self.metadata,
        }
        return _drop_empty(data)

    def instantiate(self) -> Any:
        """Create or return the implementation for this descriptor.

        Calls ``factory`` with no arguments and returns whatever it returns.

        Raises:
            ExtensionRegistryError: If the descriptor has no factory.
        """

        if self.factory is None:
            raise ExtensionRegistryError(f"Extension `{self.id}` has no factory")
        return self.factory()
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ExtensionDependencyCheck:
    """Result of checking required extension dependencies."""

    extension_id: str
    missing: list[str] = field(default_factory=list)
    optional_missing: list[str] = field(default_factory=list)

    @property
    def compatible(self) -> bool:
        """Return ``True`` when ``missing`` is empty; ``optional_missing`` is ignored."""
        return not self.missing

    def to_dict(self) -> dict[str, Any]:
        """Return all fields plus ``compatible``; empty lists are kept."""
        return {
            "extension_id": self.extension_id,
            "compatible": self.compatible,
            "missing": self.missing,
            "optional_missing": self.optional_missing,
        }
|
||||
|
||||
|
||||
class ExtensionRegistry:
    """Registry of internal extension descriptors."""

    def __init__(self, descriptors: Iterable[ExtensionDescriptor] | None = None) -> None:
        """Create a registry, registering each of *descriptors* in order.

        Raises:
            ExtensionRegistryError: If two descriptors share an id.
        """
        self._descriptors: dict[str, ExtensionDescriptor] = {}
        for descriptor in descriptors or []:
            self.register(descriptor)

    def register(self, descriptor: ExtensionDescriptor) -> None:
        """Add *descriptor* under its id.

        Raises:
            ExtensionRegistryError: If the id is already registered.
        """
        if descriptor.id in self._descriptors:
            raise ExtensionRegistryError(f"Duplicate extension id `{descriptor.id}`")
        self._descriptors[descriptor.id] = descriptor

    def get(self, extension_id: str) -> ExtensionDescriptor:
        """Return the descriptor registered under *extension_id*.

        Raises:
            ExtensionRegistryError: If no such extension is registered.
        """
        try:
            return self._descriptors[extension_id]
        except KeyError as exc:
            raise ExtensionRegistryError(f"Unknown extension `{extension_id}`") from exc

    # This method shadows the builtin ``list`` inside the class namespace. The
    # ``list[...]`` annotations below are safe because the module uses
    # ``from __future__ import annotations``, so they are never evaluated.
    def list(self, *, kind: str | None = None) -> list[ExtensionDescriptor]:
        """Return descriptors sorted by id, optionally restricted to one *kind*."""
        descriptors = [self._descriptors[key] for key in sorted(self._descriptors)]
        if kind is None:
            return descriptors
        return [descriptor for descriptor in descriptors if descriptor.kind == kind]

    def require_capability(self, capability_id: str) -> list[ExtensionDescriptor]:
        """Return descriptors, sorted by id, that declare a capability with *capability_id*.

        Returns an empty list when nothing matches; nothing is raised.
        """
        return [
            descriptor
            for descriptor in self.list()
            if any(capability.id == capability_id for capability in descriptor.capabilities)
        ]

    def check_dependencies(
        self,
        extension_id: str,
        *,
        available_modules: set[str] | None = None,
    ) -> ExtensionDependencyCheck:
        """Report which declared dependencies of an extension are unavailable.

        Each entry of the descriptor's ``optional_dependencies`` whose ``name``
        is not available is listed under ``missing`` when it is flagged
        ``required``, and under ``optional_missing`` otherwise.

        Args:
            extension_id: Id of a registered extension.
            available_modules: Names to treat as available. When ``None``, the
                dependency names are probed with ``_available_modules``.

        Raises:
            ExtensionRegistryError: If *extension_id* is not registered.
        """
        descriptor = self.get(extension_id)
        if available_modules is not None:
            available = available_modules
        else:
            available = _available_modules(
                dependency.name for dependency in descriptor.optional_dependencies
            )

        missing: list[str] = []
        optional_missing: list[str] = []
        for dependency in descriptor.optional_dependencies:
            if dependency.name in available:
                continue
            bucket = missing if dependency.required else optional_missing
            bucket.append(dependency.name)

        return ExtensionDependencyCheck(
            extension_id=extension_id,
            missing=missing,
            optional_missing=optional_missing,
        )

    def to_dict(self) -> dict[str, Any]:
        """Return the descriptor count and every descriptor's dict form, sorted by id."""
        return {
            "count": len(self._descriptors),
            "extensions": [descriptor.to_dict() for descriptor in self.list()],
        }
|
||||
|
||||
|
||||
def _available_modules(module_names: Iterable[str]) -> set[str]:
    """Return the subset of *module_names* that ``importlib`` can locate.

    A name that cannot be resolved at all is treated as not available and
    never raises.
    """
    import importlib.util

    def _is_available(module_name: str) -> bool:
        try:
            return importlib.util.find_spec(module_name) is not None
        except (ImportError, ValueError):
            # ``find_spec`` imports the parent package of a dotted name and
            # raises ``ModuleNotFoundError`` when that parent is missing; it
            # raises ``ValueError`` for malformed names or modules without a
            # spec. For a dependency probe, all of these mean "not available".
            return False

    return {module_name for module_name in module_names if _is_available(module_name)}
|
||||
|
||||
|
||||
def _drop_empty(data: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of *data* without ``None``, ``[]``, ``{}`` and ``""`` values.

    Other falsy values such as ``False``, ``0`` and ``()`` are kept.
    """

    def _is_empty(value: Any) -> bool:
        # Emptiness is decided by type, not by ``value in (None, [], {}, "")``:
        # the membership test runs ``==`` against arbitrary payloads, which
        # fails for objects whose equality raises or is not a plain boolean.
        return value is None or (isinstance(value, (list, dict, str)) and not value)

    return {key: value for key, value in data.items() if not _is_empty(value)}
|
||||
@@ -5,8 +5,15 @@ from markitect_tool.query.engine import (
|
||||
QueryMatch,
|
||||
extract_document,
|
||||
extract_document_jsonpath,
|
||||
extract_document_with_engine,
|
||||
query_document,
|
||||
query_document_jsonpath,
|
||||
query_document_with_engine,
|
||||
)
|
||||
from markitect_tool.query.registry import (
|
||||
QueryEngine,
|
||||
QueryEngineRegistry,
|
||||
default_query_engine_registry,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
@@ -14,6 +21,11 @@ __all__ = [
|
||||
"QueryMatch",
|
||||
"extract_document",
|
||||
"extract_document_jsonpath",
|
||||
"extract_document_with_engine",
|
||||
"query_document",
|
||||
"query_document_jsonpath",
|
||||
"query_document_with_engine",
|
||||
"QueryEngine",
|
||||
"QueryEngineRegistry",
|
||||
"default_query_engine_registry",
|
||||
]
|
||||
|
||||
@@ -44,6 +44,29 @@ class _Selector:
|
||||
def query_document(document: Document, selector: str) -> list[QueryMatch]:
    """Query a parsed document with a small Markitect selector."""

    # Thin wrapper: delegates to the engine registered under "selector".
    return query_document_with_engine(document, selector, engine="selector")
|
||||
|
||||
|
||||
def query_document_with_engine(
    document: Document,
    selector: str,
    *,
    engine: str = "selector",
) -> list[QueryMatch]:
    """Query a parsed document through a registered query engine.

    Raises:
        InvalidQueryError: If the engine lookup raises ``ValueError``, as
            ``QueryEngineRegistry.get`` does for an unknown engine id.
    """

    # Imported here, not at module level: the registry module itself imports
    # from this module.
    from markitect_tool.query.registry import default_query_engine_registry

    try:
        query_engine = default_query_engine_registry().get(engine)
    except ValueError as exc:
        raise InvalidQueryError(str(exc)) from exc
    return query_engine.query(document, selector)
|
||||
|
||||
|
||||
def _query_document_selector(document: Document, selector: str) -> list[QueryMatch]:
|
||||
"""Query a parsed document with the built-in selector engine."""
|
||||
|
||||
parsed = _parse_selector(selector)
|
||||
if parsed.target in {"document", "$", "."}:
|
||||
return [QueryMatch(kind="document", path="$", value=document.to_dict())]
|
||||
@@ -67,6 +90,12 @@ def query_document_jsonpath(document: Document, expression: str) -> list[QueryMa
|
||||
remains dependency-light. Install ``markitect-tool[query]`` to enable it.
|
||||
"""
|
||||
|
||||
return query_document_with_engine(document, expression, engine="jsonpath")
|
||||
|
||||
|
||||
def _query_document_jsonpath(document: Document, expression: str) -> list[QueryMatch]:
|
||||
"""Implementation for the registered optional JSONPath engine."""
|
||||
|
||||
try:
|
||||
from jsonpath_ng.ext import parse as parse_jsonpath
|
||||
except ImportError as exc: # pragma: no cover - branch depends on env deps
|
||||
@@ -110,14 +139,29 @@ def extract_document(document: Document, selector: str) -> list[str]:
|
||||
return extracted
|
||||
|
||||
|
||||
def extract_document_with_engine(
    document: Document,
    selector: str,
    *,
    engine: str = "selector",
) -> list[str]:
    """Extract textual query matches through a registered query engine.

    For each match, in order: its ``text`` is used when not ``None``;
    otherwise a string ``value`` is used as is and a numeric or boolean
    ``value`` is converted with ``str``. Matches with any other value are
    skipped.
    """

    extracted: list[str] = []
    for match in query_document_with_engine(document, selector, engine=engine):
        if match.text is not None:
            extracted.append(match.text)
        elif isinstance(match.value, str):
            extracted.append(match.value)
        elif isinstance(match.value, (int, float)):
            # ``bool`` is a subclass of ``int``, so booleans are covered too.
            extracted.append(str(match.value))
    return extracted
|
||||
|
||||
|
||||
def extract_document_jsonpath(document: Document, expression: str) -> list[str]:
    """Extract textual JSONPath matches from a parsed document."""

    # Thin wrapper: delegates to the shared extraction helper using the
    # engine registered under "jsonpath".
    return extract_document_with_engine(document, expression, engine="jsonpath")
|
||||
|
||||
|
||||
def _parse_selector(selector: str) -> _Selector:
|
||||
|
||||
105
src/markitect_tool/query/registry.py
Normal file
105
src/markitect_tool/query/registry.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""Query engine registry adapters."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Callable
|
||||
|
||||
from markitect_tool.core import Document
|
||||
from markitect_tool.extension import (
|
||||
ExtensionDescriptor,
|
||||
ExtensionRegistry,
|
||||
OptionalDependency,
|
||||
ProcessingCapability,
|
||||
)
|
||||
from markitect_tool.query.engine import QueryMatch
|
||||
|
||||
|
||||
QueryCallable = Callable[[Document, str], list[QueryMatch]]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class QueryEngine:
    """Registered query engine implementation.

    Pairs the descriptor that describes the engine with the callable that
    runs a query against a document.
    """

    descriptor: ExtensionDescriptor
    query: QueryCallable
|
||||
|
||||
|
||||
class QueryEngineRegistry:
    """Registry of query engines keyed by short engine id."""

    def __init__(self, engines: list[QueryEngine] | None = None) -> None:
        """Create a registry, registering each of *engines* in order.

        Raises:
            ValueError: If two engines share a descriptor id.
        """
        self._engines: dict[str, QueryEngine] = {}
        for engine in engines or []:
            self.register(engine)

    def register(self, engine: QueryEngine) -> None:
        """Add *engine* under its descriptor id.

        Raises:
            ValueError: If the id is already registered.
        """
        engine_id = engine.descriptor.id
        if engine_id in self._engines:
            raise ValueError(f"Duplicate query engine `{engine_id}`")
        self._engines[engine_id] = engine

    def get(self, engine_id: str) -> QueryEngine:
        """Return the engine registered under *engine_id*.

        Raises:
            ValueError: If no such engine is registered.
        """
        try:
            return self._engines[engine_id]
        except KeyError as exc:
            raise ValueError(f"Unknown query engine `{engine_id}`") from exc

    # This method shadows the builtin ``list`` inside the class namespace; the
    # module's ``from __future__ import annotations`` keeps the annotations
    # unevaluated.
    def list(self) -> list[QueryEngine]:
        """Return all engines sorted by id."""
        return [self._engines[key] for key in sorted(self._engines)]

    def extension_registry(self) -> ExtensionRegistry:
        """Return a new ``ExtensionRegistry`` holding every engine's descriptor."""
        return ExtensionRegistry(engine.descriptor for engine in self.list())
|
||||
|
||||
|
||||
def default_query_engine_registry() -> QueryEngineRegistry:
    """Return the built-in query engine registry.

    A new registry is built on every call. It holds the ``selector`` engine
    and the ``jsonpath`` engine; the latter declares ``jsonpath_ng`` as a
    required dependency.
    """

    # Imported at call time; the engine implementations are private to
    # ``markitect_tool.query.engine``.
    from markitect_tool.query.engine import (
        _query_document_jsonpath,
        _query_document_selector,
    )

    selector_engine = QueryEngine(
        descriptor=ExtensionDescriptor(
            id="selector",
            kind="query-engine",
            summary="Compact Markitect selector engine.",
            capabilities=[ProcessingCapability(id="ast", kind="read")],
            input_contract="Document + selector",
            output_contract="QueryMatch[]",
            diagnostics_namespace="query",
            provenance_prefix="query.selector",
            cli={"commands": ["mkt query", "mkt extract", "mkt cache query"]},
            docs=["docs/query-extraction.md"],
        ),
        query=_query_document_selector,
    )
    jsonpath_engine = QueryEngine(
        descriptor=ExtensionDescriptor(
            id="jsonpath",
            kind="query-engine",
            summary="Optional JSONPath engine over Document.to_dict().",
            capabilities=[ProcessingCapability(id="ast", kind="read")],
            optional_dependencies=[
                OptionalDependency(
                    name="jsonpath_ng",
                    package="jsonpath-ng",
                    extra="query",
                    required=True,
                    purpose="Evaluate JSONPath expressions.",
                )
            ],
            input_contract="Document + JSONPath expression",
            output_contract="QueryMatch[]",
            diagnostics_namespace="query.jsonpath",
            provenance_prefix="query.jsonpath",
            cli={"commands": ["mkt query --engine jsonpath", "mkt extract --engine jsonpath"]},
            docs=["docs/query-extraction.md", "docs/local-index-backend.md"],
        ),
        query=_query_document_jsonpath,
    )
    return QueryEngineRegistry([selector_engine, jsonpath_engine])
|
||||
Reference in New Issue
Block a user