From 33fa602fe57cf2e531eef3716105752eb3d2694e Mon Sep 17 00:00:00 2001 From: tegwick Date: Mon, 4 May 2026 11:49:39 +0200 Subject: [PATCH] Extension framework optimization --- docs/extension-authoring.md | 52 +++++++++++++ docs/internal-extension-framework.md | 86 ++++++++++++++++++++++ src/markitect_tool/extension/execution.py | 15 +--- src/markitect_tool/extension/processing.py | 32 ++++++++ src/markitect_tool/extension/registry.py | 17 +++-- tests/test_extension_processing_model.py | 22 ++++++ tests/test_extension_registry.py | 2 + 7 files changed, 208 insertions(+), 18 deletions(-) diff --git a/docs/extension-authoring.md b/docs/extension-authoring.md index 9e36739..5281293 100644 --- a/docs/extension-authoring.md +++ b/docs/extension-authoring.md @@ -94,6 +94,58 @@ Subsystem-specific dataclasses may remain richer. The canonical model is the bridge that lets callbacks, registries, diagnostics, provenance, and future policy checks interact consistently. +### Minimal Runnable Extension + +```python +from markitect_tool.extension import ( + ExtensionDescriptor, + ExtensionExecutor, + ExtensionRegistry, + ProcessingRequest, + ProcessingResult, +) + + +def run_example(request: ProcessingRequest) -> ProcessingResult: + name = request.input.get("name", "world") + return ProcessingResult(output=f"Hello, {name}") + + +descriptor = ExtensionDescriptor( + id="example.hello", + kind="example", + summary="Small example extension.", + factory=lambda: run_example, +) + +registry = ExtensionRegistry([descriptor]) +result = ExtensionExecutor(registry).execute( + "example.hello", + ProcessingRequest(operation="example.hello", input={"name": "Markitect"}), +) +``` + +Use this executor boundary when callbacks, dependency checks, trace events, or +future policy checks matter. For tiny deterministic helpers, it is still fine to +keep the existing direct function API and expose a descriptor alongside it. + +### Cache-Key Rules + +`ProcessingRequest.cache_key` includes: + +- operation +- input +- stable context material +- options +- scope +- declared capabilities +- request metadata + +Stable context material includes source path, namespaces, variables, policy, and +metadata. It does not include workspace root, caller, or live backend handles. +This keeps cache keys portable while avoiding collisions for context-sensitive +operations. + ## Diagnostics Diagnostics should be: diff --git a/docs/internal-extension-framework.md b/docs/internal-extension-framework.md index 5bf51db..e77af7a 100644 --- a/docs/internal-extension-framework.md +++ b/docs/internal-extension-framework.md @@ -95,6 +95,63 @@ The canonical processing model should define a small set of shared envelopes: Subsystem-specific types may remain richer. The canonical model is the bridge, not a forced replacement for every local dataclass. +### Processing Request Example + +```python +from pathlib import Path + +from markitect_tool.extension import ProcessingContext, ProcessingRequest + +request = ProcessingRequest( + operation="query.selector", + input={"selector": "sections[heading=Decision]"}, + context=ProcessingContext( + source_path=Path("docs/adr.md"), + namespaces={"std": "standards"}, + variables={"audience": "internal"}, + ), + options={"format": "json"}, + scope="document", +) +``` + +The request cache key includes operation, input, options, scope, declared +capabilities, metadata, and stable context semantics: + +- source path +- namespaces +- variables +- policy +- metadata + +It intentionally excludes root path, caller name, and live backend handles. +Those are execution-environment details. If they matter semantically for an +extension, put explicit values in request options or metadata. + +### Processing Result Example + +```python +from markitect_tool.extension import ( + ProcessingProvenance, + ProcessingResult, + ProcessingTrace, +) + +result = ProcessingResult( + output={"count": 1}, + provenance=[ + ProcessingProvenance( + operation="query.selector", + source_path="docs/adr.md", + content_hash="sha256:...", + ) + ], +).with_trace(ProcessingTrace(event="query.done")) +``` + +`ProcessingResult.valid` is derived from diagnostics. Any diagnostic with +severity `error` makes the result invalid. + ## Registration Strategy Start with in-package registration: @@ -115,6 +172,35 @@ packages become a real requirement. See `docs/extension-authoring.md` for the extension authoring checklist and descriptor template. +### Registry Use + +Extension registries are optimized for common lookup patterns: + +- `registry.get("backend.local-sqlite")` +- `registry.list(kind="query-engine")` +- `registry.require_capability("fts")` +- `registry.check_dependencies("jsonpath")` + +Kinds and capabilities are indexed at registration time, so large registries can +avoid repeated full scans for basic discovery. + +### Execution Lifecycle + +`ExtensionExecutor` wraps a descriptor factory with deterministic lifecycle +hooks: + +1. Fetch descriptor. +2. Check required optional dependencies. +3. Instantiate callable implementation. +4. Run `before` callbacks. +5. Execute implementation. +6. Normalize result type. +7. Append `extension.executed` trace. +8. Run success or failure callbacks. +9. Run final `after` callbacks. + +Callbacks are explicit. The framework does not introduce hidden global behavior. + ## Compatibility Rules The refactor must preserve: diff --git a/src/markitect_tool/extension/execution.py b/src/markitect_tool/extension/execution.py index 5501abc..5413a9f 100644 --- a/src/markitect_tool/extension/execution.py +++ b/src/markitect_tool/extension/execution.py @@ -78,21 +78,12 @@ class ExtensionExecutor: f"Extension `{extension_id}` returned {type(result).__name__}, expected ProcessingResult" ) - result = _with_trace(result, ProcessingTrace(event="extension.executed", metadata={"id": extension_id})) + result = result.with_trace( + ProcessingTrace(event="extension.executed", metadata={"id": extension_id}) + ) callbacks = self.lifecycle.after_success if result.valid else self.lifecycle.after_failure for callback in callbacks: callback(descriptor, request, result) for callback in self.lifecycle.after: callback(descriptor, request, result) return result - - -def _with_trace(result: ProcessingResult, trace: ProcessingTrace) -> ProcessingResult: - return ProcessingResult( - output=result.output, - diagnostics=result.diagnostics, - provenance=result.provenance, - dependencies=result.dependencies, - trace=[*result.trace, trace], - metadata=result.metadata, - ) diff --git a/src/markitect_tool/extension/processing.py b/src/markitect_tool/extension/processing.py index 0af747a..de50603 100644 --- a/src/markitect_tool/extension/processing.py +++ b/src/markitect_tool/extension/processing.py @@ -80,6 +80,25 @@ class ProcessingContext: } return _drop_empty(data) + def cache_key_material(self) -> dict[str, Any]: + """Return stable context fields that may affect deterministic output. + + The workspace root, caller name, and backend handles are intentionally + excluded: they are execution environment details, not document semantics. + Extensions that need them in cache keys should include explicit values in + request options or metadata. + """ + + return _drop_empty( + { + "source_path": str(self.source_path) if self.source_path else None, + "namespaces": self.namespaces, + "variables": self.variables, + "policy": self.policy, + "metadata": self.metadata, + } + ) + @dataclass(frozen=True) class ProcessingRequest: @@ -98,6 +117,7 @@ class ProcessingRequest: payload = { "operation": self.operation, "input": self.input, + "context": self.context.cache_key_material(), "options": self.options, "scope": self.scope, "capabilities": [capability.to_dict() for capability in self.capabilities], @@ -148,6 +168,18 @@ class ProcessingResult: } return _drop_empty(data) + def with_trace(self, event: ProcessingTrace) -> "ProcessingResult": + """Return a copy with an additional trace event.""" + + return ProcessingResult( + output=self.output, + diagnostics=self.diagnostics, + provenance=self.provenance, + dependencies=self.dependencies, + trace=[*self.trace, event], + metadata=self.metadata, + ) + @classmethod def from_error( cls, diff --git a/src/markitect_tool/extension/registry.py b/src/markitect_tool/extension/registry.py index 05eac7d..ed4a849 100644 --- a/src/markitect_tool/extension/registry.py +++ b/src/markitect_tool/extension/registry.py @@ -112,6 +112,8 @@ class ExtensionRegistry: def __init__(self, descriptors: Iterable[ExtensionDescriptor] | None = None) -> None: self._descriptors: dict[str, ExtensionDescriptor] = {} + self._by_kind: dict[str, set[str]] = {} + self._by_capability: dict[str, set[str]] = {} for descriptor in descriptors or []: self.register(descriptor) @@ -119,6 +121,9 @@ class ExtensionRegistry: if descriptor.id in self._descriptors: raise ExtensionRegistryError(f"Duplicate extension id `{descriptor.id}`") self._descriptors[descriptor.id] = descriptor + self._by_kind.setdefault(descriptor.kind, set()).add(descriptor.id) + for capability in descriptor.capabilities: + self._by_capability.setdefault(capability.id, set()).add(descriptor.id) def get(self, extension_id: str) -> ExtensionDescriptor: try: @@ -127,16 +132,16 @@ class ExtensionRegistry: raise ExtensionRegistryError(f"Unknown extension `{extension_id}`") from exc def list(self, *, kind: str | None = None) -> list[ExtensionDescriptor]: - descriptors = [self._descriptors[key] for key in sorted(self._descriptors)] if kind is None: - return descriptors - return [descriptor for descriptor in descriptors if descriptor.kind == kind] + ids = sorted(self._descriptors) + else: + ids = sorted(self._by_kind.get(kind, set())) + return [self._descriptors[key] for key in ids] def require_capability(self, capability_id: str) -> list[ExtensionDescriptor]: return [ - descriptor - for descriptor in self.list() - if any(capability.id == capability_id for capability in descriptor.capabilities) + self._descriptors[extension_id] + for extension_id in sorted(self._by_capability.get(capability_id, set())) ] def check_dependencies( diff --git a/tests/test_extension_processing_model.py b/tests/test_extension_processing_model.py index 99781e3..067b767 100644 --- a/tests/test_extension_processing_model.py +++ b/tests/test_extension_processing_model.py @@ -33,6 +33,13 @@ def test_processing_request_serializes_context_and_cache_key(): options={"format": "json"}, capabilities=[ProcessingCapability(id="ast", description="Read parsed AST")], ).cache_key + assert request.cache_key != ProcessingRequest( + operation="query.selector", + input={"selector": "sections[heading=Decision]"}, + context=ProcessingContext(source_path=Path("other.md")), + options={"format": "json"}, + capabilities=[ProcessingCapability(id="ast", description="Read parsed AST")], + ).cache_key def test_processing_result_validity_provenance_and_trace(): @@ -58,6 +65,21 @@ def test_processing_result_validity_provenance_and_trace(): assert data["trace"][0]["metadata"]["engine"] == "selector" +def test_processing_result_with_trace_preserves_envelope_fields(): + result = ProcessingResult( + output={"count": 1}, + dependencies=["doc.md"], + metadata={"engine": "selector"}, + ) + + traced = result.with_trace(ProcessingTrace(event="query.done")) + + assert traced.output == result.output + assert traced.dependencies == ["doc.md"] + assert traced.metadata == {"engine": "selector"} + assert traced.trace[0].event == "query.done" + + def test_processing_result_from_error_normalizes_diagnostics(): result = ProcessingResult.from_error( code="extension.missing_dependency", diff --git a/tests/test_extension_registry.py b/tests/test_extension_registry.py index 15f5efa..0f815fe 100644 --- a/tests/test_extension_registry.py +++ b/tests/test_extension_registry.py @@ -54,6 +54,8 @@ def test_extension_registry_lists_by_kind_and_capability(): assert [descriptor.id for descriptor in registry.require_capability("fts")] == [ "backend.local-sqlite" ] + assert registry.require_capability("missing") == [] + assert registry.list(kind="missing") == [] def test_extension_registry_rejects_duplicate_ids():