From 33fa602fe57cf2e531eef3716105752eb3d2694e Mon Sep 17 00:00:00 2001
From: tegwick <bernd.worsch@gmail.com>
Date: Mon, 4 May 2026 11:49:39 +0200
Subject: [PATCH] Extension framework optimization

---
 docs/extension-authoring.md                | 52 +++++++++++++
 docs/internal-extension-framework.md       | 86 ++++++++++++++++++++++
 src/markitect_tool/extension/execution.py  | 15 +---
 src/markitect_tool/extension/processing.py | 32 ++++++++
 src/markitect_tool/extension/registry.py   | 17 +++--
 tests/test_extension_processing_model.py   | 22 ++++++
 tests/test_extension_registry.py           |  2 +
 7 files changed, 208 insertions(+), 18 deletions(-)

diff --git a/docs/extension-authoring.md b/docs/extension-authoring.md
index 9e36739..5281293 100644
--- a/docs/extension-authoring.md
+++ b/docs/extension-authoring.md
@@ -94,6 +94,58 @@ Subsystem-specific dataclasses may remain richer. The canonical model is the
 bridge that lets callbacks, registries, diagnostics, provenance, and future
 policy checks interact consistently.
 
+### Minimal Runnable Extension
+
+```python
+from markitect_tool.extension import (
+    ExtensionDescriptor,
+    ExtensionExecutor,
+    ExtensionRegistry,
+    ProcessingRequest,
+    ProcessingResult,
+)
+
+
+def run_example(request: ProcessingRequest) -> ProcessingResult:
+    name = request.input.get("name", "world")
+    return ProcessingResult(output=f"Hello, {name}")
+
+
+descriptor = ExtensionDescriptor(
+    id="example.hello",
+    kind="example",
+    summary="Small example extension.",
+    factory=lambda: run_example,
+)
+
+registry = ExtensionRegistry([descriptor])
+result = ExtensionExecutor(registry).execute(
+    "example.hello",
+    ProcessingRequest(operation="example.hello", input={"name": "Markitect"}),
+)
+```
+
+Use this executor boundary when callbacks, dependency checks, trace events, or
+future policy checks matter. For tiny deterministic helpers, it is still fine to
+keep the existing direct function API and expose a descriptor alongside it.
+
+### Cache-Key Rules
+
+`ProcessingRequest.cache_key` includes:
+
+- operation
+- input
+- stable context material
+- options
+- scope
+- declared capabilities
+- request metadata
+
+Stable context material includes source path, namespaces, variables, policy, and
+metadata. It does not include workspace root, caller, or live backend handles.
+This keeps cache keys portable while avoiding collisions for context-sensitive
+operations.
+
 ## Diagnostics
 
 Diagnostics should be:
diff --git a/docs/internal-extension-framework.md b/docs/internal-extension-framework.md
index 5bf51db..e77af7a 100644
--- a/docs/internal-extension-framework.md
+++ b/docs/internal-extension-framework.md
@@ -95,6 +95,63 @@ The canonical processing model should define a small set of shared envelopes:
 Subsystem-specific types may remain richer. The canonical model is the bridge,
 not a forced replacement for every local dataclass.
 
+### Processing Request Example
+
+```python
+from pathlib import Path
+
+from markitect_tool.extension import ProcessingContext, ProcessingRequest
+
+request = ProcessingRequest(
+    operation="query.selector",
+    input={"selector": "sections[heading=Decision]"},
+    context=ProcessingContext(
+        source_path=Path("docs/adr.md"),
+        namespaces={"std": "standards"},
+        variables={"audience": "internal"},
+    ),
+    options={"format": "json"},
+    scope="document",
+)
+```
+
+The request cache key includes operation, input, options, scope, declared
+capabilities, metadata, and stable context semantics:
+
+- source path
+- namespaces
+- variables
+- policy
+- metadata
+
+It intentionally excludes root path, caller name, and live backend handles.
+Those are execution-environment details. If they matter semantically for an
+extension, put explicit values in request options or metadata.
+
+### Processing Result Example
+
+```python
+from markitect_tool.extension import (
+    ProcessingProvenance,
+    ProcessingResult,
+    ProcessingTrace,
+)
+
+result = ProcessingResult(
+    output={"count": 1},
+    provenance=[
+        ProcessingProvenance(
+            operation="query.selector",
+            source_path="docs/adr.md",
+            content_hash="sha256:...",
+        )
+    ],
+).with_trace(ProcessingTrace(event="query.done"))
+```
+
+`ProcessingResult.valid` is derived from diagnostics. Any diagnostic with
+severity `error` makes the result invalid.
+
 ## Registration Strategy
 
 Start with in-package registration:
@@ -115,6 +172,35 @@ packages become a real requirement.
 See `docs/extension-authoring.md` for the extension authoring checklist and
 descriptor template.
 
+### Registry Use
+
+Extension registries are optimized for common lookup patterns:
+
+- `registry.get("backend.local-sqlite")`
+- `registry.list(kind="query-engine")`
+- `registry.require_capability("fts")`
+- `registry.check_dependencies("jsonpath")`
+
+Kinds and capabilities are indexed at registration time, so large registries can
+avoid repeated full scans for basic discovery.
+
+### Execution Lifecycle
+
+`ExtensionExecutor` wraps a descriptor factory with deterministic lifecycle
+hooks:
+
+1. Fetch descriptor.
+2. Check required optional dependencies.
+3. Instantiate callable implementation.
+4. Run `before` callbacks.
+5. Execute implementation.
+6. Normalize result type.
+7. Append `extension.executed` trace.
+8. Run success or failure callbacks.
+9. Run final `after` callbacks.
+
+Callbacks are explicit. The framework does not introduce hidden global behavior.
+
 ## Compatibility Rules
 
 The refactor must preserve:
diff --git a/src/markitect_tool/extension/execution.py b/src/markitect_tool/extension/execution.py
index 5501abc..5413a9f 100644
--- a/src/markitect_tool/extension/execution.py
+++ b/src/markitect_tool/extension/execution.py
@@ -78,21 +78,12 @@ class ExtensionExecutor:
                 f"Extension `{extension_id}` returned {type(result).__name__}, expected ProcessingResult"
             )
 
-        result = _with_trace(result, ProcessingTrace(event="extension.executed", metadata={"id": extension_id}))
+        result = result.with_trace(
+            ProcessingTrace(event="extension.executed", metadata={"id": extension_id})
+        )
         callbacks = self.lifecycle.after_success if result.valid else self.lifecycle.after_failure
         for callback in callbacks:
             callback(descriptor, request, result)
         for callback in self.lifecycle.after:
             callback(descriptor, request, result)
         return result
-
-
-def _with_trace(result: ProcessingResult, trace: ProcessingTrace) -> ProcessingResult:
-    return ProcessingResult(
-        output=result.output,
-        diagnostics=result.diagnostics,
-        provenance=result.provenance,
-        dependencies=result.dependencies,
-        trace=[*result.trace, trace],
-        metadata=result.metadata,
-    )
diff --git a/src/markitect_tool/extension/processing.py b/src/markitect_tool/extension/processing.py
index 0af747a..de50603 100644
--- a/src/markitect_tool/extension/processing.py
+++ b/src/markitect_tool/extension/processing.py
@@ -80,6 +80,25 @@ class ProcessingContext:
         }
         return _drop_empty(data)
 
+    def cache_key_material(self) -> dict[str, Any]:
+        """Return stable context fields that may affect deterministic output.
+
+        The workspace root, caller name, and backend handles are intentionally
+        excluded: they are execution environment details, not document semantics.
+        Extensions that need them in cache keys should include explicit values in
+        request options or metadata.
+        """
+
+        return _drop_empty(
+            {
+                "source_path": str(self.source_path) if self.source_path else None,
+                "namespaces": self.namespaces,
+                "variables": self.variables,
+                "policy": self.policy,
+                "metadata": self.metadata,
+            }
+        )
+
 
 @dataclass(frozen=True)
 class ProcessingRequest:
@@ -98,6 +117,7 @@ class ProcessingRequest:
         payload = {
             "operation": self.operation,
             "input": self.input,
+            "context": self.context.cache_key_material(),
             "options": self.options,
             "scope": self.scope,
             "capabilities": [capability.to_dict() for capability in self.capabilities],
@@ -148,6 +168,18 @@ class ProcessingResult:
         }
         return _drop_empty(data)
 
+    def with_trace(self, event: ProcessingTrace) -> "ProcessingResult":
+        """Return a copy with an additional trace event."""
+
+        return ProcessingResult(
+            output=self.output,
+            diagnostics=self.diagnostics,
+            provenance=self.provenance,
+            dependencies=self.dependencies,
+            trace=[*self.trace, event],
+            metadata=self.metadata,
+        )
+
     @classmethod
     def from_error(
         cls,
diff --git a/src/markitect_tool/extension/registry.py b/src/markitect_tool/extension/registry.py
index 05eac7d..ed4a849 100644
--- a/src/markitect_tool/extension/registry.py
+++ b/src/markitect_tool/extension/registry.py
@@ -112,6 +112,8 @@ class ExtensionRegistry:
 
     def __init__(self, descriptors: Iterable[ExtensionDescriptor] | None = None) -> None:
         self._descriptors: dict[str, ExtensionDescriptor] = {}
+        self._by_kind: dict[str, set[str]] = {}
+        self._by_capability: dict[str, set[str]] = {}
         for descriptor in descriptors or []:
             self.register(descriptor)
 
@@ -119,6 +121,9 @@ class ExtensionRegistry:
         if descriptor.id in self._descriptors:
             raise ExtensionRegistryError(f"Duplicate extension id `{descriptor.id}`")
         self._descriptors[descriptor.id] = descriptor
+        self._by_kind.setdefault(descriptor.kind, set()).add(descriptor.id)
+        for capability in descriptor.capabilities:
+            self._by_capability.setdefault(capability.id, set()).add(descriptor.id)
 
     def get(self, extension_id: str) -> ExtensionDescriptor:
         try:
@@ -127,16 +132,16 @@ class ExtensionRegistry:
             raise ExtensionRegistryError(f"Unknown extension `{extension_id}`") from exc
 
     def list(self, *, kind: str | None = None) -> list[ExtensionDescriptor]:
-        descriptors = [self._descriptors[key] for key in sorted(self._descriptors)]
         if kind is None:
-            return descriptors
-        return [descriptor for descriptor in descriptors if descriptor.kind == kind]
+            ids = sorted(self._descriptors)
+        else:
+            ids = sorted(self._by_kind.get(kind, set()))
+        return [self._descriptors[key] for key in ids]
 
     def require_capability(self, capability_id: str) -> list[ExtensionDescriptor]:
         return [
-            descriptor
-            for descriptor in self.list()
-            if any(capability.id == capability_id for capability in descriptor.capabilities)
+            self._descriptors[extension_id]
+            for extension_id in sorted(self._by_capability.get(capability_id, set()))
         ]
 
     def check_dependencies(
diff --git a/tests/test_extension_processing_model.py b/tests/test_extension_processing_model.py
index 99781e3..067b767 100644
--- a/tests/test_extension_processing_model.py
+++ b/tests/test_extension_processing_model.py
@@ -33,6 +33,13 @@ def test_processing_request_serializes_context_and_cache_key():
         options={"format": "json"},
         capabilities=[ProcessingCapability(id="ast", description="Read parsed AST")],
     ).cache_key
+    assert request.cache_key != ProcessingRequest(
+        operation="query.selector",
+        input={"selector": "sections[heading=Decision]"},
+        context=ProcessingContext(source_path=Path("other.md")),
+        options={"format": "json"},
+        capabilities=[ProcessingCapability(id="ast", description="Read parsed AST")],
+    ).cache_key
 
 
 def test_processing_result_validity_provenance_and_trace():
@@ -58,6 +65,21 @@ def test_processing_result_validity_provenance_and_trace():
     assert data["trace"][0]["metadata"]["engine"] == "selector"
 
 
+def test_processing_result_with_trace_preserves_envelope_fields():
+    result = ProcessingResult(
+        output={"count": 1},
+        dependencies=["doc.md"],
+        metadata={"engine": "selector"},
+    )
+
+    traced = result.with_trace(ProcessingTrace(event="query.done"))
+
+    assert traced.output == result.output
+    assert traced.dependencies == ["doc.md"]
+    assert traced.metadata == {"engine": "selector"}
+    assert traced.trace[0].event == "query.done"
+
+
 def test_processing_result_from_error_normalizes_diagnostics():
     result = ProcessingResult.from_error(
         code="extension.missing_dependency",
diff --git a/tests/test_extension_registry.py b/tests/test_extension_registry.py
index 15f5efa..0f815fe 100644
--- a/tests/test_extension_registry.py
+++ b/tests/test_extension_registry.py
@@ -54,6 +54,8 @@ def test_extension_registry_lists_by_kind_and_capability():
     assert [descriptor.id for descriptor in registry.require_capability("fts")] == [
         "backend.local-sqlite"
     ]
+    assert registry.require_capability("missing") == []
+    assert registry.list(kind="missing") == []
 
 
 def test_extension_registry_rejects_duplicate_ids():