Extension framework optimization

This commit is contained in:
2026-05-04 11:49:39 +02:00
parent 32f4af8359
commit 33fa602fe5
7 changed files with 208 additions and 18 deletions

View File

@@ -94,6 +94,58 @@ Subsystem-specific dataclasses may remain richer. The canonical model is the
bridge that lets callbacks, registries, diagnostics, provenance, and future
policy checks interact consistently.
### Minimal Runnable Extension
```python
from markitect_tool.extension import (
ExtensionDescriptor,
ExtensionExecutor,
ExtensionRegistry,
ProcessingRequest,
ProcessingResult,
)
def run_example(request: ProcessingRequest) -> ProcessingResult:
name = request.input.get("name", "world")
return ProcessingResult(output=f"Hello, {name}")
descriptor = ExtensionDescriptor(
id="example.hello",
kind="example",
summary="Small example extension.",
factory=lambda: run_example,
)
registry = ExtensionRegistry([descriptor])
result = ExtensionExecutor(registry).execute(
"example.hello",
ProcessingRequest(operation="example.hello", input={"name": "Markitect"}),
)
```
Use this executor boundary when callbacks, dependency checks, trace events, or
future policy checks matter. For tiny deterministic helpers, it is still fine to
keep the existing direct function API and expose a descriptor alongside it.
### Cache-Key Rules
`ProcessingRequest.cache_key` includes:
- operation
- input
- stable context material
- options
- scope
- declared capabilities
- request metadata
Stable context material includes source path, namespaces, variables, policy, and
metadata. It does not include workspace root, caller, or live backend handles.
This keeps cache keys portable while avoiding collisions for context-sensitive
operations.
## Diagnostics
Diagnostics should be:

View File

@@ -95,6 +95,63 @@ The canonical processing model should define a small set of shared envelopes:
Subsystem-specific types may remain richer. The canonical model is the bridge,
not a forced replacement for every local dataclass.
### Processing Request Example
```python
from pathlib import Path
from markitect_tool.extension import ProcessingContext, ProcessingRequest
request = ProcessingRequest(
operation="query.selector",
input={"selector": "sections[heading=Decision]"},
context=ProcessingContext(
source_path=Path("docs/adr.md"),
namespaces={"std": "standards"},
variables={"audience": "internal"},
),
options={"format": "json"},
scope="document",
)
```
The request cache key includes operation, input, options, scope, declared
capabilities, metadata, and stable context semantics:
- source path
- namespaces
- variables
- policy
- metadata
It intentionally excludes root path, caller name, and live backend handles.
Those are execution-environment details. If they matter semantically for an
extension, put explicit values in request options or metadata.
### Processing Result Example
```python
from markitect_tool.extension import (
ProcessingProvenance,
ProcessingResult,
ProcessingTrace,
)
result = ProcessingResult(
output={"count": 1},
provenance=[
ProcessingProvenance(
operation="query.selector",
source_path="docs/adr.md",
content_hash="sha256:...",
)
],
).with_trace(ProcessingTrace(event="query.done"))
```
`ProcessingResult.valid` is derived from diagnostics. Any diagnostic with
severity `error` makes the result invalid.
## Registration Strategy
Start with in-package registration:
@@ -115,6 +172,35 @@ packages become a real requirement.
See `docs/extension-authoring.md` for the extension authoring checklist and
descriptor template.
### Registry Use
Extension registries are optimized for common lookup patterns:
- `registry.get("backend.local-sqlite")`
- `registry.list(kind="query-engine")`
- `registry.require_capability("fts")`
- `registry.check_dependencies("jsonpath")`
Kinds and capabilities are indexed at registration time, so large registries can
avoid repeated full scans for basic discovery.
### Execution Lifecycle
`ExtensionExecutor` wraps a descriptor factory with deterministic lifecycle
hooks:
1. Fetch descriptor.
2. Check required optional dependencies.
3. Instantiate callable implementation.
4. Run `before` callbacks.
5. Execute implementation.
6. Normalize result type.
7. Append `extension.executed` trace.
8. Run success or failure callbacks.
9. Run final `after` callbacks.
Callbacks are explicit. The framework does not introduce hidden global behavior.
## Compatibility Rules
The refactor must preserve:

View File

@@ -78,21 +78,12 @@ class ExtensionExecutor:
f"Extension `{extension_id}` returned {type(result).__name__}, expected ProcessingResult"
)
result = _with_trace(result, ProcessingTrace(event="extension.executed", metadata={"id": extension_id}))
result = result.with_trace(
ProcessingTrace(event="extension.executed", metadata={"id": extension_id})
)
callbacks = self.lifecycle.after_success if result.valid else self.lifecycle.after_failure
for callback in callbacks:
callback(descriptor, request, result)
for callback in self.lifecycle.after:
callback(descriptor, request, result)
return result
def _with_trace(result: ProcessingResult, trace: ProcessingTrace) -> ProcessingResult:
return ProcessingResult(
output=result.output,
diagnostics=result.diagnostics,
provenance=result.provenance,
dependencies=result.dependencies,
trace=[*result.trace, trace],
metadata=result.metadata,
)

View File

@@ -80,6 +80,25 @@ class ProcessingContext:
}
return _drop_empty(data)
def cache_key_material(self) -> dict[str, Any]:
"""Return stable context fields that may affect deterministic output.
The workspace root, caller name, and backend handles are intentionally
excluded: they are execution environment details, not document semantics.
Extensions that need them in cache keys should include explicit values in
request options or metadata.
"""
return _drop_empty(
{
"source_path": str(self.source_path) if self.source_path else None,
"namespaces": self.namespaces,
"variables": self.variables,
"policy": self.policy,
"metadata": self.metadata,
}
)
@dataclass(frozen=True)
class ProcessingRequest:
@@ -98,6 +117,7 @@ class ProcessingRequest:
payload = {
"operation": self.operation,
"input": self.input,
"context": self.context.cache_key_material(),
"options": self.options,
"scope": self.scope,
"capabilities": [capability.to_dict() for capability in self.capabilities],
@@ -148,6 +168,18 @@ class ProcessingResult:
}
return _drop_empty(data)
def with_trace(self, event: ProcessingTrace) -> "ProcessingResult":
"""Return a copy with an additional trace event."""
return ProcessingResult(
output=self.output,
diagnostics=self.diagnostics,
provenance=self.provenance,
dependencies=self.dependencies,
trace=[*self.trace, event],
metadata=self.metadata,
)
@classmethod
def from_error(
cls,

View File

@@ -112,6 +112,8 @@ class ExtensionRegistry:
def __init__(self, descriptors: Iterable[ExtensionDescriptor] | None = None) -> None:
self._descriptors: dict[str, ExtensionDescriptor] = {}
self._by_kind: dict[str, set[str]] = {}
self._by_capability: dict[str, set[str]] = {}
for descriptor in descriptors or []:
self.register(descriptor)
@@ -119,6 +121,9 @@ class ExtensionRegistry:
if descriptor.id in self._descriptors:
raise ExtensionRegistryError(f"Duplicate extension id `{descriptor.id}`")
self._descriptors[descriptor.id] = descriptor
self._by_kind.setdefault(descriptor.kind, set()).add(descriptor.id)
for capability in descriptor.capabilities:
self._by_capability.setdefault(capability.id, set()).add(descriptor.id)
def get(self, extension_id: str) -> ExtensionDescriptor:
try:
@@ -127,16 +132,16 @@ class ExtensionRegistry:
raise ExtensionRegistryError(f"Unknown extension `{extension_id}`") from exc
def list(self, *, kind: str | None = None) -> list[ExtensionDescriptor]:
descriptors = [self._descriptors[key] for key in sorted(self._descriptors)]
if kind is None:
return descriptors
return [descriptor for descriptor in descriptors if descriptor.kind == kind]
ids = sorted(self._descriptors)
else:
ids = sorted(self._by_kind.get(kind, set()))
return [self._descriptors[key] for key in ids]
def require_capability(self, capability_id: str) -> list[ExtensionDescriptor]:
return [
descriptor
for descriptor in self.list()
if any(capability.id == capability_id for capability in descriptor.capabilities)
self._descriptors[extension_id]
for extension_id in sorted(self._by_capability.get(capability_id, set()))
]
def check_dependencies(

View File

@@ -33,6 +33,13 @@ def test_processing_request_serializes_context_and_cache_key():
options={"format": "json"},
capabilities=[ProcessingCapability(id="ast", description="Read parsed AST")],
).cache_key
assert request.cache_key != ProcessingRequest(
operation="query.selector",
input={"selector": "sections[heading=Decision]"},
context=ProcessingContext(source_path=Path("other.md")),
options={"format": "json"},
capabilities=[ProcessingCapability(id="ast", description="Read parsed AST")],
).cache_key
def test_processing_result_validity_provenance_and_trace():
@@ -58,6 +65,21 @@ def test_processing_result_validity_provenance_and_trace():
assert data["trace"][0]["metadata"]["engine"] == "selector"
def test_processing_result_with_trace_preserves_envelope_fields():
result = ProcessingResult(
output={"count": 1},
dependencies=["doc.md"],
metadata={"engine": "selector"},
)
traced = result.with_trace(ProcessingTrace(event="query.done"))
assert traced.output == result.output
assert traced.dependencies == ["doc.md"]
assert traced.metadata == {"engine": "selector"}
assert traced.trace[0].event == "query.done"
def test_processing_result_from_error_normalizes_diagnostics():
result = ProcessingResult.from_error(
code="extension.missing_dependency",

View File

@@ -54,6 +54,8 @@ def test_extension_registry_lists_by_kind_and_capability():
assert [descriptor.id for descriptor in registry.require_capability("fts")] == [
"backend.local-sqlite"
]
assert registry.require_capability("missing") == []
assert registry.list(kind="missing") == []
def test_extension_registry_rejects_duplicate_ids():