generated from coulomb/repo-seed
Extension framework optimization
This commit is contained in:
@@ -94,6 +94,58 @@ Subsystem-specific dataclasses may remain richer. The canonical model is the
|
||||
bridge that lets callbacks, registries, diagnostics, provenance, and future
|
||||
policy checks interact consistently.
|
||||
|
||||
### Minimal Runnable Extension
|
||||
|
||||
```python
|
||||
from markitect_tool.extension import (
|
||||
ExtensionDescriptor,
|
||||
ExtensionExecutor,
|
||||
ExtensionRegistry,
|
||||
ProcessingRequest,
|
||||
ProcessingResult,
|
||||
)
|
||||
|
||||
|
||||
def run_example(request: ProcessingRequest) -> ProcessingResult:
|
||||
name = request.input.get("name", "world")
|
||||
return ProcessingResult(output=f"Hello, {name}")
|
||||
|
||||
|
||||
descriptor = ExtensionDescriptor(
|
||||
id="example.hello",
|
||||
kind="example",
|
||||
summary="Small example extension.",
|
||||
factory=lambda: run_example,
|
||||
)
|
||||
|
||||
registry = ExtensionRegistry([descriptor])
|
||||
result = ExtensionExecutor(registry).execute(
|
||||
"example.hello",
|
||||
ProcessingRequest(operation="example.hello", input={"name": "Markitect"}),
|
||||
)
|
||||
```
|
||||
|
||||
Use this executor boundary when callbacks, dependency checks, trace events, or
|
||||
future policy checks matter. For tiny deterministic helpers, it is still fine to
|
||||
keep the existing direct function API and expose a descriptor alongside it.
|
||||
|
||||
### Cache-Key Rules
|
||||
|
||||
`ProcessingRequest.cache_key` includes:
|
||||
|
||||
- operation
|
||||
- input
|
||||
- stable context material
|
||||
- options
|
||||
- scope
|
||||
- declared capabilities
|
||||
- request metadata
|
||||
|
||||
Stable context material includes source path, namespaces, variables, policy, and
|
||||
metadata. It does not include workspace root, caller, or live backend handles.
|
||||
This keeps cache keys portable while avoiding collisions for context-sensitive
|
||||
operations.
|
||||
|
||||
## Diagnostics
|
||||
|
||||
Diagnostics should be:
|
||||
|
||||
@@ -95,6 +95,63 @@ The canonical processing model should define a small set of shared envelopes:
|
||||
Subsystem-specific types may remain richer. The canonical model is the bridge,
|
||||
not a forced replacement for every local dataclass.
|
||||
|
||||
### Processing Request Example
|
||||
|
||||
```python
|
||||
from pathlib import Path
|
||||
|
||||
from markitect_tool.extension import ProcessingContext, ProcessingRequest
|
||||
|
||||
request = ProcessingRequest(
|
||||
operation="query.selector",
|
||||
input={"selector": "sections[heading=Decision]"},
|
||||
context=ProcessingContext(
|
||||
source_path=Path("docs/adr.md"),
|
||||
namespaces={"std": "standards"},
|
||||
variables={"audience": "internal"},
|
||||
),
|
||||
options={"format": "json"},
|
||||
scope="document",
|
||||
)
|
||||
```
|
||||
|
||||
The request cache key includes operation, input, options, scope, declared
|
||||
capabilities, metadata, and stable context semantics:
|
||||
|
||||
- source path
|
||||
- namespaces
|
||||
- variables
|
||||
- policy
|
||||
- metadata
|
||||
|
||||
It intentionally excludes root path, caller name, and live backend handles.
|
||||
Those are execution-environment details. If they matter semantically for an
|
||||
extension, put explicit values in request options or metadata.
|
||||
|
||||
### Processing Result Example
|
||||
|
||||
```python
|
||||
from markitect_tool.extension import (
|
||||
ProcessingProvenance,
|
||||
ProcessingResult,
|
||||
ProcessingTrace,
|
||||
)
|
||||
|
||||
result = ProcessingResult(
|
||||
output={"count": 1},
|
||||
provenance=[
|
||||
ProcessingProvenance(
|
||||
operation="query.selector",
|
||||
source_path="docs/adr.md",
|
||||
content_hash="sha256:...",
|
||||
)
|
||||
],
|
||||
).with_trace(ProcessingTrace(event="query.done"))
|
||||
```
|
||||
|
||||
`ProcessingResult.valid` is derived from diagnostics. Any diagnostic with
|
||||
severity `error` makes the result invalid.
|
||||
|
||||
## Registration Strategy
|
||||
|
||||
Start with in-package registration:
|
||||
@@ -115,6 +172,35 @@ packages become a real requirement.
|
||||
See `docs/extension-authoring.md` for the extension authoring checklist and
|
||||
descriptor template.
|
||||
|
||||
### Registry Use
|
||||
|
||||
Extension registries are optimized for common lookup patterns:
|
||||
|
||||
- `registry.get("backend.local-sqlite")`
|
||||
- `registry.list(kind="query-engine")`
|
||||
- `registry.require_capability("fts")`
|
||||
- `registry.check_dependencies("jsonpath")`
|
||||
|
||||
Kinds and capabilities are indexed at registration time, so large registries can
|
||||
avoid repeated full scans for basic discovery.
|
||||
|
||||
### Execution Lifecycle
|
||||
|
||||
`ExtensionExecutor` wraps a descriptor factory with deterministic lifecycle
|
||||
hooks:
|
||||
|
||||
1. Fetch descriptor.
|
||||
2. Check required optional dependencies.
|
||||
3. Instantiate callable implementation.
|
||||
4. Run `before` callbacks.
|
||||
5. Execute implementation.
|
||||
6. Normalize result type.
|
||||
7. Append `extension.executed` trace.
|
||||
8. Run success or failure callbacks.
|
||||
9. Run final `after` callbacks.
|
||||
|
||||
Callbacks are explicit. The framework does not introduce hidden global behavior.
|
||||
|
||||
## Compatibility Rules
|
||||
|
||||
The refactor must preserve:
|
||||
|
||||
@@ -78,21 +78,12 @@ class ExtensionExecutor:
|
||||
f"Extension `{extension_id}` returned {type(result).__name__}, expected ProcessingResult"
|
||||
)
|
||||
|
||||
result = _with_trace(result, ProcessingTrace(event="extension.executed", metadata={"id": extension_id}))
|
||||
result = result.with_trace(
|
||||
ProcessingTrace(event="extension.executed", metadata={"id": extension_id})
|
||||
)
|
||||
callbacks = self.lifecycle.after_success if result.valid else self.lifecycle.after_failure
|
||||
for callback in callbacks:
|
||||
callback(descriptor, request, result)
|
||||
for callback in self.lifecycle.after:
|
||||
callback(descriptor, request, result)
|
||||
return result
|
||||
|
||||
|
||||
def _with_trace(result: ProcessingResult, trace: ProcessingTrace) -> ProcessingResult:
|
||||
return ProcessingResult(
|
||||
output=result.output,
|
||||
diagnostics=result.diagnostics,
|
||||
provenance=result.provenance,
|
||||
dependencies=result.dependencies,
|
||||
trace=[*result.trace, trace],
|
||||
metadata=result.metadata,
|
||||
)
|
||||
|
||||
@@ -80,6 +80,25 @@ class ProcessingContext:
|
||||
}
|
||||
return _drop_empty(data)
|
||||
|
||||
def cache_key_material(self) -> dict[str, Any]:
|
||||
"""Return stable context fields that may affect deterministic output.
|
||||
|
||||
The workspace root, caller name, and backend handles are intentionally
|
||||
excluded: they are execution environment details, not document semantics.
|
||||
Extensions that need them in cache keys should include explicit values in
|
||||
request options or metadata.
|
||||
"""
|
||||
|
||||
return _drop_empty(
|
||||
{
|
||||
"source_path": str(self.source_path) if self.source_path else None,
|
||||
"namespaces": self.namespaces,
|
||||
"variables": self.variables,
|
||||
"policy": self.policy,
|
||||
"metadata": self.metadata,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ProcessingRequest:
|
||||
@@ -98,6 +117,7 @@ class ProcessingRequest:
|
||||
payload = {
|
||||
"operation": self.operation,
|
||||
"input": self.input,
|
||||
"context": self.context.cache_key_material(),
|
||||
"options": self.options,
|
||||
"scope": self.scope,
|
||||
"capabilities": [capability.to_dict() for capability in self.capabilities],
|
||||
@@ -148,6 +168,18 @@ class ProcessingResult:
|
||||
}
|
||||
return _drop_empty(data)
|
||||
|
||||
def with_trace(self, event: ProcessingTrace) -> "ProcessingResult":
|
||||
"""Return a copy with an additional trace event."""
|
||||
|
||||
return ProcessingResult(
|
||||
output=self.output,
|
||||
diagnostics=self.diagnostics,
|
||||
provenance=self.provenance,
|
||||
dependencies=self.dependencies,
|
||||
trace=[*self.trace, event],
|
||||
metadata=self.metadata,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_error(
|
||||
cls,
|
||||
|
||||
@@ -112,6 +112,8 @@ class ExtensionRegistry:
|
||||
|
||||
def __init__(self, descriptors: Iterable[ExtensionDescriptor] | None = None) -> None:
|
||||
self._descriptors: dict[str, ExtensionDescriptor] = {}
|
||||
self._by_kind: dict[str, set[str]] = {}
|
||||
self._by_capability: dict[str, set[str]] = {}
|
||||
for descriptor in descriptors or []:
|
||||
self.register(descriptor)
|
||||
|
||||
@@ -119,6 +121,9 @@ class ExtensionRegistry:
|
||||
if descriptor.id in self._descriptors:
|
||||
raise ExtensionRegistryError(f"Duplicate extension id `{descriptor.id}`")
|
||||
self._descriptors[descriptor.id] = descriptor
|
||||
self._by_kind.setdefault(descriptor.kind, set()).add(descriptor.id)
|
||||
for capability in descriptor.capabilities:
|
||||
self._by_capability.setdefault(capability.id, set()).add(descriptor.id)
|
||||
|
||||
def get(self, extension_id: str) -> ExtensionDescriptor:
|
||||
try:
|
||||
@@ -127,16 +132,16 @@ class ExtensionRegistry:
|
||||
raise ExtensionRegistryError(f"Unknown extension `{extension_id}`") from exc
|
||||
|
||||
def list(self, *, kind: str | None = None) -> list[ExtensionDescriptor]:
|
||||
descriptors = [self._descriptors[key] for key in sorted(self._descriptors)]
|
||||
if kind is None:
|
||||
return descriptors
|
||||
return [descriptor for descriptor in descriptors if descriptor.kind == kind]
|
||||
ids = sorted(self._descriptors)
|
||||
else:
|
||||
ids = sorted(self._by_kind.get(kind, set()))
|
||||
return [self._descriptors[key] for key in ids]
|
||||
|
||||
def require_capability(self, capability_id: str) -> list[ExtensionDescriptor]:
|
||||
return [
|
||||
descriptor
|
||||
for descriptor in self.list()
|
||||
if any(capability.id == capability_id for capability in descriptor.capabilities)
|
||||
self._descriptors[extension_id]
|
||||
for extension_id in sorted(self._by_capability.get(capability_id, set()))
|
||||
]
|
||||
|
||||
def check_dependencies(
|
||||
|
||||
@@ -33,6 +33,13 @@ def test_processing_request_serializes_context_and_cache_key():
|
||||
options={"format": "json"},
|
||||
capabilities=[ProcessingCapability(id="ast", description="Read parsed AST")],
|
||||
).cache_key
|
||||
assert request.cache_key != ProcessingRequest(
|
||||
operation="query.selector",
|
||||
input={"selector": "sections[heading=Decision]"},
|
||||
context=ProcessingContext(source_path=Path("other.md")),
|
||||
options={"format": "json"},
|
||||
capabilities=[ProcessingCapability(id="ast", description="Read parsed AST")],
|
||||
).cache_key
|
||||
|
||||
|
||||
def test_processing_result_validity_provenance_and_trace():
|
||||
@@ -58,6 +65,21 @@ def test_processing_result_validity_provenance_and_trace():
|
||||
assert data["trace"][0]["metadata"]["engine"] == "selector"
|
||||
|
||||
|
||||
def test_processing_result_with_trace_preserves_envelope_fields():
|
||||
result = ProcessingResult(
|
||||
output={"count": 1},
|
||||
dependencies=["doc.md"],
|
||||
metadata={"engine": "selector"},
|
||||
)
|
||||
|
||||
traced = result.with_trace(ProcessingTrace(event="query.done"))
|
||||
|
||||
assert traced.output == result.output
|
||||
assert traced.dependencies == ["doc.md"]
|
||||
assert traced.metadata == {"engine": "selector"}
|
||||
assert traced.trace[0].event == "query.done"
|
||||
|
||||
|
||||
def test_processing_result_from_error_normalizes_diagnostics():
|
||||
result = ProcessingResult.from_error(
|
||||
code="extension.missing_dependency",
|
||||
|
||||
@@ -54,6 +54,8 @@ def test_extension_registry_lists_by_kind_and_capability():
|
||||
assert [descriptor.id for descriptor in registry.require_capability("fts")] == [
|
||||
"backend.local-sqlite"
|
||||
]
|
||||
assert registry.require_capability("missing") == []
|
||||
assert registry.list(kind="missing") == []
|
||||
|
||||
|
||||
def test_extension_registry_rejects_duplicate_ids():
|
||||
|
||||
Reference in New Issue
Block a user