generated from coulomb/repo-seed
Extension framework optimization
This commit is contained in:
@@ -94,6 +94,58 @@ Subsystem-specific dataclasses may remain richer. The canonical model is the
|
|||||||
bridge that lets callbacks, registries, diagnostics, provenance, and future
|
bridge that lets callbacks, registries, diagnostics, provenance, and future
|
||||||
policy checks interact consistently.
|
policy checks interact consistently.
|
||||||
|
|
||||||
|
### Minimal Runnable Extension
|
||||||
|
|
||||||
|
```python
|
||||||
|
from markitect_tool.extension import (
|
||||||
|
ExtensionDescriptor,
|
||||||
|
ExtensionExecutor,
|
||||||
|
ExtensionRegistry,
|
||||||
|
ProcessingRequest,
|
||||||
|
ProcessingResult,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def run_example(request: ProcessingRequest) -> ProcessingResult:
|
||||||
|
name = request.input.get("name", "world")
|
||||||
|
return ProcessingResult(output=f"Hello, {name}")
|
||||||
|
|
||||||
|
|
||||||
|
descriptor = ExtensionDescriptor(
|
||||||
|
id="example.hello",
|
||||||
|
kind="example",
|
||||||
|
summary="Small example extension.",
|
||||||
|
factory=lambda: run_example,
|
||||||
|
)
|
||||||
|
|
||||||
|
registry = ExtensionRegistry([descriptor])
|
||||||
|
result = ExtensionExecutor(registry).execute(
|
||||||
|
"example.hello",
|
||||||
|
ProcessingRequest(operation="example.hello", input={"name": "Markitect"}),
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Use this executor boundary when callbacks, dependency checks, trace events, or
|
||||||
|
future policy checks matter. For tiny deterministic helpers, it is still fine to
|
||||||
|
keep the existing direct function API and expose a descriptor alongside it.
|
||||||
|
|
||||||
|
### Cache-Key Rules
|
||||||
|
|
||||||
|
`ProcessingRequest.cache_key` includes:
|
||||||
|
|
||||||
|
- operation
|
||||||
|
- input
|
||||||
|
- stable context material
|
||||||
|
- options
|
||||||
|
- scope
|
||||||
|
- declared capabilities
|
||||||
|
- request metadata
|
||||||
|
|
||||||
|
Stable context material includes source path, namespaces, variables, policy, and
|
||||||
|
metadata. It does not include workspace root, caller, or live backend handles.
|
||||||
|
This keeps cache keys portable while avoiding collisions for context-sensitive
|
||||||
|
operations.
|
||||||
|
|
||||||
## Diagnostics
|
## Diagnostics
|
||||||
|
|
||||||
Diagnostics should be:
|
Diagnostics should be:
|
||||||
|
|||||||
@@ -95,6 +95,63 @@ The canonical processing model should define a small set of shared envelopes:
|
|||||||
Subsystem-specific types may remain richer. The canonical model is the bridge,
|
Subsystem-specific types may remain richer. The canonical model is the bridge,
|
||||||
not a forced replacement for every local dataclass.
|
not a forced replacement for every local dataclass.
|
||||||
|
|
||||||
|
### Processing Request Example
|
||||||
|
|
||||||
|
```python
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from markitect_tool.extension import ProcessingContext, ProcessingRequest
|
||||||
|
|
||||||
|
request = ProcessingRequest(
|
||||||
|
operation="query.selector",
|
||||||
|
input={"selector": "sections[heading=Decision]"},
|
||||||
|
context=ProcessingContext(
|
||||||
|
source_path=Path("docs/adr.md"),
|
||||||
|
namespaces={"std": "standards"},
|
||||||
|
variables={"audience": "internal"},
|
||||||
|
),
|
||||||
|
options={"format": "json"},
|
||||||
|
scope="document",
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
The request cache key includes operation, input, options, scope, declared
|
||||||
|
capabilities, metadata, and stable context semantics:
|
||||||
|
|
||||||
|
- source path
|
||||||
|
- namespaces
|
||||||
|
- variables
|
||||||
|
- policy
|
||||||
|
- metadata
|
||||||
|
|
||||||
|
It intentionally excludes root path, caller name, and live backend handles.
|
||||||
|
Those are execution-environment details. If they matter semantically for an
|
||||||
|
extension, put explicit values in request options or metadata.
|
||||||
|
|
||||||
|
### Processing Result Example
|
||||||
|
|
||||||
|
```python
|
||||||
|
from markitect_tool.extension import (
|
||||||
|
ProcessingProvenance,
|
||||||
|
ProcessingResult,
|
||||||
|
ProcessingTrace,
|
||||||
|
)
|
||||||
|
|
||||||
|
result = ProcessingResult(
|
||||||
|
output={"count": 1},
|
||||||
|
provenance=[
|
||||||
|
ProcessingProvenance(
|
||||||
|
operation="query.selector",
|
||||||
|
source_path="docs/adr.md",
|
||||||
|
content_hash="sha256:...",
|
||||||
|
)
|
||||||
|
],
|
||||||
|
).with_trace(ProcessingTrace(event="query.done"))
|
||||||
|
```
|
||||||
|
|
||||||
|
`ProcessingResult.valid` is derived from diagnostics. Any diagnostic with
|
||||||
|
severity `error` makes the result invalid.
|
||||||
|
|
||||||
## Registration Strategy
|
## Registration Strategy
|
||||||
|
|
||||||
Start with in-package registration:
|
Start with in-package registration:
|
||||||
@@ -115,6 +172,35 @@ packages become a real requirement.
|
|||||||
See `docs/extension-authoring.md` for the extension authoring checklist and
|
See `docs/extension-authoring.md` for the extension authoring checklist and
|
||||||
descriptor template.
|
descriptor template.
|
||||||
|
|
||||||
|
### Registry Use
|
||||||
|
|
||||||
|
Extension registries are optimized for common lookup patterns:
|
||||||
|
|
||||||
|
- `registry.get("backend.local-sqlite")`
|
||||||
|
- `registry.list(kind="query-engine")`
|
||||||
|
- `registry.require_capability("fts")`
|
||||||
|
- `registry.check_dependencies("jsonpath")`
|
||||||
|
|
||||||
|
Kinds and capabilities are indexed at registration time, so large registries can
|
||||||
|
avoid repeated full scans for basic discovery.
|
||||||
|
|
||||||
|
### Execution Lifecycle
|
||||||
|
|
||||||
|
`ExtensionExecutor` wraps a descriptor factory with deterministic lifecycle
|
||||||
|
hooks:
|
||||||
|
|
||||||
|
1. Fetch descriptor.
|
||||||
|
2. Check required optional dependencies.
|
||||||
|
3. Instantiate callable implementation.
|
||||||
|
4. Run `before` callbacks.
|
||||||
|
5. Execute implementation.
|
||||||
|
6. Normalize result type.
|
||||||
|
7. Append `extension.executed` trace.
|
||||||
|
8. Run success or failure callbacks.
|
||||||
|
9. Run final `after` callbacks.
|
||||||
|
|
||||||
|
Callbacks are explicit. The framework does not introduce hidden global behavior.
|
||||||
|
|
||||||
## Compatibility Rules
|
## Compatibility Rules
|
||||||
|
|
||||||
The refactor must preserve:
|
The refactor must preserve:
|
||||||
|
|||||||
@@ -78,21 +78,12 @@ class ExtensionExecutor:
|
|||||||
f"Extension `{extension_id}` returned {type(result).__name__}, expected ProcessingResult"
|
f"Extension `{extension_id}` returned {type(result).__name__}, expected ProcessingResult"
|
||||||
)
|
)
|
||||||
|
|
||||||
result = _with_trace(result, ProcessingTrace(event="extension.executed", metadata={"id": extension_id}))
|
result = result.with_trace(
|
||||||
|
ProcessingTrace(event="extension.executed", metadata={"id": extension_id})
|
||||||
|
)
|
||||||
callbacks = self.lifecycle.after_success if result.valid else self.lifecycle.after_failure
|
callbacks = self.lifecycle.after_success if result.valid else self.lifecycle.after_failure
|
||||||
for callback in callbacks:
|
for callback in callbacks:
|
||||||
callback(descriptor, request, result)
|
callback(descriptor, request, result)
|
||||||
for callback in self.lifecycle.after:
|
for callback in self.lifecycle.after:
|
||||||
callback(descriptor, request, result)
|
callback(descriptor, request, result)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def _with_trace(result: ProcessingResult, trace: ProcessingTrace) -> ProcessingResult:
|
|
||||||
return ProcessingResult(
|
|
||||||
output=result.output,
|
|
||||||
diagnostics=result.diagnostics,
|
|
||||||
provenance=result.provenance,
|
|
||||||
dependencies=result.dependencies,
|
|
||||||
trace=[*result.trace, trace],
|
|
||||||
metadata=result.metadata,
|
|
||||||
)
|
|
||||||
|
|||||||
@@ -80,6 +80,25 @@ class ProcessingContext:
|
|||||||
}
|
}
|
||||||
return _drop_empty(data)
|
return _drop_empty(data)
|
||||||
|
|
||||||
|
def cache_key_material(self) -> dict[str, Any]:
|
||||||
|
"""Return stable context fields that may affect deterministic output.
|
||||||
|
|
||||||
|
The workspace root, caller name, and backend handles are intentionally
|
||||||
|
excluded: they are execution environment details, not document semantics.
|
||||||
|
Extensions that need them in cache keys should include explicit values in
|
||||||
|
request options or metadata.
|
||||||
|
"""
|
||||||
|
|
||||||
|
return _drop_empty(
|
||||||
|
{
|
||||||
|
"source_path": str(self.source_path) if self.source_path else None,
|
||||||
|
"namespaces": self.namespaces,
|
||||||
|
"variables": self.variables,
|
||||||
|
"policy": self.policy,
|
||||||
|
"metadata": self.metadata,
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class ProcessingRequest:
|
class ProcessingRequest:
|
||||||
@@ -98,6 +117,7 @@ class ProcessingRequest:
|
|||||||
payload = {
|
payload = {
|
||||||
"operation": self.operation,
|
"operation": self.operation,
|
||||||
"input": self.input,
|
"input": self.input,
|
||||||
|
"context": self.context.cache_key_material(),
|
||||||
"options": self.options,
|
"options": self.options,
|
||||||
"scope": self.scope,
|
"scope": self.scope,
|
||||||
"capabilities": [capability.to_dict() for capability in self.capabilities],
|
"capabilities": [capability.to_dict() for capability in self.capabilities],
|
||||||
@@ -148,6 +168,18 @@ class ProcessingResult:
|
|||||||
}
|
}
|
||||||
return _drop_empty(data)
|
return _drop_empty(data)
|
||||||
|
|
||||||
|
def with_trace(self, event: ProcessingTrace) -> "ProcessingResult":
|
||||||
|
"""Return a copy with an additional trace event."""
|
||||||
|
|
||||||
|
return ProcessingResult(
|
||||||
|
output=self.output,
|
||||||
|
diagnostics=self.diagnostics,
|
||||||
|
provenance=self.provenance,
|
||||||
|
dependencies=self.dependencies,
|
||||||
|
trace=[*self.trace, event],
|
||||||
|
metadata=self.metadata,
|
||||||
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_error(
|
def from_error(
|
||||||
cls,
|
cls,
|
||||||
|
|||||||
@@ -112,6 +112,8 @@ class ExtensionRegistry:
|
|||||||
|
|
||||||
def __init__(self, descriptors: Iterable[ExtensionDescriptor] | None = None) -> None:
|
def __init__(self, descriptors: Iterable[ExtensionDescriptor] | None = None) -> None:
|
||||||
self._descriptors: dict[str, ExtensionDescriptor] = {}
|
self._descriptors: dict[str, ExtensionDescriptor] = {}
|
||||||
|
self._by_kind: dict[str, set[str]] = {}
|
||||||
|
self._by_capability: dict[str, set[str]] = {}
|
||||||
for descriptor in descriptors or []:
|
for descriptor in descriptors or []:
|
||||||
self.register(descriptor)
|
self.register(descriptor)
|
||||||
|
|
||||||
@@ -119,6 +121,9 @@ class ExtensionRegistry:
|
|||||||
if descriptor.id in self._descriptors:
|
if descriptor.id in self._descriptors:
|
||||||
raise ExtensionRegistryError(f"Duplicate extension id `{descriptor.id}`")
|
raise ExtensionRegistryError(f"Duplicate extension id `{descriptor.id}`")
|
||||||
self._descriptors[descriptor.id] = descriptor
|
self._descriptors[descriptor.id] = descriptor
|
||||||
|
self._by_kind.setdefault(descriptor.kind, set()).add(descriptor.id)
|
||||||
|
for capability in descriptor.capabilities:
|
||||||
|
self._by_capability.setdefault(capability.id, set()).add(descriptor.id)
|
||||||
|
|
||||||
def get(self, extension_id: str) -> ExtensionDescriptor:
|
def get(self, extension_id: str) -> ExtensionDescriptor:
|
||||||
try:
|
try:
|
||||||
@@ -127,16 +132,16 @@ class ExtensionRegistry:
|
|||||||
raise ExtensionRegistryError(f"Unknown extension `{extension_id}`") from exc
|
raise ExtensionRegistryError(f"Unknown extension `{extension_id}`") from exc
|
||||||
|
|
||||||
def list(self, *, kind: str | None = None) -> list[ExtensionDescriptor]:
|
def list(self, *, kind: str | None = None) -> list[ExtensionDescriptor]:
|
||||||
descriptors = [self._descriptors[key] for key in sorted(self._descriptors)]
|
|
||||||
if kind is None:
|
if kind is None:
|
||||||
return descriptors
|
ids = sorted(self._descriptors)
|
||||||
return [descriptor for descriptor in descriptors if descriptor.kind == kind]
|
else:
|
||||||
|
ids = sorted(self._by_kind.get(kind, set()))
|
||||||
|
return [self._descriptors[key] for key in ids]
|
||||||
|
|
||||||
def require_capability(self, capability_id: str) -> list[ExtensionDescriptor]:
|
def require_capability(self, capability_id: str) -> list[ExtensionDescriptor]:
|
||||||
return [
|
return [
|
||||||
descriptor
|
self._descriptors[extension_id]
|
||||||
for descriptor in self.list()
|
for extension_id in sorted(self._by_capability.get(capability_id, set()))
|
||||||
if any(capability.id == capability_id for capability in descriptor.capabilities)
|
|
||||||
]
|
]
|
||||||
|
|
||||||
def check_dependencies(
|
def check_dependencies(
|
||||||
|
|||||||
@@ -33,6 +33,13 @@ def test_processing_request_serializes_context_and_cache_key():
|
|||||||
options={"format": "json"},
|
options={"format": "json"},
|
||||||
capabilities=[ProcessingCapability(id="ast", description="Read parsed AST")],
|
capabilities=[ProcessingCapability(id="ast", description="Read parsed AST")],
|
||||||
).cache_key
|
).cache_key
|
||||||
|
assert request.cache_key != ProcessingRequest(
|
||||||
|
operation="query.selector",
|
||||||
|
input={"selector": "sections[heading=Decision]"},
|
||||||
|
context=ProcessingContext(source_path=Path("other.md")),
|
||||||
|
options={"format": "json"},
|
||||||
|
capabilities=[ProcessingCapability(id="ast", description="Read parsed AST")],
|
||||||
|
).cache_key
|
||||||
|
|
||||||
|
|
||||||
def test_processing_result_validity_provenance_and_trace():
|
def test_processing_result_validity_provenance_and_trace():
|
||||||
@@ -58,6 +65,21 @@ def test_processing_result_validity_provenance_and_trace():
|
|||||||
assert data["trace"][0]["metadata"]["engine"] == "selector"
|
assert data["trace"][0]["metadata"]["engine"] == "selector"
|
||||||
|
|
||||||
|
|
||||||
|
def test_processing_result_with_trace_preserves_envelope_fields():
|
||||||
|
result = ProcessingResult(
|
||||||
|
output={"count": 1},
|
||||||
|
dependencies=["doc.md"],
|
||||||
|
metadata={"engine": "selector"},
|
||||||
|
)
|
||||||
|
|
||||||
|
traced = result.with_trace(ProcessingTrace(event="query.done"))
|
||||||
|
|
||||||
|
assert traced.output == result.output
|
||||||
|
assert traced.dependencies == ["doc.md"]
|
||||||
|
assert traced.metadata == {"engine": "selector"}
|
||||||
|
assert traced.trace[0].event == "query.done"
|
||||||
|
|
||||||
|
|
||||||
def test_processing_result_from_error_normalizes_diagnostics():
|
def test_processing_result_from_error_normalizes_diagnostics():
|
||||||
result = ProcessingResult.from_error(
|
result = ProcessingResult.from_error(
|
||||||
code="extension.missing_dependency",
|
code="extension.missing_dependency",
|
||||||
|
|||||||
@@ -54,6 +54,8 @@ def test_extension_registry_lists_by_kind_and_capability():
|
|||||||
assert [descriptor.id for descriptor in registry.require_capability("fts")] == [
|
assert [descriptor.id for descriptor in registry.require_capability("fts")] == [
|
||||||
"backend.local-sqlite"
|
"backend.local-sqlite"
|
||||||
]
|
]
|
||||||
|
assert registry.require_capability("missing") == []
|
||||||
|
assert registry.list(kind="missing") == []
|
||||||
|
|
||||||
|
|
||||||
def test_extension_registry_rejects_duplicate_ids():
|
def test_extension_registry_rejects_duplicate_ids():
|
||||||
|
|||||||
Reference in New Issue
Block a user