Complete memory graph and document value workplans

This commit is contained in:
2026-05-15 13:30:50 +02:00
parent f49ebb563b
commit 6cc44da628
25 changed files with 1546 additions and 168 deletions

View File

@@ -21,6 +21,8 @@ from markitect_tool.contract import (
validate_contract_file,
)
from markitect_tool.document_function import (
DOCUMENT_VALUE_KINDS,
MAX_FUNCTION_PIPELINE_DEPTH,
DocumentFunctionCall,
DocumentFunctionDescriptor,
DocumentFunctionError,
@@ -28,7 +30,11 @@ from markitect_tool.document_function import (
DocumentFunctionParameter,
DocumentFunctionRegistry,
DocumentFunctionRun,
DocumentValue,
coerce_document_value,
default_document_function_registry,
document_value_to_json,
format_document_value,
parse_document_function_calls,
render_document_functions,
validate_document_functions,
@@ -354,6 +360,8 @@ __all__ = [
"load_contract_file",
"validate_contract",
"validate_contract_file",
"DOCUMENT_VALUE_KINDS",
"MAX_FUNCTION_PIPELINE_DEPTH",
"DocumentFunctionCall",
"DocumentFunctionDescriptor",
"DocumentFunctionError",
@@ -361,7 +369,11 @@ __all__ = [
"DocumentFunctionParameter",
"DocumentFunctionRegistry",
"DocumentFunctionRun",
"DocumentValue",
"coerce_document_value",
"default_document_function_registry",
"document_value_to_json",
"format_document_value",
"parse_document_function_calls",
"render_document_functions",
"validate_document_functions",

View File

@@ -23,14 +23,62 @@ FENCE_CALL_RE = re.compile(
r"```(?P<info>[^\n`]*)\n(?P<body>.*?)\n```",
re.DOTALL,
)
# Upper bound on the number of stages in one document function pipeline;
# the call-expression parser raises DocumentFunctionError when an expression
# splits into more parts than this.
MAX_FUNCTION_PIPELINE_DEPTH = 12
# Alias for an arbitrary callable. Implementations are invoked as
# ``implementation(*args, **kwargs)``; presumably this is the type of a
# descriptor's ``implementation`` attribute (confirm against the descriptor).
FunctionImplementation = Callable[..., Any]
# Closed set of kind names a DocumentValue may carry. It is checked when a
# DocumentValue is constructed and when a function descriptor is registered
# (after normalizing the descriptor's declared output type).
DOCUMENT_VALUE_KINDS = {
"string",
"number",
"boolean",
"none",
"markdown",
"list",
"dictionary",
"record",
"table",
"reference",
"content_unit",
"unknown",
"dynamic",
}
class DocumentFunctionError(ValueError):
    """Signal that a document function could not be parsed or evaluated.

    Subclasses ``ValueError`` so callers that already catch ``ValueError``
    keep working.
    """
@dataclass(frozen=True)
class DocumentValue:
    """A document function result tagged with one of the known value kinds.

    ``value`` carries scalar payloads, ``items`` carries the members of
    list/table values and ``fields`` carries the members of dictionary/record
    values. ``metadata`` and ``provenance`` hold auxiliary information about
    the value.
    """

    kind: str
    value: Any = None
    items: list["DocumentValue"] = field(default_factory=list)
    fields: dict[str, "DocumentValue"] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)
    provenance: list[dict[str, Any]] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Reject kinds that are not listed in ``DOCUMENT_VALUE_KINDS``."""
        if self.kind in DOCUMENT_VALUE_KINDS:
            return
        raise DocumentFunctionError(f"Unknown document value kind `{self.kind}`.")

    def to_dict(self) -> dict[str, Any]:
        """Return a nested dictionary form of this value and its children.

        The assembled payload is passed through ``_drop_empty`` before being
        returned (presumably pruning empty entries; see that helper).
        """
        nested_items = [child.to_dict() for child in self.items]
        nested_fields = {name: child.to_dict() for name, child in self.fields.items()}
        payload = {
            "kind": self.kind,
            "value": self.value,
            "items": nested_items,
            "fields": nested_fields,
            "metadata": self.metadata,
            "provenance": self.provenance,
        }
        return _drop_empty(payload)

    def __str__(self) -> str:
        """Render the value using the inline formatting rules."""
        return format_document_value(self, inline=True)
@dataclass(frozen=True)
class DocumentFunctionParameter:
"""One declared document function parameter."""
@@ -107,6 +155,7 @@ class DocumentFunctionRun:
call: DocumentFunctionCall
output: Any = None
value: DocumentValue | None = None
diagnostics: list[Diagnostic] = field(default_factory=list)
provenance: list[ProcessingProvenance] = field(default_factory=list)
trace: list[ProcessingTrace] = field(default_factory=list)
@@ -120,7 +169,8 @@ class DocumentFunctionRun:
{
"call": self.call.to_dict(),
"valid": self.valid,
"output": self.output,
"output": _serialize_output(self.output),
"value": self.value.to_dict() if self.value else None,
"diagnostics": [diagnostic.to_dict() for diagnostic in self.diagnostics],
"provenance": [event.to_dict() for event in self.provenance],
"trace": [event.to_dict() for event in self.trace],
@@ -180,6 +230,10 @@ class DocumentFunctionRegistry:
raise DocumentFunctionError(f"Duplicate document function `{descriptor.id}`")
if descriptor.implementation is None:
raise DocumentFunctionError(f"Document function `{descriptor.id}` has no implementation")
if _normalize_output_type(descriptor.output_type) not in DOCUMENT_VALUE_KINDS:
raise DocumentFunctionError(
f"Document function `{descriptor.id}` declares unknown output type `{descriptor.output_type}`"
)
self._descriptors[descriptor.id] = descriptor
def get(self, function_id: str) -> DocumentFunctionDescriptor:
@@ -208,6 +262,7 @@ class DocumentFunctionRegistry:
) -> DocumentFunctionRun:
context = context or ProcessingContext()
output: Any = None
value: DocumentValue | None = None
diagnostics: list[Diagnostic] = []
provenance: list[ProcessingProvenance] = []
trace: list[ProcessingTrace] = []
@@ -230,12 +285,15 @@ class DocumentFunctionRegistry:
trace.extend(run.trace)
if not run.valid:
output = current.raw
value = run.value
break
output = run.output
value = run.value
return DocumentFunctionRun(
call=call,
output=output,
value=value,
diagnostics=diagnostics,
provenance=provenance,
trace=trace,
@@ -281,6 +339,10 @@ class DocumentFunctionRegistry:
else:
assert descriptor.implementation is not None
output = descriptor.implementation(*args, **kwargs)
value = coerce_document_value(output, declared_kind=descriptor.output_type)
value_diagnostic = _validate_output_value(descriptor, value, call, context)
if value_diagnostic is not None:
return DocumentFunctionRun(call=call, output=output, value=value, diagnostics=[value_diagnostic])
except Exception as exc:
return _call_error(call, "function.evaluation_failed", str(exc), context)
@@ -301,7 +363,83 @@ class DocumentFunctionRegistry:
metadata={"function": descriptor.id, "line": call.line},
)
]
return DocumentFunctionRun(call=call, output=output, provenance=provenance, trace=trace)
return DocumentFunctionRun(call=call, output=output, value=value, provenance=provenance, trace=trace)
def coerce_document_value(value: Any, *, declared_kind: str = "dynamic") -> DocumentValue:
    """Wrap a Python value in a ``DocumentValue`` of the declared kind.

    ``declared_kind`` is normalized first. With an open kind (``dynamic``)
    the kind is inferred from the Python type. Otherwise the value is wrapped
    only when its Python type fits the declared kind; a value that does not
    fit is returned as an ``unknown`` value whose metadata records the
    declared kind.
    """
    kind = _normalize_output_type(declared_kind)
    accepts_anything = kind in {"dynamic", "any"}

    # Already-typed values pass through when compatible with the declaration.
    if isinstance(value, DocumentValue):
        if accepts_anything or _value_matches_kind(value, kind):
            return value
        return DocumentValue(kind="unknown", value=value.to_dict(), metadata={"declared_kind": kind})

    if accepts_anything:
        return _infer_document_value(value)

    if kind in {"markdown", "string"} and isinstance(value, str):
        return DocumentValue(kind=kind, value=value)
    # bool is a subclass of int, so it must be excluded from numbers explicitly.
    if kind == "number" and isinstance(value, (int, float)) and not isinstance(value, bool):
        return DocumentValue(kind="number", value=value)
    if kind == "boolean" and isinstance(value, bool):
        return DocumentValue(kind="boolean", value=value)
    if kind == "none" and value is None:
        return DocumentValue(kind="none")
    if kind == "list" and isinstance(value, list):
        members = [coerce_document_value(member) for member in value]
        return DocumentValue(kind="list", items=members)
    if kind in {"dictionary", "record"} and isinstance(value, dict):
        members_by_name = {str(name): coerce_document_value(member) for name, member in value.items()}
        return DocumentValue(kind=kind, fields=members_by_name)
    if kind == "table" and isinstance(value, list):
        # Every table row is coerced as a record.
        rows = [coerce_document_value(row, declared_kind="record") for row in value]
        return DocumentValue(kind="table", items=rows)
    if kind in {"reference", "content_unit", "unknown"}:
        # These kinds accept any payload unchanged.
        return DocumentValue(kind=kind, value=value)

    return DocumentValue(kind="unknown", value=value, metadata={"declared_kind": kind})
def format_document_value(value: DocumentValue | Any, *, inline: bool) -> str:
    """Render a document value as Markdown text.

    Plain Python values are coerced to a ``DocumentValue`` first. Lists are
    joined with ``", "`` when ``inline`` is true and with newlines otherwise;
    dictionaries and records become sorted-key JSON; tables become a Markdown
    table; references and content units prefer a ``label`` or ``title`` from
    their metadata over the raw value.
    """
    doc = value if isinstance(value, DocumentValue) else coerce_document_value(value)
    kind = doc.kind

    if kind in {"markdown", "string"}:
        return str(doc.value or "")
    if kind == "number":
        return str(doc.value)
    if kind == "boolean":
        if doc.value:
            return "true"
        return "false"
    if kind == "none":
        return ""
    if kind == "list":
        joiner = ", " if inline else "\n"
        return joiner.join(format_document_value(member, inline=inline) for member in doc.items)
    if kind in {"dictionary", "record"}:
        plain = _document_value_to_plain(doc)
        return json.dumps(plain, sort_keys=True, ensure_ascii=False)
    if kind == "table":
        return _format_table_value(doc)
    if kind in {"reference", "content_unit"}:
        caption = doc.metadata.get("label") or doc.metadata.get("title")
        return str(caption or doc.value or "")
    if kind == "dynamic":
        # Re-infer the kind from the wrapped payload and format that instead.
        return format_document_value(coerce_document_value(doc.value), inline=inline)

    if doc.value is None:
        return ""
    return str(doc.value)
def document_value_to_json(value: DocumentValue | Any) -> dict[str, Any]:
    """Return the dictionary representation of a document value.

    Values that are not already a ``DocumentValue`` are coerced first.
    """
    if not isinstance(value, DocumentValue):
        value = coerce_document_value(value)
    return value.to_dict()
def default_document_function_registry() -> DocumentFunctionRegistry:
@@ -314,6 +452,7 @@ def default_document_function_registry() -> DocumentFunctionRegistry:
"Uppercase text.",
_text_upper,
[DocumentFunctionParameter("value")],
output_type="string",
examples=['{{mkt:text.upper "draft"}}'],
),
_descriptor(
@@ -321,6 +460,7 @@ def default_document_function_registry() -> DocumentFunctionRegistry:
"Lowercase text.",
_text_lower,
[DocumentFunctionParameter("value")],
output_type="string",
examples=['{{mkt:text.lower "DRAFT"}}'],
),
_descriptor(
@@ -328,6 +468,7 @@ def default_document_function_registry() -> DocumentFunctionRegistry:
"Title-case text.",
_text_title,
[DocumentFunctionParameter("value")],
output_type="string",
examples=['{{mkt:text.title "release notes"}}'],
),
_descriptor(
@@ -335,6 +476,7 @@ def default_document_function_registry() -> DocumentFunctionRegistry:
"Trim surrounding whitespace.",
_text_trim,
[DocumentFunctionParameter("value")],
output_type="string",
examples=['{{mkt:text.trim " ok "}}'],
),
_descriptor(
@@ -346,6 +488,7 @@ def default_document_function_registry() -> DocumentFunctionRegistry:
DocumentFunctionParameter("old"),
DocumentFunctionParameter("new"),
],
output_type="string",
examples=['{{mkt:text.replace "draft" draft final}}'],
),
_descriptor(
@@ -356,6 +499,7 @@ def default_document_function_registry() -> DocumentFunctionRegistry:
DocumentFunctionParameter("items", variadic=True),
DocumentFunctionParameter("sep", required=False, default=""),
],
output_type="string",
examples=['{{mkt:text.join "A" "B" sep=", "}}'],
),
_descriptor(
@@ -398,6 +542,7 @@ def default_document_function_registry() -> DocumentFunctionRegistry:
"Read a value from processing context variables.",
_data_get,
[DocumentFunctionParameter("key"), DocumentFunctionParameter("default", required=False, default="")],
output_type="dynamic",
examples=["{{mkt:data.get title}}"],
),
]
@@ -460,7 +605,7 @@ def render_document_functions(
trace.extend(run.trace)
if not run.valid:
return match.group(0)
return _format_function_output(run.output, inline=True)
return _format_function_output(run.value or run.output, inline=True)
content = INLINE_CALL_RE.sub(replace_inline, text)
@@ -483,7 +628,7 @@ def render_document_functions(
trace.extend(run.trace)
if not run.valid:
return match.group(0)
return _format_function_output(run.output, inline=False)
return _format_function_output(run.value or run.output, inline=False)
content = FENCE_CALL_RE.sub(replace_fence, content)
trace.append(ProcessingTrace(event="document_function.rendered", metadata={"calls": len(runs)}))
@@ -554,6 +699,10 @@ def _parse_call_expression(
pipeline_parts = _split_pipeline_expression(expression)
if not pipeline_parts:
raise DocumentFunctionError("Document function call is empty.")
if len(pipeline_parts) > MAX_FUNCTION_PIPELINE_DEPTH:
raise DocumentFunctionError(
f"Document function pipeline exceeds maximum depth {MAX_FUNCTION_PIPELINE_DEPTH}."
)
first = _parse_single_call(pipeline_parts[0], raw=raw, inline=inline, line=line, body=body)
pipeline = [
_parse_single_call(part, raw=part, inline=inline, line=line)
@@ -712,6 +861,109 @@ def _blocked_capabilities(
return sorted(set(blocked))
def _normalize_output_type(output_type: str | None) -> str:
aliases = {
None: "dynamic",
"": "dynamic",
"any": "dynamic",
"integer": "number",
"float": "number",
"bool": "boolean",
"dict": "dictionary",
"map": "dictionary",
"markdown_content": "markdown",
"content-unit": "content_unit",
}
normalized = str(output_type or "dynamic").strip().lower().replace("-", "_")
return aliases.get(normalized, normalized)
def _infer_document_value(value: Any) -> DocumentValue:
    """Choose a document value kind from the Python type of ``value``.

    Existing ``DocumentValue`` instances are returned unchanged. Lists and
    dictionaries are converted recursively. Any type not handled here becomes
    an ``unknown`` value holding ``str(value)``, with the Python type name
    recorded in its metadata.
    """
    if isinstance(value, DocumentValue):
        return value
    if value is None:
        return DocumentValue(kind="none")

    # Order matters: bool is a subclass of int and must be matched first.
    scalar_kinds = (
        (bool, "boolean"),
        ((int, float), "number"),
        (str, "string"),
    )
    for python_types, kind in scalar_kinds:
        if isinstance(value, python_types):
            return DocumentValue(kind=kind, value=value)

    if isinstance(value, list):
        members = [coerce_document_value(member) for member in value]
        return DocumentValue(kind="list", items=members)
    if isinstance(value, dict):
        members_by_name = {str(name): coerce_document_value(member) for name, member in value.items()}
        return DocumentValue(kind="dictionary", fields=members_by_name)

    type_name = type(value).__name__
    return DocumentValue(kind="unknown", value=str(value), metadata={"python_type": type_name})
def _value_matches_kind(value: DocumentValue, expected_kind: str) -> bool:
if expected_kind in {"dynamic", "any"}:
return True
if expected_kind == "markdown":
return value.kind in {"markdown", "string"}
if expected_kind == "dictionary":
return value.kind in {"dictionary", "record"}
return value.kind == expected_kind
def _validate_output_value(
    descriptor: DocumentFunctionDescriptor,
    value: DocumentValue,
    call: DocumentFunctionCall,
    context: ProcessingContext,
) -> Diagnostic | None:
    """Check a coerced function result and return the first problem found.

    Three conditions are tested in order: the value ended up ``unknown``, a
    ``deterministic`` function produced a ``dynamic`` value, or a
    ``reference``/``content_unit`` value has no provenance. Returns ``None``
    when none of them applies.
    """
    expected = _normalize_output_type(descriptor.output_type)
    function_id = descriptor.id
    produced_kind = value.kind

    if produced_kind == "unknown":
        code = "function.output_type_mismatch"
        message = f"Function `{function_id}` returned a value that does not match output type `{expected}`."
        details = {"function": function_id, "output_type": expected, "value_kind": produced_kind}
    elif descriptor.execution == "deterministic" and produced_kind == "dynamic":
        code = "function.dynamic_output"
        message = f"Function `{function_id}` returned a dynamic value in a deterministic context."
        details = {"function": function_id, "output_type": expected}
    elif produced_kind in {"reference", "content_unit"} and not value.provenance:
        code = "function.provenance_missing"
        message = f"Function `{function_id}` returned `{produced_kind}` without provenance."
        details = {"function": function_id, "value_kind": produced_kind}
    else:
        return None

    return _output_diagnostic(call, code, message, context, details)
def _output_diagnostic(
    call: DocumentFunctionCall,
    code: str,
    message: str,
    context: ProcessingContext,
    details: dict[str, Any],
) -> Diagnostic:
    """Build an error-severity diagnostic for a function output problem.

    A source location is attached only when the context has a source path or
    the call has a line; otherwise the diagnostic carries no location.
    """
    location = None
    if context.source_path or call.line:
        path_text = str(context.source_path) if context.source_path else None
        location = SourceLocation(path=path_text, line=call.line)
    return Diagnostic(
        severity="error",
        code=code,
        message=message,
        source=location,
        details=details,
    )
def _resolve_value(value: Any, context: ProcessingContext) -> Any:
if isinstance(value, str):
if value.startswith("${") and value.endswith("}"):
@@ -721,13 +973,59 @@ def _resolve_value(value: Any, context: ProcessingContext) -> Any:
def _format_function_output(value: Any, *, inline: bool) -> str:
if isinstance(value, str):
return value
return format_document_value(value, inline=inline)
def _document_value_to_plain(value: DocumentValue) -> Any:
if value.kind in {"string", "number", "boolean", "markdown", "reference", "content_unit", "unknown", "dynamic"}:
return value.value
if value.kind == "none":
return None
if value.kind in {"list", "table"}:
return [_document_value_to_plain(item) for item in value.items]
if value.kind in {"dictionary", "record"}:
return {key: _document_value_to_plain(raw) for key, raw in value.fields.items()}
return value.value
def _format_table_value(value: DocumentValue) -> str:
    """Render a table value as a Markdown pipe table.

    Only items of kind ``record`` or ``dictionary`` are used as rows. Columns
    are the union of row field names in first-seen order, and cells missing
    from a row are rendered as a ``none`` value. Returns an empty string when
    there are no usable rows or no columns.
    """
    records = [entry for entry in value.items if entry.kind in {"record", "dictionary"}]
    if not records:
        return ""

    # A dict is used as an insertion-ordered set of column names.
    seen_columns: dict[str, None] = {}
    for record in records:
        for name in record.fields:
            seen_columns.setdefault(name, None)
    columns = list(seen_columns)
    if not columns:
        return ""

    def as_row(cells) -> str:
        return "| " + " | ".join(cells) + " |"

    blank = DocumentValue(kind="none")
    lines = [
        as_row(_escape_table_cell(name) for name in columns),
        as_row("---" for _ in columns),
    ]
    for record in records:
        lines.append(
            as_row(
                _escape_table_cell(format_document_value(record.fields.get(name, blank), inline=True))
                for name in columns
            )
        )
    return "\n".join(lines)
def _escape_table_cell(value: str) -> str:
return value.replace("|", "\\|").replace("\n", " ").strip()
def _serialize_output(value: Any) -> Any:
    """Convert a raw function output into a JSON-friendly structure.

    ``DocumentValue`` instances are replaced by their ``to_dict()`` form,
    lists are converted element by element, and dictionaries are converted
    value by value with their keys stringified. Any other value is returned
    unchanged.

    Stale statements from the replaced text formatter were interleaved in the
    body: a joined-string return that referenced an undefined ``inline`` name,
    and ``json.dumps``/``str`` returns that ran before the recursive dict
    branch. They are removed so the list and dict branches actually recurse.
    """
    if isinstance(value, DocumentValue):
        return value.to_dict()
    if isinstance(value, list):
        return [_serialize_output(item) for item in value]
    if isinstance(value, dict):
        return {str(key): _serialize_output(raw) for key, raw in value.items()}
    return value
def _parse_literal(value: str) -> Any:

View File

@@ -26,6 +26,7 @@ def builtin_extension_registry() -> ExtensionRegistry:
_local_label_policy_descriptor(),
_document_function_descriptor(),
_memory_graph_contract_descriptor(),
_memory_runtime_adapter_descriptor(),
_agent_memory_descriptor(),
source_adapter_registry_descriptor(),
]:
@@ -319,6 +320,23 @@ def _document_function_descriptor() -> ExtensionDescriptor:
metadata={
"execution": "deterministic-only",
"external_policy_services_required": False,
"typed_values": True,
"value_kinds": [
"string",
"number",
"boolean",
"none",
"markdown",
"list",
"dictionary",
"record",
"table",
"reference",
"content_unit",
"unknown",
"dynamic",
],
"render_export_execution": False,
},
)
@@ -412,6 +430,13 @@ def _memory_graph_contract_descriptor() -> ExtensionDescriptor:
"examples/memory/memory-profile.local.yaml",
"examples/memory/decision-graph.yaml",
"examples/memory/decision-graph-selection.yaml",
"examples/memory/conversation-path.yaml",
"examples/memory/conversation-path-selection.yaml",
"examples/memory/knowledge-neighborhood.yaml",
"examples/memory/knowledge-neighborhood-selection.yaml",
"examples/memory/invalid-memory-graph.yaml",
"examples/memory/invalid-memory-profile.yaml",
"examples/memory/runtime-adapter-boundaries.yaml",
],
metadata={
"schema_versions": [
@@ -422,7 +447,59 @@ def _memory_graph_contract_descriptor() -> ExtensionDescriptor:
"runtime_execution_required": False,
"runtime_handoff_repositories": [
"kontextual-engine",
"phased-memory",
"infospace-bench",
],
"runtime_adapter_boundaries": [
"memory.runtime.kontextual-engine",
"memory.runtime.phased-memory",
"memory.store.external-graph",
"memory.store.vector",
"memory.extract.llm-assisted",
"memory.policy.enterprise-pdp",
"memory.registry.remote",
"memory.audit.sink",
],
},
)
def _memory_runtime_adapter_descriptor() -> ExtensionDescriptor:
    """Build the descriptor for the external memory runtime adapter boundary.

    The descriptor declares one ``describe`` capability plus four ``handoff``
    capabilities, sets every safety flag to ``False``, and points at the
    runtime adapter boundaries example catalog.
    """
    catalog_path = "examples/memory/runtime-adapter-boundaries.yaml"

    capabilities = [ProcessingCapability(id="memory_runtime_adapters", kind="describe")]
    capabilities.extend(
        ProcessingCapability(id=capability_id, kind="handoff")
        for capability_id in (
            "memory_graphs",
            "memory_events",
            "context_packages",
            "policy_decisions",
        )
    )

    # Every safety flag is off: the descriptor declares no file, network,
    # service, or runtime activity.
    safety = dict.fromkeys(
        ("reads_files", "writes_files", "network", "launches_services", "runtime_execution"),
        False,
    )

    metadata = {
        "descriptor_catalog": catalog_path,
        "markitect_role": "contract-validation-and-context-package-compilation",
        "external_runtime_roles": [
            "durable graph and event persistence",
            "graph and vector retrieval",
            "LLM-assisted graph extraction",
            "policy enforcement and reauthorization",
            "remote registry coordination",
            "audit and event sinks",
        ],
        "services_launched_by_markitect_tool": False,
    }

    return ExtensionDescriptor(
        id="memory.runtime-adapter-boundary",
        kind="memory-runtime-adapter",
        summary="Non-executing handoff descriptors for external memory runtimes, stores, extraction, policy, and audit.",
        capabilities=capabilities,
        safety=safety,
        input_contract="MemoryProfile | MemoryGraph | MemoryEvent | MemoryGraphSelection | ContextPackage metadata",
        output_contract="External runtime/store/policy/audit adapter descriptor",
        diagnostics_namespace="memory.runtime_adapter",
        provenance_prefix="memory.runtime_adapter_boundary",
        docs=["docs/memory-graph-contract.md"],
        examples=[catalog_path],
        metadata=metadata,
    )

View File

@@ -31,6 +31,7 @@ MEMORY_NODE_KINDS = {
"assumption",
"alternative",
"artifact",
"activation",
"capability",
"claim",
"concept",
@@ -45,6 +46,7 @@ MEMORY_NODE_KINDS = {
"evidence",
"finding",
"follow_up",
"interruption",
"memory",
"observation",
"outcome",