Complete memory graph and document value workplans

2026-05-15 13:30:50 +02:00
parent f49ebb563b
commit 6cc44da628
25 changed files with 1546 additions and 168 deletions
--- a/src/markitect_tool/init.py
+++ b/src/markitect_tool/init.py
@@ -21,6 +21,8 @@ from markitect_tool.contract import (
    validate_contract_file,
 )
 from markitect_tool.document_function import (
+    DOCUMENT_VALUE_KINDS,
+    MAX_FUNCTION_PIPELINE_DEPTH,
    DocumentFunctionCall,
    DocumentFunctionDescriptor,
    DocumentFunctionError,
@@ -28,7 +30,11 @@ from markitect_tool.document_function import (
    DocumentFunctionParameter,
    DocumentFunctionRegistry,
    DocumentFunctionRun,
+    DocumentValue,
+    coerce_document_value,
    default_document_function_registry,
+    document_value_to_json,
+    format_document_value,
    parse_document_function_calls,
    render_document_functions,
    validate_document_functions,
@@ -354,6 +360,8 @@ __all__ = [
    "load_contract_file",
    "validate_contract",
    "validate_contract_file",
+    "DOCUMENT_VALUE_KINDS",
+    "MAX_FUNCTION_PIPELINE_DEPTH",
    "DocumentFunctionCall",
    "DocumentFunctionDescriptor",
    "DocumentFunctionError",
@@ -361,7 +369,11 @@ __all__ = [
    "DocumentFunctionParameter",
    "DocumentFunctionRegistry",
    "DocumentFunctionRun",
+    "DocumentValue",
+    "coerce_document_value",
    "default_document_function_registry",
+    "document_value_to_json",
+    "format_document_value",
    "parse_document_function_calls",
    "render_document_functions",
    "validate_document_functions",
--- a/src/markitect_tool/document_function.py
+++ b/src/markitect_tool/document_function.py
@@ -23,14 +23,62 @@ FENCE_CALL_RE = re.compile(
    r"```(?P<info>[^\n`]*)\n(?P<body>.*?)\n```",
    re.DOTALL,
 )
+MAX_FUNCTION_PIPELINE_DEPTH = 12

 FunctionImplementation = Callable[..., Any]

+DOCUMENT_VALUE_KINDS = {
+    "string",
+    "number",
+    "boolean",
+    "none",
+    "markdown",
+    "list",
+    "dictionary",
+    "record",
+    "table",
+    "reference",
+    "content_unit",
+    "unknown",
+    "dynamic",
+}
+

 class DocumentFunctionError(ValueError):
    """Raised when document function parsing or evaluation fails."""


+@dataclass(frozen=True)
+class DocumentValue:
+    """Typed value produced by a document function."""
+
+    kind: str
+    value: Any = None
+    items: list["DocumentValue"] = field(default_factory=list)
+    fields: dict[str, "DocumentValue"] = field(default_factory=dict)
+    metadata: dict[str, Any] = field(default_factory=dict)
+    provenance: list[dict[str, Any]] = field(default_factory=list)
+
+    def __post_init__(self) -> None:
+        if self.kind not in DOCUMENT_VALUE_KINDS:
+            raise DocumentFunctionError(f"Unknown document value kind `{self.kind}`.")
+
+    def to_dict(self) -> dict[str, Any]:
+        return _drop_empty(
+            {
+                "kind": self.kind,
+                "value": self.value,
+                "items": [item.to_dict() for item in self.items],
+                "fields": {key: value.to_dict() for key, value in self.fields.items()},
+                "metadata": self.metadata,
+                "provenance": self.provenance,
+            }
+        )
+
+    def __str__(self) -> str:
+        return format_document_value(self, inline=True)
+
+
@dataclass(frozen=True)
 class DocumentFunctionParameter:
    """One declared document function parameter."""
@@ -107,6 +155,7 @@ class DocumentFunctionRun:

    call: DocumentFunctionCall
    output: Any = None
+    value: DocumentValue | None = None
    diagnostics: list[Diagnostic] = field(default_factory=list)
    provenance: list[ProcessingProvenance] = field(default_factory=list)
    trace: list[ProcessingTrace] = field(default_factory=list)
@@ -120,7 +169,8 @@ class DocumentFunctionRun:
            {
                "call": self.call.to_dict(),
                "valid": self.valid,
-                "output": self.output,
+                "output": _serialize_output(self.output),
+                "value": self.value.to_dict() if self.value else None,
                "diagnostics": [diagnostic.to_dict() for diagnostic in self.diagnostics],
                "provenance": [event.to_dict() for event in self.provenance],
                "trace": [event.to_dict() for event in self.trace],
@@ -180,6 +230,10 @@ class DocumentFunctionRegistry:
            raise DocumentFunctionError(f"Duplicate document function `{descriptor.id}`")
        if descriptor.implementation is None:
            raise DocumentFunctionError(f"Document function `{descriptor.id}` has no implementation")
+        if _normalize_output_type(descriptor.output_type) not in DOCUMENT_VALUE_KINDS:
+            raise DocumentFunctionError(
+                f"Document function `{descriptor.id}` declares unknown output type `{descriptor.output_type}`"
+            )
        self._descriptors[descriptor.id] = descriptor

    def get(self, function_id: str) -> DocumentFunctionDescriptor:
@@ -208,6 +262,7 @@ class DocumentFunctionRegistry:
    ) -> DocumentFunctionRun:
        context = context or ProcessingContext()
        output: Any = None
+        value: DocumentValue | None = None
        diagnostics: list[Diagnostic] = []
        provenance: list[ProcessingProvenance] = []
        trace: list[ProcessingTrace] = []
@@ -230,12 +285,15 @@ class DocumentFunctionRegistry:
            trace.extend(run.trace)
            if not run.valid:
                output = current.raw
+                value = run.value
                break
            output = run.output
+            value = run.value

        return DocumentFunctionRun(
            call=call,
            output=output,
+            value=value,
            diagnostics=diagnostics,
            provenance=provenance,
            trace=trace,
@@ -281,6 +339,10 @@ class DocumentFunctionRegistry:
            else:
                assert descriptor.implementation is not None
                output = descriptor.implementation(*args, **kwargs)
+            value = coerce_document_value(output, declared_kind=descriptor.output_type)
+            value_diagnostic = _validate_output_value(descriptor, value, call, context)
+            if value_diagnostic is not None:
+                return DocumentFunctionRun(call=call, output=output, value=value, diagnostics=[value_diagnostic])
        except Exception as exc:
            return _call_error(call, "function.evaluation_failed", str(exc), context)

@@ -301,7 +363,83 @@ class DocumentFunctionRegistry:
                metadata={"function": descriptor.id, "line": call.line},
            )
        ]
-        return DocumentFunctionRun(call=call, output=output, provenance=provenance, trace=trace)
+        return DocumentFunctionRun(call=call, output=output, value=value, provenance=provenance, trace=trace)
+
+
+def coerce_document_value(value: Any, *, declared_kind: str = "dynamic") -> DocumentValue:
+    """Coerce a Python value into a typed document value."""
+
+    normalized_kind = _normalize_output_type(declared_kind)
+    if isinstance(value, DocumentValue):
+        if normalized_kind in {"dynamic", "any"} or _value_matches_kind(value, normalized_kind):
+            return value
+        return DocumentValue(kind="unknown", value=value.to_dict(), metadata={"declared_kind": normalized_kind})
+    if normalized_kind == "dynamic" or normalized_kind == "any":
+        return _infer_document_value(value)
+    if normalized_kind == "markdown":
+        if isinstance(value, str):
+            return DocumentValue(kind="markdown", value=value)
+    if normalized_kind == "string":
+        if isinstance(value, str):
+            return DocumentValue(kind="string", value=value)
+    if normalized_kind == "number":
+        if isinstance(value, (int, float)) and not isinstance(value, bool):
+            return DocumentValue(kind="number", value=value)
+    if normalized_kind == "boolean":
+        if isinstance(value, bool):
+            return DocumentValue(kind="boolean", value=value)
+    if normalized_kind == "none":
+        if value is None:
+            return DocumentValue(kind="none")
+    if normalized_kind == "list":
+        if isinstance(value, list):
+            return DocumentValue(kind="list", items=[coerce_document_value(item) for item in value])
+    if normalized_kind in {"dictionary", "record"}:
+        if isinstance(value, dict):
+            return DocumentValue(
+                kind=normalized_kind,
+                fields={str(key): coerce_document_value(raw) for key, raw in value.items()},
+            )
+    if normalized_kind == "table":
+        if isinstance(value, list):
+            return DocumentValue(kind="table", items=[coerce_document_value(item, declared_kind="record") for item in value])
+    if normalized_kind in {"reference", "content_unit", "unknown"}:
+        return DocumentValue(kind=normalized_kind, value=value)
+    return DocumentValue(kind="unknown", value=value, metadata={"declared_kind": normalized_kind})
+
+
+def format_document_value(value: DocumentValue | Any, *, inline: bool) -> str:
+    """Map a typed document value to deterministic Markdown text."""
+
+    document_value = value if isinstance(value, DocumentValue) else coerce_document_value(value)
+    if document_value.kind in {"markdown", "string"}:
+        return str(document_value.value or "")
+    if document_value.kind == "number":
+        return str(document_value.value)
+    if document_value.kind == "boolean":
+        return "true" if document_value.value else "false"
+    if document_value.kind == "none":
+        return ""
+    if document_value.kind == "list":
+        rendered = [format_document_value(item, inline=inline) for item in document_value.items]
+        return ", ".join(rendered) if inline else "\n".join(rendered)
+    if document_value.kind in {"dictionary", "record"}:
+        return json.dumps(_document_value_to_plain(document_value), sort_keys=True, ensure_ascii=False)
+    if document_value.kind == "table":
+        return _format_table_value(document_value)
+    if document_value.kind in {"reference", "content_unit"}:
+        label = document_value.metadata.get("label") or document_value.metadata.get("title")
+        return str(label or document_value.value or "")
+    if document_value.kind == "dynamic":
+        return format_document_value(coerce_document_value(document_value.value), inline=inline)
+    return "" if document_value.value is None else str(document_value.value)
+
+
+def document_value_to_json(value: DocumentValue | Any) -> dict[str, Any]:
+    """Return the stable JSON-compatible representation of a document value."""
+
+    document_value = value if isinstance(value, DocumentValue) else coerce_document_value(value)
+    return document_value.to_dict()


 def default_document_function_registry() -> DocumentFunctionRegistry:
@@ -314,6 +452,7 @@ def default_document_function_registry() -> DocumentFunctionRegistry:
                "Uppercase text.",
                _text_upper,
                [DocumentFunctionParameter("value")],
+                output_type="string",
                examples=['{{mkt:text.upper "draft"}}'],
            ),
            _descriptor(
@@ -321,6 +460,7 @@ def default_document_function_registry() -> DocumentFunctionRegistry:
                "Lowercase text.",
                _text_lower,
                [DocumentFunctionParameter("value")],
+                output_type="string",
                examples=['{{mkt:text.lower "DRAFT"}}'],
            ),
            _descriptor(
@@ -328,6 +468,7 @@ def default_document_function_registry() -> DocumentFunctionRegistry:
                "Title-case text.",
                _text_title,
                [DocumentFunctionParameter("value")],
+                output_type="string",
                examples=['{{mkt:text.title "release notes"}}'],
            ),
            _descriptor(
@@ -335,6 +476,7 @@ def default_document_function_registry() -> DocumentFunctionRegistry:
                "Trim surrounding whitespace.",
                _text_trim,
                [DocumentFunctionParameter("value")],
+                output_type="string",
                examples=['{{mkt:text.trim "  ok  "}}'],
            ),
            _descriptor(
@@ -346,6 +488,7 @@ def default_document_function_registry() -> DocumentFunctionRegistry:
                    DocumentFunctionParameter("old"),
                    DocumentFunctionParameter("new"),
                ],
+                output_type="string",
                examples=['{{mkt:text.replace "draft" draft final}}'],
            ),
            _descriptor(
@@ -356,6 +499,7 @@ def default_document_function_registry() -> DocumentFunctionRegistry:
                    DocumentFunctionParameter("items", variadic=True),
                    DocumentFunctionParameter("sep", required=False, default=""),
                ],
+                output_type="string",
                examples=['{{mkt:text.join "A" "B" sep=", "}}'],
            ),
            _descriptor(
@@ -398,6 +542,7 @@ def default_document_function_registry() -> DocumentFunctionRegistry:
                "Read a value from processing context variables.",
                _data_get,
                [DocumentFunctionParameter("key"), DocumentFunctionParameter("default", required=False, default="")],
+                output_type="dynamic",
                examples=["{{mkt:data.get title}}"],
            ),
        ]
@@ -460,7 +605,7 @@ def render_document_functions(
        trace.extend(run.trace)
        if not run.valid:
            return match.group(0)
-        return _format_function_output(run.output, inline=True)
+        return _format_function_output(run.value or run.output, inline=True)

    content = INLINE_CALL_RE.sub(replace_inline, text)

@@ -483,7 +628,7 @@ def render_document_functions(
        trace.extend(run.trace)
        if not run.valid:
            return match.group(0)
-        return _format_function_output(run.output, inline=False)
+        return _format_function_output(run.value or run.output, inline=False)

    content = FENCE_CALL_RE.sub(replace_fence, content)
    trace.append(ProcessingTrace(event="document_function.rendered", metadata={"calls": len(runs)}))
@@ -554,6 +699,10 @@ def _parse_call_expression(
    pipeline_parts = _split_pipeline_expression(expression)
    if not pipeline_parts:
        raise DocumentFunctionError("Document function call is empty.")
+    if len(pipeline_parts) > MAX_FUNCTION_PIPELINE_DEPTH:
+        raise DocumentFunctionError(
+            f"Document function pipeline exceeds maximum depth {MAX_FUNCTION_PIPELINE_DEPTH}."
+        )
    first = _parse_single_call(pipeline_parts[0], raw=raw, inline=inline, line=line, body=body)
    pipeline = [
        _parse_single_call(part, raw=part, inline=inline, line=line)
@@ -712,6 +861,109 @@ def _blocked_capabilities(
    return sorted(set(blocked))


+def _normalize_output_type(output_type: str | None) -> str:
+    aliases = {
+        None: "dynamic",
+        "": "dynamic",
+        "any": "dynamic",
+        "integer": "number",
+        "float": "number",
+        "bool": "boolean",
+        "dict": "dictionary",
+        "map": "dictionary",
+        "markdown_content": "markdown",
+        "content-unit": "content_unit",
+    }
+    normalized = str(output_type or "dynamic").strip().lower().replace("-", "_")
+    return aliases.get(normalized, normalized)
+
+
+def _infer_document_value(value: Any) -> DocumentValue:
+    if isinstance(value, DocumentValue):
+        return value
+    if value is None:
+        return DocumentValue(kind="none")
+    if isinstance(value, bool):
+        return DocumentValue(kind="boolean", value=value)
+    if isinstance(value, (int, float)):
+        return DocumentValue(kind="number", value=value)
+    if isinstance(value, str):
+        return DocumentValue(kind="string", value=value)
+    if isinstance(value, list):
+        return DocumentValue(kind="list", items=[coerce_document_value(item) for item in value])
+    if isinstance(value, dict):
+        return DocumentValue(
+            kind="dictionary",
+            fields={str(key): coerce_document_value(raw) for key, raw in value.items()},
+        )
+    return DocumentValue(kind="unknown", value=str(value), metadata={"python_type": type(value).__name__})
+
+
+def _value_matches_kind(value: DocumentValue, expected_kind: str) -> bool:
+    if expected_kind in {"dynamic", "any"}:
+        return True
+    if expected_kind == "markdown":
+        return value.kind in {"markdown", "string"}
+    if expected_kind == "dictionary":
+        return value.kind in {"dictionary", "record"}
+    return value.kind == expected_kind
+
+
+def _validate_output_value(
+    descriptor: DocumentFunctionDescriptor,
+    value: DocumentValue,
+    call: DocumentFunctionCall,
+    context: ProcessingContext,
+) -> Diagnostic | None:
+    expected = _normalize_output_type(descriptor.output_type)
+    if value.kind == "unknown":
+        return _output_diagnostic(
+            call,
+            "function.output_type_mismatch",
+            f"Function `{descriptor.id}` returned a value that does not match output type `{expected}`.",
+            context,
+            {"function": descriptor.id, "output_type": expected, "value_kind": value.kind},
+        )
+    if descriptor.execution == "deterministic" and value.kind == "dynamic":
+        return _output_diagnostic(
+            call,
+            "function.dynamic_output",
+            f"Function `{descriptor.id}` returned a dynamic value in a deterministic context.",
+            context,
+            {"function": descriptor.id, "output_type": expected},
+        )
+    if value.kind in {"reference", "content_unit"} and not value.provenance:
+        return _output_diagnostic(
+            call,
+            "function.provenance_missing",
+            f"Function `{descriptor.id}` returned `{value.kind}` without provenance.",
+            context,
+            {"function": descriptor.id, "value_kind": value.kind},
+        )
+    return None
+
+
+def _output_diagnostic(
+    call: DocumentFunctionCall,
+    code: str,
+    message: str,
+    context: ProcessingContext,
+    details: dict[str, Any],
+) -> Diagnostic:
+    return Diagnostic(
+        severity="error",
+        code=code,
+        message=message,
+        source=SourceLocation(
+            path=str(context.source_path) if context.source_path else None,
+            line=call.line,
+        )
+        if context.source_path or call.line
+        else None,
+        details=details,
+    )
+
+
 def _resolve_value(value: Any, context: ProcessingContext) -> Any:
    if isinstance(value, str):
        if value.startswith("${") and value.endswith("}"):
@@ -721,13 +973,59 @@ def _resolve_value(value: Any, context: ProcessingContext) -> Any:


 def _format_function_output(value: Any, *, inline: bool) -> str:
-    if isinstance(value, str):
-        return value
+    return format_document_value(value, inline=inline)
+
+
+def _document_value_to_plain(value: DocumentValue) -> Any:
+    if value.kind in {"string", "number", "boolean", "markdown", "reference", "content_unit", "unknown", "dynamic"}:
+        return value.value
+    if value.kind == "none":
+        return None
+    if value.kind in {"list", "table"}:
+        return [_document_value_to_plain(item) for item in value.items]
+    if value.kind in {"dictionary", "record"}:
+        return {key: _document_value_to_plain(raw) for key, raw in value.fields.items()}
+    return value.value
+
+
+def _format_table_value(value: DocumentValue) -> str:
+    rows = [item for item in value.items if item.kind in {"record", "dictionary"}]
+    if not rows:
+        return ""
+    columns: list[str] = []
+    for row in rows:
+        for key in row.fields:
+            if key not in columns:
+                columns.append(key)
+    if not columns:
+        return ""
+    header = "| " + " | ".join(_escape_table_cell(column) for column in columns) + " |"
+    separator = "| " + " | ".join("---" for _ in columns) + " |"
+    body = []
+    for row in rows:
+        body.append(
+            "| "
+            + " | ".join(
+                _escape_table_cell(format_document_value(row.fields.get(column, DocumentValue(kind="none")), inline=True))
+                for column in columns
+            )
+            + " |"
+        )
+    return "\n".join([header, separator, *body])
+
+
+def _escape_table_cell(value: str) -> str:
+    return value.replace("|", "\\|").replace("\n", " ").strip()
+
+
+def _serialize_output(value: Any) -> Any:
+    if isinstance(value, DocumentValue):
+        return value.to_dict()
    if isinstance(value, list):
-        return ", ".join(str(item) for item in value) if inline else "\n".join(str(item) for item in value)
+        return [_serialize_output(item) for item in value]
    if isinstance(value, dict):
-        return json.dumps(value, sort_keys=True, ensure_ascii=False)
-    return "" if value is None else str(value)
+        return {str(key): _serialize_output(raw) for key, raw in value.items()}
+    return value


 def _parse_literal(value: str) -> Any:
--- a/src/markitect_tool/extension/builtins.py
+++ b/src/markitect_tool/extension/builtins.py
@@ -26,6 +26,7 @@ def builtin_extension_registry() -> ExtensionRegistry:
        _local_label_policy_descriptor(),
        _document_function_descriptor(),
        _memory_graph_contract_descriptor(),
+        _memory_runtime_adapter_descriptor(),
        _agent_memory_descriptor(),
        source_adapter_registry_descriptor(),
    ]:
@@ -319,6 +320,23 @@ def _document_function_descriptor() -> ExtensionDescriptor:
        metadata={
            "execution": "deterministic-only",
            "external_policy_services_required": False,
+            "typed_values": True,
+            "value_kinds": [
+                "string",
+                "number",
+                "boolean",
+                "none",
+                "markdown",
+                "list",
+                "dictionary",
+                "record",
+                "table",
+                "reference",
+                "content_unit",
+                "unknown",
+                "dynamic",
+            ],
+            "render_export_execution": False,
        },
    )

@@ -412,6 +430,13 @@ def _memory_graph_contract_descriptor() -> ExtensionDescriptor:
            "examples/memory/memory-profile.local.yaml",
            "examples/memory/decision-graph.yaml",
            "examples/memory/decision-graph-selection.yaml",
+            "examples/memory/conversation-path.yaml",
+            "examples/memory/conversation-path-selection.yaml",
+            "examples/memory/knowledge-neighborhood.yaml",
+            "examples/memory/knowledge-neighborhood-selection.yaml",
+            "examples/memory/invalid-memory-graph.yaml",
+            "examples/memory/invalid-memory-profile.yaml",
+            "examples/memory/runtime-adapter-boundaries.yaml",
        ],
        metadata={
            "schema_versions": [
@@ -422,7 +447,59 @@ def _memory_graph_contract_descriptor() -> ExtensionDescriptor:
            "runtime_execution_required": False,
            "runtime_handoff_repositories": [
                "kontextual-engine",
+                "phased-memory",
                "infospace-bench",
            ],
+            "runtime_adapter_boundaries": [
+                "memory.runtime.kontextual-engine",
+                "memory.runtime.phased-memory",
+                "memory.store.external-graph",
+                "memory.store.vector",
+                "memory.extract.llm-assisted",
+                "memory.policy.enterprise-pdp",
+                "memory.registry.remote",
+                "memory.audit.sink",
+            ],
+        },
+    )
+
+
+def _memory_runtime_adapter_descriptor() -> ExtensionDescriptor:
+    return ExtensionDescriptor(
+        id="memory.runtime-adapter-boundary",
+        kind="memory-runtime-adapter",
+        summary="Non-executing handoff descriptors for external memory runtimes, stores, extraction, policy, and audit.",
+        capabilities=[
+            ProcessingCapability(id="memory_runtime_adapters", kind="describe"),
+            ProcessingCapability(id="memory_graphs", kind="handoff"),
+            ProcessingCapability(id="memory_events", kind="handoff"),
+            ProcessingCapability(id="context_packages", kind="handoff"),
+            ProcessingCapability(id="policy_decisions", kind="handoff"),
+        ],
+        safety={
+            "reads_files": False,
+            "writes_files": False,
+            "network": False,
+            "launches_services": False,
+            "runtime_execution": False,
+        },
+        input_contract="MemoryProfile | MemoryGraph | MemoryEvent | MemoryGraphSelection | ContextPackage metadata",
+        output_contract="External runtime/store/policy/audit adapter descriptor",
+        diagnostics_namespace="memory.runtime_adapter",
+        provenance_prefix="memory.runtime_adapter_boundary",
+        docs=["docs/memory-graph-contract.md"],
+        examples=["examples/memory/runtime-adapter-boundaries.yaml"],
+        metadata={
+            "descriptor_catalog": "examples/memory/runtime-adapter-boundaries.yaml",
+            "markitect_role": "contract-validation-and-context-package-compilation",
+            "external_runtime_roles": [
+                "durable graph and event persistence",
+                "graph and vector retrieval",
+                "LLM-assisted graph extraction",
+                "policy enforcement and reauthorization",
+                "remote registry coordination",
+                "audit and event sinks",
+            ],
+            "services_launched_by_markitect_tool": False,
        },
    )
--- a/src/markitect_tool/memory/graph.py
+++ b/src/markitect_tool/memory/graph.py
@@ -31,6 +31,7 @@ MEMORY_NODE_KINDS = {
    "assumption",
    "alternative",
    "artifact",
+    "activation",
    "capability",
    "claim",
    "concept",
@@ -45,6 +46,7 @@ MEMORY_NODE_KINDS = {
    "evidence",
    "finding",
    "follow_up",
+    "interruption",
    "memory",
    "observation",
    "outcome",