From 305d75177afe4f77811e870c62eb5356434b7d44 Mon Sep 17 00:00:00 2001 From: tegwick Date: Fri, 15 May 2026 14:08:26 +0200 Subject: [PATCH] Add render reference asset manifest contract --- docs/api-reference.md | 20 +- docs/examples-index.md | 1 + docs/internal-extension-framework.md | 7 +- docs/render-export-adapters.md | 6 + docs/render-reference-asset-manifest.md | 96 ++ docs/workplan-planning-map.md | 2 +- .../render/render-reference-manifest.yaml | 83 ++ src/markitect_tool/__init__.py | 36 + src/markitect_tool/extension/builtins.py | 2 + src/markitect_tool/render/__init__.py | 36 + src/markitect_tool/render/engine.py | 860 +++++++++++++++++- tests/test_builtin_extension_catalog.py | 6 + tests/test_render_reference_manifest.py | 236 +++++ ...WP-0021-render-reference-asset-manifest.md | 34 +- 14 files changed, 1405 insertions(+), 20 deletions(-) create mode 100644 docs/render-reference-asset-manifest.md create mode 100644 examples/render/render-reference-manifest.yaml create mode 100644 tests/test_render_reference_manifest.py diff --git a/docs/api-reference.md b/docs/api-reference.md index 576a67c..a64f947 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -13,9 +13,13 @@ Generated from `markitect_tool.__all__`. - `LOCAL_INDEX_SCHEMA_VERSION` - object. str(object='') -> str - `MAX_FUNCTION_PIPELINE_DEPTH` - object. int([x]) -> integer - `NORMALIZED_SOURCE_SCHEMA_VERSION` - object. str(object='') -> str +- `RENDER_ASSET_COPY_POLICIES` - object. set() -> new empty set object - `RENDER_EXPORT_ADAPTER_ENTRY_POINT_GROUP` - object. str(object='') -> str - `RENDER_EXPORT_ADAPTER_KIND` - object. str(object='') -> str - `RENDER_EXPORT_SCHEMA_VERSION` - object. str(object='') -> str +- `RENDER_REFERENCE_MANIFEST_KIND` - object. str(object='') -> str +- `RENDER_REFERENCE_SCHEMA_VERSION` - object. str(object='') -> str +- `RENDER_UNIT_KINDS` - object. set() -> new empty set object - `SOURCE_ADAPTER_ENTRY_POINT_GROUP` - object. str(object='') -> str ## `markitect_tool.backend.engine` @@ -317,17 +321,31 @@ Generated from `markitect_tool.__all__`. - `FakeRenderExportAdapter() -> 'None'` - class. Deterministic no-op renderer used for contract tests. - `RenderArtifact(artifact_id: 'str', role: 'str', media_type: 'str', content: 'str | None' = None, path: 'str | None' = None, uri: 'str | None' = None, digest: 'str | None' = None, metadata: 'dict[str, Any]' = ) -> None` - class. Metadata for a rendered or exported artifact. +- `RenderAsset(asset_id: 'str' = '', source_uri: 'str | None' = None, source_path: 'str | None' = None, name: 'str | None' = None, media_type: 'str | None' = None, extension: 'str | None' = None, digest: 'str | None' = None, role: 'str' = 'asset', copy_policy: 'str' = 'copy', output_reference: 'str | None' = None, provenance: 'list[RenderAssetProvenance]' = , metadata: 'dict[str, Any]' = ) -> None` - class. Static asset descriptor with declared renderer copy behavior. +- `RenderAssetManifest(manifest_id: 'str' = '', assets: 'list[RenderAsset]' = , source_path: 'str | None' = None, source_digest: 'str | None' = None, schema_version: 'str' = 'markitect.render.reference.v1', metadata: 'dict[str, Any]' = ) -> None` - class. Deterministic list of static assets requested by renderer source. +- `RenderAssetProvenance(source_uri: 'str | None' = None, source_path: 'str | None' = None, source_href: 'str | None' = None, package_path: 'str | None' = None, attachment_id: 'str | None' = None, source_adapter_id: 'str | None' = None, source_span: 'RenderSourceSpan | None' = None, digest: 'str | None' = None, metadata: 'dict[str, Any]' = ) -> None` - class. Provenance for an asset manifest entry. +- `RenderCrossReference(target_unit_id: 'str', reference_id: 'str' = '', source_unit_id: 'str | None' = None, source_span: 'RenderSourceSpan | None' = None, label: 'str | None' = None, requested_style: 'str' = 'numbered', fallback_text: 'str | None' = None, metadata: 'dict[str, Any]' = ) -> None` - class. Requested cross-reference link before renderer-specific numbering exists. - `RenderExportAdapter(*args, **kwargs)` - class. Render/export adapter protocol. - `RenderExportAdapterDescriptor(id: 'str', version: 'str', name: 'str', operations: 'list[str]', input_contracts: 'list[str]', output_profiles: 'list[str]', artifact_media_types: 'list[str]', factory: 'RenderExportAdapterFactory', summary: 'str | None' = None, option_schema: 'dict[str, Any]' = , optional_dependencies: 'list[OptionalDependency]' = , safety: 'dict[str, Any]' = , quality_profile: 'dict[str, Any]' = , metadata: 'dict[str, Any]' = ) -> None` - class. Inspectable descriptor for one render/export adapter. - `RenderExportAdapterError` - class. Raised when render/export adapter contracts are invalid. - `RenderExportAdapterRegistry(descriptors: 'Iterable[RenderExportAdapterDescriptor] | None' = None) -> 'None'` - class. Registry of render/export adapter descriptors. -- `RenderExportRequest(source: 'str', operation: 'str' = 'render-artifact', profile: 'str' = 'plain', source_path: 'str | None' = None, options: 'dict[str, Any]' = , policy: 'dict[str, Any]' = , schema_version: 'str' = 'markitect.render.export.v1', metadata: 'dict[str, Any]' = ) -> None` - class. Service-free render/export request. +- `RenderExportRequest(source: 'str', operation: 'str' = 'render-artifact', profile: 'str' = 'plain', source_path: 'str | None' = None, options: 'dict[str, Any]' = , policy: 'dict[str, Any]' = , render_manifest: 'RenderReferenceManifest | dict[str, Any] | None' = None, schema_version: 'str' = 'markitect.render.export.v1', metadata: 'dict[str, Any]' = ) -> None` - class. Service-free render/export request. - `RenderExportResult(adapter: 'dict[str, Any]', operation: 'str', profile: 'str', artifacts: 'list[RenderArtifact]' = , exported_source: 'str | None' = None, diagnostics: 'list[Diagnostic]' = , provenance: 'list[RenderProvenance]' = , schema_version: 'str' = 'markitect.render.export.v1', metadata: 'dict[str, Any]' = ) -> None` - class. Result of a render/export adapter operation. - `RenderProvenance(operation: 'str', adapter_id: 'str', profile: 'str', source_path: 'str | None' = None, source_digest: 'str | None' = None, artifact_id: 'str | None' = None, metadata: 'dict[str, Any]' = ) -> None` - class. Source-to-render provenance envelope. +- `RenderReferenceError` - class. Raised when render reference or asset manifest contracts are invalid. +- `RenderReferenceManifest(manifest_id: 'str' = '', units: 'list[RenderUnitReference]' = , cross_references: 'list[RenderCrossReference]' = , toc: 'list[RenderTocEntry]' = , asset_manifest: 'RenderAssetManifest' = , source_maps: 'list[RenderSourceMap]' = , source_path: 'str | None' = None, source_digest: 'str | None' = None, schema_version: 'str' = 'markitect.render.reference.v1', metadata: 'dict[str, Any]' = ) -> None` - class. Passive render structure, cross-reference, asset, and provenance manifest. +- `RenderSourceMap(map_id: 'str' = '', source: 'RenderSourceSpan | None' = None, source_unit_id: 'str | None' = None, generated_by: 'str | None' = None, function_run_id: 'str | None' = None, render_unit_id: 'str | None' = None, artifact_id: 'str | None' = None, artifact_ref: 'str | None' = None, role: 'str | None' = None, metadata: 'dict[str, Any]' = ) -> None` - class. Trace from Markitect source/generated output to renderer units or artifacts. +- `RenderSourceSpan(source_path: 'str | None' = None, line_start: 'int | None' = None, line_end: 'int | None' = None, selector: 'str | None' = None, source_unit_id: 'str | None' = None, content_hash: 'str | None' = None, metadata: 'dict[str, Any]' = ) -> None` - class. Source location for a render unit, asset, or source-map edge. +- `RenderTocEntry(unit_id: 'str', title: 'str', level: 'int', entry_id: 'str' = '', parent_id: 'str | None' = None, order: 'int | None' = None, source_span: 'RenderSourceSpan | None' = None, metadata: 'dict[str, Any]' = ) -> None` - class. Table-of-contents planning entry before final renderer links exist. +- `RenderUnitReference(unit_id: 'str' = '', kind: 'str' = 'custom', label: 'str | None' = None, title: 'str | None' = None, caption: 'str | None' = None, source_path: 'str | None' = None, anchor: 'str | None' = None, source_span: 'RenderSourceSpan | None' = None, content_hash: 'str | None' = None, ordinal_hint: 'int | None' = None, numbering: 'dict[str, Any]' = , metadata: 'dict[str, Any]' = ) -> None` - class. Stable identity for one renderable unit before final renderer numbering. - `default_render_export_adapter_registry() -> 'RenderExportAdapterRegistry'` - function. Return the built-in render/export adapter registry. - `discover_render_export_adapters() -> 'list[RenderExportAdapterDescriptor]'` - function. Discover package-provided render/export adapter descriptors. +- `render_asset_id(source: 'str', *, digest: 'str | None' = None, role: 'str | None' = None, output_reference: 'str | None' = None) -> 'str'` - function. Return a deterministic asset id without copying or reading the asset. - `render_capability_diagnostics(descriptor: 'RenderExportAdapterDescriptor', request: 'RenderExportRequest') -> 'list[Diagnostic]'` - function. Return diagnostics for capabilities blocked by request policy. - `render_export_registry_descriptor() -> 'ExtensionDescriptor'` - function. Descriptor for the render/export adapter registry itself. +- `render_manifest_id(manifest: 'RenderReferenceManifest | dict[str, Any]') -> 'str'` - function. Return the deterministic id for a render reference manifest-like object. +- `render_reference_manifest_descriptor() -> 'ExtensionDescriptor'` - function. Descriptor for passive render reference and asset manifest contracts. +- `render_unit_id(kind: 'str', *, source_path: 'str | None' = None, anchor: 'str | None' = None, content_hash: 'str | None' = None, ordinal_hint: 'int | None' = None, title: 'str | None' = None) -> 'str'` - function. Return a deterministic render unit id for passive reference manifests. - `render_with_adapter(request: 'RenderExportRequest', *, registry: 'RenderExportAdapterRegistry | None' = None, adapter_id: 'str' = 'render.fake') -> 'RenderExportResult'` - function. Render/export through a registered adapter. ## `markitect_tool.runtime.assessment` diff --git a/docs/examples-index.md b/docs/examples-index.md index 9334312..a26fdb5 100644 --- a/docs/examples-index.md +++ b/docs/examples-index.md @@ -42,6 +42,7 @@ This index maps example files to practical usecases and useful commands. | Files | Usecase | Try | | --- | --- | --- | | `examples/render/fake-render-request.yaml` | Deterministic render/export contract fixture | Use with the `render.fake` API adapter; no external renderer required | +| `examples/render/render-reference-manifest.yaml` | Render unit, cross-reference, TOC, asset, and source-map manifest fixture | Load with `RenderReferenceManifest.from_dict`; no renderer or asset copying required | ## Cache, Backend, Policy, And Context diff --git a/docs/internal-extension-framework.md b/docs/internal-extension-framework.md index e66e6cd..72a40ea 100644 --- a/docs/internal-extension-framework.md +++ b/docs/internal-extension-framework.md @@ -42,6 +42,7 @@ framework organizes how Markitect itself exposes and composes capabilities. | `source-adapter` | EPUB3/PDF/DOCX adapters in external packages | source asset in, normalized Markdown out | | `cli-group` | cache, backend, ref, class | command descriptors or registration hook | | `render-export` | Quarkdown/export adapters | Markdown source in, rendered/exported artifact descriptor out | +| `render-reference-contract` | render units, cross-references, TOC, asset manifests | passive manifest in, renderer-planning metadata out | | `document-function` | future function layer | function call in, typed document value out | ## Canonical Lifecycle @@ -172,8 +173,10 @@ Each module exposes one or more descriptors plus a registration function. The root registry can be assembled explicitly at import time or by a small internal discovery list. Source adapters are the first external package-discovery slice and use the `markitect_tool.source_adapters` entry point group defined in -`docs/source-adapter-contract.md`; other extension kinds can adopt package -entry points later if they become a real requirement. +`docs/source-adapter-contract.md`. Render/export adapters use +`markitect_tool.render_export_adapters` and keep concrete renderer execution in +external packages. Render reference and asset manifests remain built-in passive +contracts; they do not need adapter discovery. See `docs/extension-authoring.md` for the extension authoring checklist and descriptor template. diff --git a/docs/render-export-adapters.md b/docs/render-export-adapters.md index 475ed92..777426a 100644 --- a/docs/render-export-adapters.md +++ b/docs/render-export-adapters.md @@ -56,6 +56,12 @@ identity, options, and local policy flags. - source-to-render provenance - metadata such as whether an external renderer was invoked +`RenderExportRequest` may also carry a passive render reference manifest. That +manifest is defined in `docs/render-reference-asset-manifest.md` and lets core +Markitect pass unit identities, cross-reference intent, TOC planning, source +maps, and asset descriptors to a renderer without assigning final numbering or +copying assets. + Artifacts are descriptors, not durable storage records. Real renderer packages may write files, but core Markitect only models the result. diff --git a/docs/render-reference-asset-manifest.md b/docs/render-reference-asset-manifest.md new file mode 100644 index 0000000..09265fe --- /dev/null +++ b/docs/render-reference-asset-manifest.md @@ -0,0 +1,96 @@ +# Render Reference And Asset Manifest Contract + +Markitect models render-aware structure before an optional renderer runs. The +contract is passive: it records units, requested references, static assets, and +source maps, but it does not assign final numbering, rewrite links, copy +assets, or validate renderer output directories. + +## Contract Version + +- `markitect.render.reference.v1` + +The manifest kind is: + +```text +render-reference-manifest +``` + +## Render Units + +`RenderUnitReference` gives a stable identity to renderable content: + +| Unit kind | Typical use | +| --- | --- | +| `section` | Headings and TOC entries. | +| `figure` | Images, charts, diagrams, and captions. | +| `table` | Markdown or generated tables. | +| `equation` | Math blocks or equation-like units. | +| `code-block` | Numbered or referenced code listings. | +| `custom` | Project-defined numbered units. | + +The unit model may carry labels, captions, anchors, source spans, content +hashes, and numbering intent such as scope or sequence. It intentionally does +not carry final numbers. + +## Cross-References And TOC Planning + +`RenderCrossReference` represents a requested link from one source location to +a target render unit. It records the target unit and requested style, while the +renderer decides final label text, numbering, and URL rewriting. + +`RenderTocEntry` records planned table-of-contents entries by unit id, title, +level, parent id, and order. The renderer owns final page numbers and links. + +## Static Assets + +`RenderAssetManifest` is a deterministic list of static assets referenced by +renderer source. Each `RenderAsset` records: + +- source URI or source path +- name, media type, and extension +- digest when available +- logical role +- copy policy declaration +- renderer output reference placeholder +- provenance back to source spans or source adapter attachments + +Supported copy policies are `copy`, `embed`, `link`, `preserve`, and `skip`. +They are declarations for downstream renderers. Core Markitect never copies the +asset. + +`markitect-filter` may provide read-side attachment metadata, such as source +URI, path, media type, extension, digest, and attachment provenance. Markitect +can transform that metadata into a render asset entry without making +`markitect-filter` responsible for rendering. + +## Source Maps + +`RenderSourceMap` connects Markitect source spans or generated function outputs +to renderer units and artifact references. This is the bridge from deterministic +Markdown processing to renderer-owned artifacts: + +```text +source span / function output -> render unit -> renderer source or artifact +``` + +The map can reference a Markitect source unit, a document function run, a +render unit id, an artifact id, or a renderer-owned artifact reference. + +## Fake Renderer Integration + +`RenderExportRequest` accepts an optional `render_manifest`. The built-in +`render.fake` adapter echoes the manifest id, asset manifest id, unit counts, +cross-reference count, TOC count, source-map count, and asset count into result +and artifact metadata. It still performs no external rendering and has no +filesystem or network side effects. + +See `examples/render/render-reference-manifest.yaml` for a complete manifest +fixture. + +## Repository Boundary + +- `markitect-tool` owns these passive contracts and fake-renderer metadata + integration. +- `markitect-filter` owns read-side source asset and attachment metadata. +- `markitect-quarkdown` owns concrete Quarkdown execution, final numbering, + output paths, link rewriting, asset copying, and artifact validation. diff --git a/docs/workplan-planning-map.md b/docs/workplan-planning-map.md index 870de6a..96dd6ad 100644 --- a/docs/workplan-planning-map.md +++ b/docs/workplan-planning-map.md @@ -47,7 +47,7 @@ and descriptions mirror the operational view. | `MKTT-WP-0015` | complete | done | `MKTT-WP-0010`, `MKTT-WP-0011`, `MKTT-WP-0012` | Document function value contracts are complete: typed values, deterministic Markdown/JSON mapping, descriptor output validation, API exports, docs, examples, and tests. | | `MKTT-WP-0016` | complete | done | `MKTT-WP-0008`, `MKTT-WP-0007`, `MKTT-WP-0009`, `MKTT-WP-0013` | Memory graph profile contracts are complete: graph/profile/event models, validation, context-package compilation, CLI, fixture breadth, invalid fixtures, and runtime adapter handoff descriptors. | | `MKTT-WP-0020` | complete | done | `MKTT-WP-0013`, `MKTT-WP-0015` | Render/export adapter contract is complete: descriptors, registry/discovery, request/result/artifact/provenance envelopes, fake deterministic renderer, capability diagnostics, extension descriptors, docs, examples, and tests. | -| `MKTT-WP-0021` | P2 | todo | `MKTT-WP-0010`, `MKTT-WP-0015`, `MKTT-WP-0020` | Render reference and asset manifest contract remains open: passive references, numbered unit metadata, static asset manifests, and source-to-render provenance maps. | +| `MKTT-WP-0021` | complete | done | `MKTT-WP-0010`, `MKTT-WP-0015`, `MKTT-WP-0020` | Render reference and asset manifest contract is complete: passive render unit identities, cross-reference/TOC planning, deterministic asset manifests, source maps, fake-renderer metadata integration, docs, examples, and tests. | ## Dependency Notes diff --git a/examples/render/render-reference-manifest.yaml b/examples/render/render-reference-manifest.yaml new file mode 100644 index 0000000..7986747 --- /dev/null +++ b/examples/render/render-reference-manifest.yaml @@ -0,0 +1,83 @@ +schema_version: markitect.render.reference.v1 +kind: render-reference-manifest +manifest_id: render-manifest:example-report +source_path: examples/render/report.md +source_digest: sha256:example-report +units: + - unit_id: section:results + kind: section + title: Results + source_path: examples/render/report.md + anchor: results + source_span: + source_path: examples/render/report.md + line_start: 8 + line_end: 12 + ordinal_hint: 1 + - unit_id: figure:revenue + kind: figure + label: Figure + caption: Quarterly revenue + source_path: examples/render/report.md + anchor: fig:revenue + source_span: + source_path: examples/render/report.md + line_start: 14 + line_end: 18 + selector: "#fig-revenue" + content_hash: sha256:figure-revenue + ordinal_hint: 2 + numbering: + scope: document + sequence: figures +cross_references: + - reference_id: xref:revenue-from-results + source_unit_id: section:results + target_unit_id: figure:revenue + label: Figure + requested_style: numbered + fallback_text: Quarterly revenue +toc: + - entry_id: toc:results + unit_id: section:results + title: Results + level: 1 + order: 1 +asset_manifest: + schema_version: markitect.render.reference.v1 + manifest_id: asset-manifest:example-report + source_path: examples/render/report.md + source_digest: sha256:example-report + assets: + - asset_id: asset:revenue-chart + source_uri: images/revenue-chart.png + source_path: examples/render/images/revenue-chart.png + name: revenue-chart.png + media_type: image/png + extension: .png + digest: sha256:revenue-chart + role: figure-image + copy_policy: copy + output_reference: renderer://asset/revenue-chart + provenance: + - source_uri: images/revenue-chart.png + source_path: examples/render/report.md + attachment_id: revenue-chart.png + source_adapter_id: markitect-filter + digest: sha256:revenue-chart +source_maps: + - map_id: source-map:revenue-figure + source: + source_path: examples/render/report.md + line_start: 14 + line_end: 18 + selector: "#fig-revenue" + source_unit_id: figure:revenue + generated_by: markitect.render.reference-manifest + render_unit_id: figure:revenue + artifact_ref: renderer-source + role: figure +metadata: + concrete_renderer_required: false + final_numbering_assigned_by_core: false + asset_copying_performed_by_core: false diff --git a/src/markitect_tool/__init__.py b/src/markitect_tool/__init__.py index 136f893..fa9ae2d 100644 --- a/src/markitect_tool/__init__.py +++ b/src/markitect_tool/__init__.py @@ -263,8 +263,16 @@ from markitect_tool.render import ( RENDER_EXPORT_ADAPTER_ENTRY_POINT_GROUP, RENDER_EXPORT_ADAPTER_KIND, RENDER_EXPORT_SCHEMA_VERSION, + RENDER_ASSET_COPY_POLICIES, + RENDER_REFERENCE_MANIFEST_KIND, + RENDER_REFERENCE_SCHEMA_VERSION, + RENDER_UNIT_KINDS, FakeRenderExportAdapter, RenderArtifact, + RenderAsset, + RenderAssetManifest, + RenderAssetProvenance, + RenderCrossReference, RenderExportAdapter, RenderExportAdapterDescriptor, RenderExportAdapterError, @@ -272,10 +280,20 @@ from markitect_tool.render import ( RenderExportRequest, RenderExportResult, RenderProvenance, + RenderReferenceError, + RenderReferenceManifest, + RenderSourceMap, + RenderSourceSpan, + RenderTocEntry, + RenderUnitReference, default_render_export_adapter_registry, discover_render_export_adapters, render_capability_diagnostics, + render_asset_id, render_export_registry_descriptor, + render_manifest_id, + render_reference_manifest_descriptor, + render_unit_id, render_with_adapter, ) from markitect_tool.schema import ( @@ -592,8 +610,16 @@ __all__ = [ "RENDER_EXPORT_ADAPTER_ENTRY_POINT_GROUP", "RENDER_EXPORT_ADAPTER_KIND", "RENDER_EXPORT_SCHEMA_VERSION", + "RENDER_ASSET_COPY_POLICIES", + "RENDER_REFERENCE_MANIFEST_KIND", + "RENDER_REFERENCE_SCHEMA_VERSION", + "RENDER_UNIT_KINDS", "FakeRenderExportAdapter", "RenderArtifact", + "RenderAsset", + "RenderAssetManifest", + "RenderAssetProvenance", + "RenderCrossReference", "RenderExportAdapter", "RenderExportAdapterDescriptor", "RenderExportAdapterError", @@ -601,10 +627,20 @@ __all__ = [ "RenderExportRequest", "RenderExportResult", "RenderProvenance", + "RenderReferenceError", + "RenderReferenceManifest", + "RenderSourceMap", + "RenderSourceSpan", + "RenderTocEntry", + "RenderUnitReference", "default_render_export_adapter_registry", "discover_render_export_adapters", "render_capability_diagnostics", + "render_asset_id", "render_export_registry_descriptor", + "render_manifest_id", + "render_reference_manifest_descriptor", + "render_unit_id", "render_with_adapter", "MissingTemplateVariable", "TemplateAnalysis", diff --git a/src/markitect_tool/extension/builtins.py b/src/markitect_tool/extension/builtins.py index 8deacfb..aa72ed4 100644 --- a/src/markitect_tool/extension/builtins.py +++ b/src/markitect_tool/extension/builtins.py @@ -8,6 +8,7 @@ from markitect_tool.query import default_query_engine_registry from markitect_tool.render import ( default_render_export_adapter_registry, render_export_registry_descriptor, + render_reference_manifest_descriptor, ) from markitect_tool.source import ( default_source_adapter_registry, @@ -33,6 +34,7 @@ def builtin_extension_registry() -> ExtensionRegistry: _memory_runtime_adapter_descriptor(), _agent_memory_descriptor(), render_export_registry_descriptor(), + render_reference_manifest_descriptor(), source_adapter_registry_descriptor(), ]: registry.register(descriptor) diff --git a/src/markitect_tool/render/__init__.py b/src/markitect_tool/render/__init__.py index adf3d1a..fbd9cd9 100644 --- a/src/markitect_tool/render/__init__.py +++ b/src/markitect_tool/render/__init__.py @@ -4,8 +4,16 @@ from markitect_tool.render.engine import ( RENDER_EXPORT_ADAPTER_ENTRY_POINT_GROUP, RENDER_EXPORT_ADAPTER_KIND, RENDER_EXPORT_SCHEMA_VERSION, + RENDER_ASSET_COPY_POLICIES, + RENDER_REFERENCE_MANIFEST_KIND, + RENDER_REFERENCE_SCHEMA_VERSION, + RENDER_UNIT_KINDS, FakeRenderExportAdapter, RenderArtifact, + RenderAsset, + RenderAssetManifest, + RenderAssetProvenance, + RenderCrossReference, RenderExportAdapter, RenderExportAdapterDescriptor, RenderExportAdapterError, @@ -13,10 +21,20 @@ from markitect_tool.render.engine import ( RenderExportRequest, RenderExportResult, RenderProvenance, + RenderReferenceError, + RenderReferenceManifest, + RenderSourceMap, + RenderSourceSpan, + RenderTocEntry, + RenderUnitReference, default_render_export_adapter_registry, discover_render_export_adapters, render_capability_diagnostics, + render_asset_id, render_export_registry_descriptor, + render_manifest_id, + render_reference_manifest_descriptor, + render_unit_id, render_with_adapter, ) @@ -24,8 +42,16 @@ __all__ = [ "RENDER_EXPORT_ADAPTER_ENTRY_POINT_GROUP", "RENDER_EXPORT_ADAPTER_KIND", "RENDER_EXPORT_SCHEMA_VERSION", + "RENDER_ASSET_COPY_POLICIES", + "RENDER_REFERENCE_MANIFEST_KIND", + "RENDER_REFERENCE_SCHEMA_VERSION", + "RENDER_UNIT_KINDS", "FakeRenderExportAdapter", "RenderArtifact", + "RenderAsset", + "RenderAssetManifest", + "RenderAssetProvenance", + "RenderCrossReference", "RenderExportAdapter", "RenderExportAdapterDescriptor", "RenderExportAdapterError", @@ -33,9 +59,19 @@ __all__ = [ "RenderExportRequest", "RenderExportResult", "RenderProvenance", + "RenderReferenceError", + "RenderReferenceManifest", + "RenderSourceMap", + "RenderSourceSpan", + "RenderTocEntry", + "RenderUnitReference", "default_render_export_adapter_registry", "discover_render_export_adapters", "render_capability_diagnostics", + "render_asset_id", "render_export_registry_descriptor", + "render_manifest_id", + "render_reference_manifest_descriptor", + "render_unit_id", "render_with_adapter", ] diff --git a/src/markitect_tool/render/engine.py b/src/markitect_tool/render/engine.py index 7581f1f..598f8a7 100644 --- a/src/markitect_tool/render/engine.py +++ b/src/markitect_tool/render/engine.py @@ -37,6 +37,26 @@ RENDER_PROFILES = { "pdf", } +RENDER_REFERENCE_SCHEMA_VERSION = "markitect.render.reference.v1" +RENDER_REFERENCE_MANIFEST_KIND = "render-reference-manifest" + +RENDER_UNIT_KINDS = { + "figure", + "table", + "equation", + "code-block", + "section", + "custom", +} + +RENDER_ASSET_COPY_POLICIES = { + "copy", + "embed", + "link", + "preserve", + "skip", +} + _SAFETY_POLICY_FLAGS = { "filesystem_read": "filesystem_read", "filesystem_write": "filesystem_write", @@ -52,6 +72,657 @@ class RenderExportAdapterError(ValueError): """Raised when render/export adapter contracts are invalid.""" +class RenderReferenceError(ValueError): + """Raised when render reference or asset manifest contracts are invalid.""" + + +@dataclass(frozen=True) +class RenderSourceSpan: + """Source location for a render unit, asset, or source-map edge.""" + + source_path: str | None = None + line_start: int | None = None + line_end: int | None = None + selector: str | None = None + source_unit_id: str | None = None + content_hash: str | None = None + metadata: dict[str, Any] = field(default_factory=dict) + + def __post_init__(self) -> None: + if ( + self.line_start is not None + and self.line_end is not None + and self.line_start > self.line_end + ): + raise RenderReferenceError("Render source span line_start cannot be after line_end") + + def to_dict(self) -> dict[str, Any]: + return _drop_empty(asdict(self)) + + @classmethod + def from_dict(cls, data: dict[str, Any] | None) -> "RenderSourceSpan | None": + if not data: + return None + return cls( + source_path=str(data["source_path"]) if data.get("source_path") is not None else None, + line_start=int(data["line_start"]) if data.get("line_start") is not None else None, + line_end=int(data["line_end"]) if data.get("line_end") is not None else None, + selector=str(data["selector"]) if data.get("selector") is not None else None, + source_unit_id=str(data["source_unit_id"]) if data.get("source_unit_id") is not None else None, + content_hash=str(data["content_hash"]) if data.get("content_hash") is not None else None, + metadata=dict(data.get("metadata", {})), + ) + + +@dataclass(frozen=True) +class RenderUnitReference: + """Stable identity for one renderable unit before final renderer numbering.""" + + unit_id: str = "" + kind: str = "custom" + label: str | None = None + title: str | None = None + caption: str | None = None + source_path: str | None = None + anchor: str | None = None + source_span: RenderSourceSpan | None = None + content_hash: str | None = None + ordinal_hint: int | None = None + numbering: dict[str, Any] = field(default_factory=dict) + metadata: dict[str, Any] = field(default_factory=dict) + + def __post_init__(self) -> None: + kind = self.kind.strip() + if kind not in RENDER_UNIT_KINDS: + raise RenderReferenceError(f"Unsupported render unit kind `{self.kind}`") + if self.ordinal_hint is not None and self.ordinal_hint < 0: + raise RenderReferenceError("Render unit ordinal_hint cannot be negative") + if not self.unit_id: + object.__setattr__( + self, + "unit_id", + render_unit_id( + kind, + source_path=self.source_path, + anchor=self.anchor, + content_hash=self.content_hash, + ordinal_hint=self.ordinal_hint, + title=self.title or self.caption, + ), + ) + if self.source_span and self.source_span.source_path and not self.source_path: + object.__setattr__(self, "source_path", self.source_span.source_path) + + def to_dict(self) -> dict[str, Any]: + return _drop_empty( + { + "unit_id": self.unit_id, + "kind": self.kind, + "label": self.label, + "title": self.title, + "caption": self.caption, + "source_path": self.source_path, + "anchor": self.anchor, + "source_span": self.source_span.to_dict() if self.source_span else None, + "content_hash": self.content_hash, + "ordinal_hint": self.ordinal_hint, + "numbering": self.numbering, + "metadata": self.metadata, + } + ) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "RenderUnitReference": + return cls( + unit_id=str(data.get("unit_id", "")), + kind=str(data.get("kind", "custom")), + label=str(data["label"]) if data.get("label") is not None else None, + title=str(data["title"]) if data.get("title") is not None else None, + caption=str(data["caption"]) if data.get("caption") is not None else None, + source_path=str(data["source_path"]) if data.get("source_path") is not None else None, + anchor=str(data["anchor"]) if data.get("anchor") is not None else None, + source_span=RenderSourceSpan.from_dict(data.get("source_span")), + content_hash=str(data["content_hash"]) if data.get("content_hash") is not None else None, + ordinal_hint=int(data["ordinal_hint"]) if data.get("ordinal_hint") is not None else None, + numbering=dict(data.get("numbering", {})), + metadata=dict(data.get("metadata", {})), + ) + + @classmethod + def from_content_unit( + cls, + unit: Any, + *, + kind: str | None = None, + label: str | None = None, + ) -> "RenderUnitReference": + data = _mapping_from(unit) + span_data = data.get("span") if isinstance(data.get("span"), dict) else {} + source_path = str(data["source_path"]) if data.get("source_path") is not None else None + return cls( + unit_id=str(data.get("unit_id", "")), + kind=kind or _render_kind_from_content_unit(str(data.get("kind", "custom"))), + label=label, + title=str(data["name"]) if data.get("name") is not None else None, + source_path=source_path, + source_span=RenderSourceSpan( + source_path=source_path, + line_start=int(span_data["line_start"]) if span_data.get("line_start") is not None else None, + line_end=int(span_data["line_end"]) if span_data.get("line_end") is not None else None, + source_unit_id=str(data.get("unit_id", "")) or None, + content_hash=str(data["content_hash"]) if data.get("content_hash") is not None else None, + ) + if source_path or span_data + else None, + content_hash=str(data["content_hash"]) if data.get("content_hash") is not None else None, + metadata={"content_unit_kind": data.get("kind")} if data.get("kind") else {}, + ) + + +@dataclass(frozen=True) +class RenderCrossReference: + """Requested cross-reference link before renderer-specific numbering exists.""" + + target_unit_id: str + reference_id: str = "" + source_unit_id: str | None = None + source_span: RenderSourceSpan | None = None + label: str | None = None + requested_style: str = "numbered" + fallback_text: str | None = None + metadata: dict[str, Any] = field(default_factory=dict) + + def __post_init__(self) -> None: + if not self.target_unit_id.strip(): + raise RenderReferenceError("Cross-reference target_unit_id cannot be empty") + if not self.reference_id: + object.__setattr__( + self, + "reference_id", + _stable_id( + "xref", + self.source_unit_id, + self.target_unit_id, + self.source_span.to_dict() if self.source_span else None, + self.requested_style, + ), + ) + + def to_dict(self) -> dict[str, Any]: + return _drop_empty( + { + "reference_id": self.reference_id, + "source_unit_id": self.source_unit_id, + "target_unit_id": self.target_unit_id, + "source_span": self.source_span.to_dict() if self.source_span else None, + "label": self.label, + "requested_style": self.requested_style, + "fallback_text": self.fallback_text, + "metadata": self.metadata, + } + ) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "RenderCrossReference": + return cls( + target_unit_id=str(data["target_unit_id"]), + reference_id=str(data.get("reference_id", "")), + source_unit_id=str(data["source_unit_id"]) if data.get("source_unit_id") is not None else None, + source_span=RenderSourceSpan.from_dict(data.get("source_span")), + label=str(data["label"]) if data.get("label") is not None else None, + requested_style=str(data.get("requested_style", "numbered")), + fallback_text=str(data["fallback_text"]) if data.get("fallback_text") is not None else None, + metadata=dict(data.get("metadata", {})), + ) + + +@dataclass(frozen=True) +class RenderTocEntry: + """Table-of-contents planning entry before final renderer links exist.""" + + unit_id: str + title: str + level: int + entry_id: str = "" + parent_id: str | None = None + order: int | None = None + source_span: RenderSourceSpan | None = None + metadata: dict[str, Any] = field(default_factory=dict) + + def __post_init__(self) -> None: + if not self.unit_id.strip(): + raise RenderReferenceError("TOC unit_id cannot be empty") + if not self.title.strip(): + raise RenderReferenceError("TOC title cannot be empty") + if self.level < 1: + raise RenderReferenceError("TOC level must be greater than zero") + if self.order is not None and self.order < 0: + raise RenderReferenceError("TOC order cannot be negative") + if not self.entry_id: + object.__setattr__(self, "entry_id", _stable_id("toc", self.unit_id, self.title, self.level)) + + def to_dict(self) -> dict[str, Any]: + return _drop_empty( + { + "entry_id": self.entry_id, + "unit_id": self.unit_id, + "title": self.title, + "level": self.level, + "parent_id": self.parent_id, + "order": self.order, + "source_span": self.source_span.to_dict() if self.source_span else None, + "metadata": self.metadata, + } + ) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "RenderTocEntry": + return cls( + unit_id=str(data["unit_id"]), + title=str(data["title"]), + level=int(data["level"]), + entry_id=str(data.get("entry_id", "")), + parent_id=str(data["parent_id"]) if data.get("parent_id") is not None else None, + order=int(data["order"]) if data.get("order") is not None else None, + source_span=RenderSourceSpan.from_dict(data.get("source_span")), + metadata=dict(data.get("metadata", {})), + ) + + +@dataclass(frozen=True) +class RenderAssetProvenance: + """Provenance for an asset manifest entry.""" + + source_uri: str | None = None + source_path: str | None = None + source_href: str | None = None + package_path: str | None = None + attachment_id: str | None = None + source_adapter_id: str | None = None + source_span: RenderSourceSpan | None = None + digest: str | None = None + metadata: dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict[str, Any]: + return _drop_empty( + { + "source_uri": self.source_uri, + "source_path": self.source_path, + "source_href": self.source_href, + "package_path": self.package_path, + "attachment_id": self.attachment_id, + "source_adapter_id": self.source_adapter_id, + "source_span": self.source_span.to_dict() if self.source_span else None, + "digest": self.digest, + "metadata": self.metadata, + } + ) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "RenderAssetProvenance": + return cls( + source_uri=str(data["source_uri"]) if data.get("source_uri") is not None else None, + source_path=str(data["source_path"]) if data.get("source_path") is not None else None, + source_href=str(data["source_href"]) if data.get("source_href") is not None else None, + package_path=str(data["package_path"]) if data.get("package_path") is not None else None, + attachment_id=str(data["attachment_id"]) if data.get("attachment_id") is not None else None, + source_adapter_id=str(data["source_adapter_id"]) if data.get("source_adapter_id") is not None else None, + source_span=RenderSourceSpan.from_dict(data.get("source_span")), + digest=str(data["digest"]) if data.get("digest") is not None else None, + metadata=dict(data.get("metadata", {})), + ) + + @classmethod + def from_source_provenance( + cls, + provenance: Any, + *, + attachment_id: str | None = None, + source_adapter_id: str | None = None, + ) -> "RenderAssetProvenance": + data = _mapping_from(provenance) + return cls( + source_uri=str(data["source_uri"]) if data.get("source_uri") is not None else None, + source_path=str(data["source_path"]) if data.get("source_path") is not None else None, + source_href=str(data["source_href"]) if data.get("source_href") is not None else None, + package_path=str(data["package_path"]) if data.get("package_path") is not None else None, + attachment_id=attachment_id, + source_adapter_id=source_adapter_id, + source_span=RenderSourceSpan( + source_path=str(data["source_path"]) if data.get("source_path") is not None else None, + selector=str(data["anchor"]) if data.get("anchor") is not None else None, + metadata={ + key: str(data[key]) + for key in ("page", "section", "start_offset", "end_offset") + if data.get(key) is not None + }, + ), + digest=str(data["digest"]) if data.get("digest") is not None else None, + metadata=dict(data.get("metadata", {})), + ) + + +@dataclass(frozen=True) +class RenderAsset: + """Static asset descriptor with declared renderer copy behavior.""" + + asset_id: str = "" + source_uri: str | None = None + source_path: str | None = None + name: str | None = None + media_type: str | None = None + extension: str | None = None + digest: str | None = None + role: str = "asset" + copy_policy: str = "copy" + output_reference: str | None = None + provenance: list[RenderAssetProvenance] = field(default_factory=list) + metadata: dict[str, Any] = field(default_factory=dict) + + def __post_init__(self) -> None: + if not (self.source_uri or self.source_path): + raise RenderReferenceError("Render asset needs source_uri or source_path") + if not self.role.strip(): + raise RenderReferenceError("Render asset role cannot be empty") + if self.copy_policy not in RENDER_ASSET_COPY_POLICIES: + raise RenderReferenceError(f"Unsupported render asset copy_policy `{self.copy_policy}`") + if not self.asset_id: + object.__setattr__( + self, + "asset_id", + render_asset_id( + self.source_uri or self.source_path or "", + digest=self.digest, + role=self.role, + output_reference=self.output_reference, + ), + ) + + def to_dict(self) -> dict[str, Any]: + return _drop_empty( + { + "asset_id": self.asset_id, + "source_uri": self.source_uri, + "source_path": self.source_path, + "name": self.name, + "media_type": self.media_type, + "extension": self.extension, + "digest": self.digest, + "role": self.role, + "copy_policy": self.copy_policy, + "output_reference": self.output_reference, + "provenance": [event.to_dict() for event in self.provenance], + "metadata": self.metadata, + } + ) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "RenderAsset": + return cls( + asset_id=str(data.get("asset_id", "")), + source_uri=str(data["source_uri"]) if data.get("source_uri") is not None else None, + source_path=str(data["source_path"]) if data.get("source_path") is not None else None, + name=str(data["name"]) if data.get("name") is not None else None, + media_type=str(data["media_type"]) if data.get("media_type") is not None else None, + extension=str(data["extension"]) if data.get("extension") is not None else None, + digest=str(data["digest"]) if data.get("digest") is not None else None, + role=str(data.get("role", "asset")), + copy_policy=str(data.get("copy_policy", "copy")), + output_reference=str(data["output_reference"]) if data.get("output_reference") is not None else None, + provenance=[RenderAssetProvenance.from_dict(event) for event in data.get("provenance", [])], + metadata=dict(data.get("metadata", {})), + ) + + @classmethod + def from_source_asset( + cls, + asset: Any, + *, + role: str = "attachment", + copy_policy: str = "copy", + output_reference: str | None = None, + provenance: list[RenderAssetProvenance] | None = None, + ) -> "RenderAsset": + data = _mapping_from(asset) + source_uri = str(data.get("uri") or data.get("source_uri") or "") + source_path = str(data["path"]) if data.get("path") is not None else None + digest = str(data["digest"]) if data.get("digest") is not None else None + resolved_provenance = provenance + if resolved_provenance is None: + resolved_provenance = [ + RenderAssetProvenance( + source_uri=source_uri or None, + source_path=source_path, + attachment_id=str(data["name"]) if data.get("name") is not None else None, + digest=digest, + metadata={"source_asset": True}, + ) + ] + return cls( + source_uri=source_uri or None, + source_path=source_path, + name=str(data["name"]) if data.get("name") is not None else None, + media_type=str(data["media_type"]) if data.get("media_type") is not None else None, + extension=str(data["extension"]) if data.get("extension") is not None else None, + digest=digest, + role=role, + copy_policy=copy_policy, + output_reference=output_reference, + provenance=resolved_provenance, + metadata=dict(data.get("metadata", {})), + ) + + +@dataclass(frozen=True) +class RenderAssetManifest: + """Deterministic list of static assets requested by renderer source.""" + + manifest_id: str = "" + assets: list[RenderAsset] = field(default_factory=list) + source_path: str | None = None + source_digest: str | None = None + schema_version: str = RENDER_REFERENCE_SCHEMA_VERSION + metadata: dict[str, Any] = field(default_factory=dict) + + def __post_init__(self) -> None: + if self.schema_version != RENDER_REFERENCE_SCHEMA_VERSION: + raise RenderReferenceError(f"Expected schema_version `{RENDER_REFERENCE_SCHEMA_VERSION}`") + assets = sorted(self.assets, key=lambda asset: asset.asset_id) + _check_duplicates([asset.asset_id for asset in assets], "asset") + object.__setattr__(self, "assets", assets) + if not self.manifest_id: + object.__setattr__( + self, + "manifest_id", + _stable_id( + "asset-manifest", + self.source_path, + self.source_digest, + [asset.asset_id for asset in assets], + ), + ) + + def to_dict(self) -> dict[str, Any]: + return _drop_empty( + { + "schema_version": self.schema_version, + "manifest_id": self.manifest_id, + "source_path": self.source_path, + "source_digest": self.source_digest, + "assets": [asset.to_dict() for asset in self.assets], + "metadata": self.metadata, + } + ) + + @classmethod + def from_dict(cls, data: dict[str, Any] | None) -> "RenderAssetManifest": + if not data: + return cls() + return cls( + manifest_id=str(data.get("manifest_id", "")), + source_path=str(data["source_path"]) if data.get("source_path") is not None else None, + source_digest=str(data["source_digest"]) if data.get("source_digest") is not None else None, + assets=[RenderAsset.from_dict(asset) for asset in data.get("assets", [])], + schema_version=str(data.get("schema_version", RENDER_REFERENCE_SCHEMA_VERSION)), + metadata=dict(data.get("metadata", {})), + ) + + +@dataclass(frozen=True) +class RenderSourceMap: + """Trace from Markitect source/generated output to renderer units or artifacts.""" + + map_id: str = "" + source: RenderSourceSpan | None = None + source_unit_id: str | None = None + generated_by: str | None = None + function_run_id: str | None = None + render_unit_id: str | None = None + artifact_id: str | None = None + artifact_ref: str | None = None + role: str | None = None + metadata: dict[str, Any] = field(default_factory=dict) + + def __post_init__(self) -> None: + if not (self.render_unit_id or self.artifact_id or self.artifact_ref): + raise RenderReferenceError( + "Render source map needs render_unit_id, artifact_id, or artifact_ref" + ) + if not self.map_id: + object.__setattr__( + self, + "map_id", + _stable_id( + "source-map", + self.source.to_dict() if self.source else None, + self.source_unit_id, + self.generated_by, + self.function_run_id, + self.render_unit_id, + self.artifact_id, + self.artifact_ref, + self.role, + ), + ) + + def to_dict(self) -> dict[str, Any]: + return _drop_empty( + { + "map_id": self.map_id, + "source": self.source.to_dict() if self.source else None, + "source_unit_id": self.source_unit_id, + "generated_by": self.generated_by, + "function_run_id": self.function_run_id, + "render_unit_id": self.render_unit_id, + "artifact_id": self.artifact_id, + "artifact_ref": self.artifact_ref, + "role": self.role, + "metadata": self.metadata, + } + ) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "RenderSourceMap": + return cls( + map_id=str(data.get("map_id", "")), + source=RenderSourceSpan.from_dict(data.get("source")), + source_unit_id=str(data["source_unit_id"]) if data.get("source_unit_id") is not None else None, + generated_by=str(data["generated_by"]) if data.get("generated_by") is not None else None, + function_run_id=str(data["function_run_id"]) if data.get("function_run_id") is not None else None, + render_unit_id=str(data["render_unit_id"]) if data.get("render_unit_id") is not None else None, + artifact_id=str(data["artifact_id"]) if data.get("artifact_id") is not None else None, + artifact_ref=str(data["artifact_ref"]) if data.get("artifact_ref") is not None else None, + role=str(data["role"]) if data.get("role") is not None else None, + metadata=dict(data.get("metadata", {})), + ) + + +@dataclass(frozen=True) +class RenderReferenceManifest: + """Passive render structure, cross-reference, asset, and provenance manifest.""" + + manifest_id: str = "" + units: list[RenderUnitReference] = field(default_factory=list) + cross_references: list[RenderCrossReference] = field(default_factory=list) + toc: list[RenderTocEntry] = field(default_factory=list) + asset_manifest: RenderAssetManifest = field(default_factory=RenderAssetManifest) + source_maps: list[RenderSourceMap] = field(default_factory=list) + source_path: str | None = None + source_digest: str | None = None + schema_version: str = RENDER_REFERENCE_SCHEMA_VERSION + metadata: dict[str, Any] = field(default_factory=dict) + + def __post_init__(self) -> None: + if self.schema_version != RENDER_REFERENCE_SCHEMA_VERSION: + raise RenderReferenceError(f"Expected schema_version `{RENDER_REFERENCE_SCHEMA_VERSION}`") + _check_duplicates([unit.unit_id for unit in self.units], "render unit") + _check_duplicates([entry.entry_id for entry in self.toc], "TOC entry") + _check_duplicates([event.map_id for event in self.source_maps], "source map") + unit_ids = {unit.unit_id for unit in self.units} + if unit_ids: + for cross_reference in self.cross_references: + if cross_reference.target_unit_id not in unit_ids: + raise RenderReferenceError( + f"Cross-reference `{cross_reference.reference_id}` targets unknown unit " + f"`{cross_reference.target_unit_id}`" + ) + for entry in self.toc: + if entry.unit_id not in unit_ids: + raise RenderReferenceError( + f"TOC entry `{entry.entry_id}` targets unknown unit `{entry.unit_id}`" + ) + if not self.manifest_id: + object.__setattr__( + self, + "manifest_id", + _stable_id( + "render-manifest", + self.source_path, + self.source_digest, + [unit.unit_id for unit in self.units], + [reference.reference_id for reference in self.cross_references], + [entry.entry_id for entry in self.toc], + self.asset_manifest.manifest_id, + [event.map_id for event in self.source_maps], + ), + ) + + def to_dict(self) -> dict[str, Any]: + return _drop_empty( + { + "schema_version": self.schema_version, + "kind": RENDER_REFERENCE_MANIFEST_KIND, + "manifest_id": self.manifest_id, + "source_path": self.source_path, + "source_digest": self.source_digest, + "units": [unit.to_dict() for unit in self.units], + "cross_references": [reference.to_dict() for reference in self.cross_references], + "toc": [entry.to_dict() for entry in self.toc], + "asset_manifest": self.asset_manifest.to_dict(), + "source_maps": [event.to_dict() for event in self.source_maps], + "metadata": self.metadata, + } + ) + + @classmethod + def from_dict(cls, data: dict[str, Any]) -> "RenderReferenceManifest": + return cls( + manifest_id=str(data.get("manifest_id", "")), + units=[RenderUnitReference.from_dict(unit) for unit in data.get("units", [])], + cross_references=[ + RenderCrossReference.from_dict(reference) + for reference in data.get("cross_references", []) + ], + toc=[RenderTocEntry.from_dict(entry) for entry in data.get("toc", [])], + asset_manifest=RenderAssetManifest.from_dict(data.get("asset_manifest")), + source_maps=[RenderSourceMap.from_dict(event) for event in data.get("source_maps", [])], + source_path=str(data["source_path"]) if data.get("source_path") is not None else None, + source_digest=str(data["source_digest"]) if data.get("source_digest") is not None else None, + schema_version=str(data.get("schema_version", RENDER_REFERENCE_SCHEMA_VERSION)), + metadata=dict(data.get("metadata", {})), + ) + + @dataclass(frozen=True) class RenderArtifact: """Metadata for a rendered or exported artifact.""" @@ -115,6 +786,7 @@ class RenderExportRequest: source_path: str | None = None options: dict[str, Any] = field(default_factory=dict) policy: dict[str, Any] = field(default_factory=dict) + render_manifest: RenderReferenceManifest | dict[str, Any] | None = None schema_version: str = RENDER_EXPORT_SCHEMA_VERSION metadata: dict[str, Any] = field(default_factory=dict) @@ -128,6 +800,7 @@ class RenderExportRequest: "source_path": self.source_path, "options": self.options, "policy": self.policy, + "render_manifest": _render_manifest_dict(self.render_manifest), "metadata": self.metadata, } ) @@ -384,18 +1057,20 @@ class FakeRenderExportAdapter: operation=request.operation, profile=request.profile, provenance=provenance, - metadata={ - "profile": request.profile, - "supported_operations": self.descriptor.operations, - "external_renderer_invoked": False, - }, + metadata=_fake_result_metadata( + request, + { + "profile": request.profile, + "supported_operations": self.descriptor.operations, + }, + ), ) if request.operation == "export-source": artifact = RenderArtifact.from_content( exported, role="renderer-source", media_type="text/markdown", - metadata={"profile": request.profile}, + metadata=_fake_artifact_metadata(request, {"profile": request.profile}), ) provenance = [ _event_with_artifact(event, artifact.artifact_id) @@ -408,14 +1083,14 @@ class FakeRenderExportAdapter: artifacts=[artifact], exported_source=exported, provenance=provenance, - metadata={"external_renderer_invoked": False}, + metadata=_fake_result_metadata(request), ) rendered = f"FAKE RENDER ARTIFACT\nprofile: {request.profile}\n\n{exported}" artifact = RenderArtifact.from_content( rendered, role="rendered-artifact", media_type=_fake_media_type(request.profile), - metadata={"profile": request.profile, "fake_renderer": True}, + metadata=_fake_artifact_metadata(request, {"profile": request.profile, "fake_renderer": True}), ) provenance = [_event_with_artifact(event, artifact.artifact_id) for event in provenance] return RenderExportResult( @@ -425,7 +1100,7 @@ class FakeRenderExportAdapter: artifacts=[artifact], exported_source=exported, provenance=provenance, - metadata={"external_renderer_invoked": False}, + metadata=_fake_result_metadata(request), ) @@ -527,6 +1202,45 @@ def render_export_registry_descriptor() -> ExtensionDescriptor: ) +def render_reference_manifest_descriptor() -> ExtensionDescriptor: + """Descriptor for passive render reference and asset manifest contracts.""" + + return ExtensionDescriptor( + id="render.reference-manifest", + kind="render-reference-contract", + summary="Passive render unit, cross-reference, TOC, asset manifest, and source-map contracts.", + capabilities=[ + ProcessingCapability(id="render_references", kind="model"), + ProcessingCapability(id="asset_manifest", kind="model"), + ProcessingCapability(id="source_maps", kind="model"), + ProcessingCapability(id="provenance", kind="emit"), + ], + safety={ + "filesystem_read": False, + "filesystem_write": False, + "external_process": False, + "network": False, + "asset_copying": False, + "final_numbering": False, + }, + input_contract="Markdown units | normalized source attachments | renderer source planning metadata", + output_contract="RenderReferenceManifest", + diagnostics_namespace="render.reference", + provenance_prefix="render.reference_manifest", + docs=["docs/render-reference-asset-manifest.md"], + examples=["examples/render/render-reference-manifest.yaml"], + metadata={ + "schema_version": RENDER_REFERENCE_SCHEMA_VERSION, + "kind": RENDER_REFERENCE_MANIFEST_KIND, + "unit_kinds": sorted(RENDER_UNIT_KINDS), + "copy_policies": sorted(RENDER_ASSET_COPY_POLICIES), + "core_performs_asset_copying": False, + "core_assigns_final_numbering": False, + "markitect_filter_boundary": "read-side source asset and attachment metadata only", + }, + ) + + def fake_render_export_adapter_descriptor() -> RenderExportAdapterDescriptor: """Descriptor for the built-in fake render/export adapter.""" @@ -603,6 +1317,45 @@ def _render_export_capabilities() -> list[ProcessingCapability]: ] +def render_unit_id( + kind: str, + *, + source_path: str | None = None, + anchor: str | None = None, + content_hash: str | None = None, + ordinal_hint: int | None = None, + title: str | None = None, +) -> str: + """Return a deterministic render unit id for passive reference manifests.""" + + if kind not in RENDER_UNIT_KINDS: + raise RenderReferenceError(f"Unsupported render unit kind `{kind}`") + return _stable_id("render-unit", kind, source_path, anchor, content_hash, ordinal_hint, title) + + +def render_asset_id( + source: str, + *, + digest: str | None = None, + role: str | None = None, + output_reference: str | None = None, +) -> str: + """Return a deterministic asset id without copying or reading the asset.""" + + if not source.strip(): + raise RenderReferenceError("Render asset id source cannot be empty") + return _stable_id("asset", source, digest, role, output_reference) + + +def render_manifest_id(manifest: RenderReferenceManifest | dict[str, Any]) -> str: + """Return the deterministic id for a render reference manifest-like object.""" + + data = _render_manifest_dict(manifest) + if data and data.get("manifest_id"): + return str(data["manifest_id"]) + return _stable_id("render-manifest", data) + + def _event_with_artifact(event: RenderProvenance, artifact_id: str) -> RenderProvenance: return RenderProvenance( operation=event.operation, @@ -615,6 +1368,57 @@ def _event_with_artifact(event: RenderProvenance, artifact_id: str) -> RenderPro ) +def _fake_result_metadata( + request: RenderExportRequest, + base: dict[str, Any] | None = None, +) -> dict[str, Any]: + metadata = {"external_renderer_invoked": False} + metadata.update(base or {}) + metadata.update(_manifest_summary_metadata(request.render_manifest)) + return metadata + + +def _fake_artifact_metadata( + request: RenderExportRequest, + base: dict[str, Any] | None = None, +) -> dict[str, Any]: + metadata = dict(base or {}) + metadata.update(_manifest_summary_metadata(request.render_manifest)) + return metadata + + +def _manifest_summary_metadata(manifest: RenderReferenceManifest | dict[str, Any] | None) -> dict[str, Any]: + data = _render_manifest_dict(manifest) + if not data: + return {} + asset_manifest = data.get("asset_manifest", {}) + assets = asset_manifest.get("assets", []) if isinstance(asset_manifest, dict) else [] + return _drop_empty( + { + "render_reference_manifest_id": data.get("manifest_id"), + "render_reference_schema_version": data.get("schema_version"), + "render_units": len(data.get("units", [])), + "render_cross_references": len(data.get("cross_references", [])), + "render_toc_entries": len(data.get("toc", [])), + "render_source_maps": len(data.get("source_maps", [])), + "asset_manifest_id": asset_manifest.get("manifest_id") + if isinstance(asset_manifest, dict) + else None, + "render_assets": len(assets), + } + ) + + +def _render_manifest_dict(manifest: RenderReferenceManifest | dict[str, Any] | None) -> dict[str, Any]: + if manifest is None: + return {} + if isinstance(manifest, RenderReferenceManifest): + return manifest.to_dict() + if isinstance(manifest, dict): + return dict(manifest) + raise RenderReferenceError("render_manifest must be a RenderReferenceManifest or mapping") + + def _fake_exported_source(request: RenderExportRequest) -> str: return ( f"\n\n" @@ -630,6 +1434,44 @@ def _fake_media_type(profile: str) -> str: return "text/plain" +def _stable_id(prefix: str, *parts: Any) -> str: + payload = json.dumps(parts, sort_keys=True, separators=(",", ":"), default=str) + digest = hashlib.sha256(payload.encode("utf-8")).hexdigest()[:16] + return f"{prefix}:{digest}" + + +def _check_duplicates(values: list[str], label: str) -> None: + seen: set[str] = set() + duplicates: set[str] = set() + for value in values: + if value in seen: + duplicates.add(value) + seen.add(value) + if duplicates: + raise RenderReferenceError( + f"Duplicate {label} id(s): " + ", ".join(sorted(duplicates)) + ) + + +def _mapping_from(value: Any) -> dict[str, Any]: + if isinstance(value, dict): + return dict(value) + to_dict = getattr(value, "to_dict", None) + if callable(to_dict): + return dict(to_dict()) + raise RenderReferenceError("Expected a mapping or object with to_dict()") + + +def _render_kind_from_content_unit(kind: str) -> str: + if kind == "heading": + return "section" + if kind == "code": + return "code-block" + if kind in RENDER_UNIT_KINDS: + return kind + return "custom" + + def _digest_text(value: str) -> str: return "sha256:" + hashlib.sha256(value.encode("utf-8")).hexdigest() diff --git a/tests/test_builtin_extension_catalog.py b/tests/test_builtin_extension_catalog.py index 2ce282f..5829be1 100644 --- a/tests/test_builtin_extension_catalog.py +++ b/tests/test_builtin_extension_catalog.py @@ -25,6 +25,7 @@ def test_builtin_extension_registry_lists_query_processors_and_backend(): assert "memory.runtime-adapter-boundary" in ids assert "memory.context-package" in ids assert "render.export-registry" in ids + assert "render.reference-manifest" in ids assert "render.fake" in ids assert "source.adapter-registry" in ids @@ -190,11 +191,16 @@ def test_builtin_render_export_descriptors_expose_contract_boundary(): registry = builtin_extension_registry() registry_descriptor = registry.get("render.export-registry") + reference_manifest = registry.get("render.reference-manifest") fake = registry.get("render.fake") assert registry_descriptor.kind == "render-export-registry" assert registry_descriptor.safety["external_process"] is False assert registry_descriptor.metadata["concrete_renderer_execution_required"] is False + assert reference_manifest.kind == "render-reference-contract" + assert reference_manifest.safety["asset_copying"] is False + assert reference_manifest.safety["final_numbering"] is False + assert reference_manifest.metadata["core_performs_asset_copying"] is False assert fake.kind == "render-export" assert fake.safety["external_process"] is False assert fake.safety["filesystem_write"] is False diff --git a/tests/test_render_reference_manifest.py b/tests/test_render_reference_manifest.py new file mode 100644 index 0000000..714a709 --- /dev/null +++ b/tests/test_render_reference_manifest.py @@ -0,0 +1,236 @@ +import yaml + +from markitect_tool.render import ( + RENDER_REFERENCE_MANIFEST_KIND, + RENDER_REFERENCE_SCHEMA_VERSION, + RenderAsset, + RenderAssetManifest, + RenderCrossReference, + RenderExportRequest, + RenderReferenceError, + RenderReferenceManifest, + RenderSourceMap, + RenderSourceSpan, + RenderTocEntry, + RenderUnitReference, + render_asset_id, + render_manifest_id, + render_unit_id, + render_with_adapter, +) +from markitect_tool.source import SourceAsset + + +def test_render_unit_reference_serializes_with_stable_id_and_no_final_numbering(): + span = RenderSourceSpan( + source_path="docs/report.md", + line_start=12, + line_end=16, + selector="#fig-revenue", + ) + + first = RenderUnitReference( + kind="figure", + label="Figure", + caption="Quarterly revenue", + source_path="docs/report.md", + anchor="fig:revenue", + source_span=span, + content_hash="sha256:figure-content", + ordinal_hint=1, + numbering={"scope": "document", "sequence": "figures"}, + ) + second = RenderUnitReference( + kind="figure", + label="Figure", + caption="Quarterly revenue", + source_path="docs/report.md", + anchor="fig:revenue", + source_span=span, + content_hash="sha256:figure-content", + ordinal_hint=1, + numbering={"scope": "document", "sequence": "figures"}, + ) + + assert first.unit_id == second.unit_id + assert first.unit_id == render_unit_id( + "figure", + source_path="docs/report.md", + anchor="fig:revenue", + content_hash="sha256:figure-content", + ordinal_hint=1, + title="Quarterly revenue", + ) + assert "final_number" not in first.to_dict() + assert RenderUnitReference.from_dict(first.to_dict()).to_dict() == first.to_dict() + + +def test_render_manifest_models_cross_references_and_toc_before_layout(): + section = RenderUnitReference( + kind="section", + title="Results", + anchor="results", + source_path="docs/report.md", + ordinal_hint=1, + ) + table = RenderUnitReference( + kind="table", + label="Table", + caption="Quarterly totals", + anchor="tbl:totals", + source_path="docs/report.md", + ordinal_hint=2, + numbering={"scope": "section", "sequence": "tables"}, + ) + cross_reference = RenderCrossReference( + source_unit_id=section.unit_id, + target_unit_id=table.unit_id, + label="Table", + requested_style="numbered", + fallback_text="Quarterly totals", + ) + toc_entry = RenderTocEntry( + unit_id=section.unit_id, + title="Results", + level=1, + order=1, + ) + + manifest = RenderReferenceManifest( + source_path="docs/report.md", + source_digest="sha256:report", + units=[section, table], + cross_references=[cross_reference], + toc=[toc_entry], + ) + data = manifest.to_dict() + + assert data["schema_version"] == RENDER_REFERENCE_SCHEMA_VERSION + assert data["kind"] == RENDER_REFERENCE_MANIFEST_KIND + assert data["cross_references"][0]["target_unit_id"] == table.unit_id + assert "final_number" not in data["cross_references"][0] + assert "href" not in data["toc"][0] + assert RenderReferenceManifest.from_dict(data).to_dict() == data + assert render_manifest_id(manifest) == manifest.manifest_id + + +def test_render_asset_manifest_preserves_source_adapter_attachment_metadata(): + source_asset = SourceAsset( + uri="attachments/chart.png", + path="attachments/chart.png", + name="chart.png", + media_type="image/png", + extension=".png", + digest="sha256:chart", + metadata={"source_adapter": "markitect-filter"}, + ) + + render_asset = RenderAsset.from_source_asset( + source_asset, + role="figure-image", + copy_policy="copy", + output_reference="renderer://asset/chart", + ) + skipped_asset = RenderAsset( + source_uri="https://example.test/logo.svg", + media_type="image/svg+xml", + extension=".svg", + digest="sha256:logo", + role="brand-logo", + copy_policy="link", + output_reference="renderer://asset/logo", + ) + manifest = RenderAssetManifest( + assets=[skipped_asset, render_asset], + source_path="docs/report.md", + source_digest="sha256:report", + ) + data = manifest.to_dict() + + assert [asset["asset_id"] for asset in data["assets"]] == sorted( + [render_asset.asset_id, skipped_asset.asset_id] + ) + assert render_asset.asset_id == render_asset_id( + "attachments/chart.png", + digest="sha256:chart", + role="figure-image", + output_reference="renderer://asset/chart", + ) + chart_entry = next(asset for asset in data["assets"] if asset["asset_id"] == render_asset.asset_id) + assert chart_entry["provenance"][0]["digest"].startswith("sha256:") + assert chart_entry["copy_policy"] == "copy" + assert RenderAssetManifest.from_dict(data).to_dict() == data + + +def test_render_source_maps_are_echoed_by_fake_renderer_metadata(): + section = RenderUnitReference( + kind="section", + title="Demo", + anchor="demo", + source_path="docs/demo.md", + ) + source_map = RenderSourceMap( + source=RenderSourceSpan(source_path="docs/demo.md", line_start=1, line_end=3), + source_unit_id=section.unit_id, + generated_by="document.function.echo", + function_run_id="run:echo", + render_unit_id=section.unit_id, + artifact_ref="renderer-source", + ) + asset = RenderAsset( + source_uri="images/demo.png", + source_path="images/demo.png", + media_type="image/png", + extension=".png", + digest="sha256:demo-image", + role="figure-image", + copy_policy="copy", + output_reference="renderer://asset/demo-image", + ) + manifest = RenderReferenceManifest( + units=[section], + source_maps=[source_map], + asset_manifest=RenderAssetManifest(assets=[asset]), + source_path="docs/demo.md", + ) + + result = render_with_adapter( + RenderExportRequest( + source="# Demo\n\n![Demo](images/demo.png)", + operation="render-artifact", + profile="docs", + source_path="docs/demo.md", + render_manifest=manifest, + ) + ) + + assert result.valid + assert result.metadata["external_renderer_invoked"] is False + assert result.metadata["render_reference_manifest_id"] == manifest.manifest_id + assert result.metadata["render_source_maps"] == 1 + assert result.artifacts[0].metadata["asset_manifest_id"] == manifest.asset_manifest.manifest_id + + +def test_render_reference_manifest_rejects_unknown_cross_reference_targets(): + unit = RenderUnitReference(kind="section", title="Known", anchor="known") + + try: + RenderReferenceManifest( + units=[unit], + cross_references=[RenderCrossReference(target_unit_id="missing")], + ) + except RenderReferenceError as exc: + assert "targets unknown unit" in str(exc) + else: + raise AssertionError("expected unknown cross-reference target to fail") + + +def test_render_reference_manifest_example_loads_and_roundtrips(): + with open("examples/render/render-reference-manifest.yaml", encoding="utf-8") as handle: + data = yaml.safe_load(handle) + + manifest = RenderReferenceManifest.from_dict(data) + + assert manifest.schema_version == RENDER_REFERENCE_SCHEMA_VERSION + assert manifest.asset_manifest.assets + assert manifest.to_dict()["kind"] == RENDER_REFERENCE_MANIFEST_KIND diff --git a/workplans/MKTT-WP-0021-render-reference-asset-manifest.md b/workplans/MKTT-WP-0021-render-reference-asset-manifest.md index 82544da..b7a58fa 100644 --- a/workplans/MKTT-WP-0021-render-reference-asset-manifest.md +++ b/workplans/MKTT-WP-0021-render-reference-asset-manifest.md @@ -3,10 +3,10 @@ id: MKTT-WP-0021 type: workplan title: "Render Reference And Asset Manifest Contract" domain: markitect -status: todo +status: done owner: markitect-tool topic_slug: markitect -planning_priority: P2 +planning_priority: complete planning_order: 155 depends_on_workplans: - MKTT-WP-0010 @@ -55,11 +55,31 @@ Renderer packages own: `markitect-filter` owns only read-side source asset and attachment metadata needed by normalized Markdown inputs. +## Implementation Summary + +Completed in `markitect-tool` as a passive render reference and asset manifest +contract: + +- `RenderUnitReference`, `RenderCrossReference`, and `RenderTocEntry` model + renderable units, requested cross-references, and TOC planning without final + numbering. +- `RenderAsset`, `RenderAssetManifest`, and `RenderAssetProvenance` model + static assets, copy-policy declarations, output placeholders, and source + adapter attachment provenance without asset copying. +- `RenderSourceMap` and `RenderReferenceManifest` tie Markitect source spans, + generated function outputs, render units, assets, and artifact references + together under `markitect.render.reference.v1`. +- `RenderExportRequest` can carry a passive render manifest; `render.fake` + echoes manifest and asset counts into result/artifact metadata without + invoking a renderer. +- Docs, examples, extension catalog metadata, generated API reference, and + tests were added. + ## P21.1 - Define render unit references ```task id: MKTT-WP-0021-T001 -status: todo +status: done priority: high state_hub_task_id: "3d33d387-633e-4ffb-962e-1a5061d3db01" ``` @@ -79,7 +99,7 @@ Output: render reference model, serialization tests, and examples. ```task id: MKTT-WP-0021-T002 -status: todo +status: done priority: medium state_hub_task_id: "4a96e27b-9165-450c-899c-f7af484d9438" ``` @@ -93,7 +113,7 @@ Output: manifest model and tests that keep final numbering outside core. ```task id: MKTT-WP-0021-T003 -status: todo +status: done priority: high state_hub_task_id: "ba917e45-1912-4bdb-bf3f-5946c20957b2" ``` @@ -114,7 +134,7 @@ Output: model, examples, and compatibility note for `MKTF-WP-0003`. ```task id: MKTT-WP-0021-T004 -status: todo +status: done priority: high state_hub_task_id: "dd9f1128-af1e-44a8-961b-3aba6104ec9a" ``` @@ -128,7 +148,7 @@ Output: source map model, fake-renderer fixture integration, and tests. ```task id: MKTT-WP-0021-T005 -status: todo +status: done priority: medium state_hub_task_id: "1d472c44-d970-4403-9f0b-18e6192da737" ```