source adapter framework

This commit is contained in:
2026-05-14 22:05:34 +02:00
parent f8f20c7c32
commit eb34c0d4fb
17 changed files with 1924 additions and 15 deletions

View File

@@ -262,6 +262,32 @@ from markitect_tool.schema import (
validate_markdown_file,
validate_schema,
)
from markitect_tool.source import (
NORMALIZED_SOURCE_SCHEMA_VERSION,
SOURCE_ADAPTER_ENTRY_POINT_GROUP,
NormalizationQuality,
NormalizedMarkdownDocument,
NormalizedMarkdownSegment,
SourceAdapterDescriptor,
SourceAdapterError,
SourceAdapterMatch,
SourceAdapterMatchRequest,
SourceAdapterRegistry,
SourceAsset,
SourceInspectRequest,
SourceInspectResult,
SourceMetadata,
SourceProvenance,
SourceReadAdapter,
SourceReadRequest,
SourceReadResult,
default_source_adapter_registry,
discover_source_adapters,
inspect_source,
normalization_cache_key,
normalize_source,
source_adapter_registry_descriptor,
)
from markitect_tool.template import (
MissingTemplateVariable,
TemplateAnalysis,
@@ -295,6 +321,30 @@ __all__ = [
"validate_document",
"validate_markdown_file",
"validate_schema",
"NORMALIZED_SOURCE_SCHEMA_VERSION",
"SOURCE_ADAPTER_ENTRY_POINT_GROUP",
"NormalizationQuality",
"NormalizedMarkdownDocument",
"NormalizedMarkdownSegment",
"SourceAdapterDescriptor",
"SourceAdapterError",
"SourceAdapterMatch",
"SourceAdapterMatchRequest",
"SourceAdapterRegistry",
"SourceAsset",
"SourceInspectRequest",
"SourceInspectResult",
"SourceMetadata",
"SourceProvenance",
"SourceReadAdapter",
"SourceReadRequest",
"SourceReadResult",
"default_source_adapter_registry",
"discover_source_adapters",
"inspect_source",
"normalization_cache_key",
"normalize_source",
"source_adapter_registry_descriptor",
"ContractCheckResult",
"ContractValidationResult",
"DocumentContract",

View File

@@ -99,6 +99,11 @@ from markitect_tool.reference import (
)
from markitect_tool.runtime import evaluate_form_state, load_runtime_context_file
from markitect_tool.schema import load_schema_file, validate_markdown_file, validate_schema
from markitect_tool.source import (
default_source_adapter_registry,
inspect_source,
normalize_source,
)
from markitect_tool.template import (
MissingTemplateVariable,
TemplateError,
@@ -197,6 +202,123 @@ def extension_commands(output_format: str) -> None:
_emit_extension_catalog({"count": len(specs), "commands": specs}, output_format)
# Parent click group for the `source` subcommands (adapters, inspect, normalize).
@main.group("source")
def source_group() -> None:
    """Inspect source-format adapters and normalize sources."""
@source_group.command("adapters")
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="text",
    show_default=True,
)
def source_adapters(output_format: str) -> None:
    """List discovered read-only source adapters."""
    # Serialize the default registry first, then hand the plain dict to the
    # shared emitter, which picks the rendering based on ``output_format``.
    registry_data = default_source_adapter_registry().to_dict()
    _emit_source_adapters(registry_data, output_format)
@source_group.command("inspect")
@click.argument(
    "source_path",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
@click.option("--adapter", "adapter_id", help="Explicit source adapter id.")
@click.option(
    "--option",
    "option_values",
    multiple=True,
    metavar="KEY=VALUE",
    help="Adapter-specific option. May be repeated.",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="json",
    show_default=True,
)
def source_inspect(
    source_path: Path,
    adapter_id: str | None,
    option_values: tuple[str, ...],
    output_format: str,
) -> None:
    """Inspect a local source without full Markdown conversion."""
    # Registry construction, option parsing and the inspection itself all sit
    # inside the ``try`` so that a ValueError from any of them is reported as
    # a regular click usage error.
    try:
        registry = default_source_adapter_registry()
        adapter_options = _parse_key_value_options(option_values)
        inspection = inspect_source(
            source_path,
            registry=registry,
            adapter_id=adapter_id,
            options=adapter_options,
        )
    except ValueError as exc:
        raise click.ClickException(str(exc)) from exc

    _emit_source_inspect(inspection.to_dict(), output_format)

    # The process exit status mirrors the validity of the inspection result.
    exit_code = 0 if inspection.is_valid else 1
    raise click.exceptions.Exit(exit_code)
@source_group.command("normalize")
@click.argument(
    "source_path",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
@click.option("--adapter", "adapter_id", help="Explicit source adapter id.")
@click.option(
    "--option",
    "option_values",
    multiple=True,
    metavar="KEY=VALUE",
    help="Adapter-specific option. May be repeated.",
)
@click.option(
    "--output",
    type=click.Path(dir_okay=False, path_type=Path),
    help="Write normalized output to this file.",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["markdown", "json", "yaml"], case_sensitive=False),
    default="markdown",
    show_default=True,
)
def source_normalize(
    source_path: Path,
    adapter_id: str | None,
    option_values: tuple[str, ...],
    output: Path | None,
    output_format: str,
) -> None:
    """Normalize a local source into canonical Markdown."""
    # Parameters (bound by click):
    #   source_path   -- existing local file to normalize.
    #   adapter_id    -- explicit adapter id, or None to let the registry pick.
    #   option_values -- raw KEY=VALUE strings forwarded to the adapter.
    #   output        -- destination file; None means write to stdout.
    #   output_format -- "markdown", "json" or "yaml".
    # Exits with status 0 when the result is valid and 1 otherwise; a
    # ValueError raised while normalizing becomes a click.ClickException.
    try:
        result = normalize_source(
            source_path,
            registry=default_source_adapter_registry(),
            adapter_id=adapter_id,
            options=_parse_key_value_options(option_values),
        )
    except ValueError as exc:
        raise click.ClickException(str(exc)) from exc

    data = result.to_dict()

    if output_format == "markdown":
        if not result.is_valid or result.document is None:
            # No usable Markdown: report the diagnostics on stderr and fail
            # without touching stdout or the output file.
            for diagnostic in data.get("diagnostics", []):
                click.echo(
                    f"[{diagnostic['severity']}] {diagnostic['code']}: "
                    f"{diagnostic['message']}",
                    err=True,
                )
            raise click.exceptions.Exit(1)
        markdown = result.document.markdown
        if output is not None:
            output.write_text(markdown, encoding="utf-8")
        else:
            click.echo(markdown, nl=False)
    elif output is not None:
        # Honour --output for the structured formats as well; previously the
        # option was silently ignored here and the data went to stdout.
        # NOTE(review): serialization mirrors the other emitters in this file;
        # confirm it matches _emit_jsonish's JSON branch.
        if output_format == "yaml":
            rendered = yaml.safe_dump(data, sort_keys=False)
        else:
            rendered = json.dumps(data, indent=2, ensure_ascii=False) + "\n"
        output.write_text(rendered, encoding="utf-8")
    else:
        _emit_jsonish(data, output_format)

    raise click.exceptions.Exit(0 if result.is_valid else 1)
@main.group("docs")
def docs_group() -> None:
"""Generate CLI and API reference documentation."""
@@ -2892,6 +3014,46 @@ def _emit_extension_catalog(data: dict, output_format: str) -> None:
click.echo(f"- {extension['id']} ({extension['kind']})")
def _emit_source_adapters(data: dict, output_format: str) -> None:
    """Print the source-adapter listing in the requested output format.

    ``"json"`` and ``"yaml"`` dump ``data`` as-is; any other value produces a
    text summary with a count header and one bullet per adapter.
    """
    if output_format == "json":
        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
        return
    if output_format == "yaml":
        click.echo(yaml.safe_dump(data, sort_keys=False))
        return

    click.echo(f"adapters: {data.get('count', 0)}")
    for entry in data.get("adapters", []):
        op_list = ", ".join(entry.get("operations", []))
        ext_list = ", ".join(entry.get("extensions", []))
        click.echo(f"- {entry['id']} [{op_list}] {ext_list}")
        # Only mention dependencies when the check reports some as missing.
        missing = entry.get("dependency_check", {}).get("missing")
        if missing:
            click.echo(" missing: " + ", ".join(missing))
def _emit_source_inspect(data: dict, output_format: str) -> None:
    """Print a source inspection result in the requested output format.

    ``"json"`` and ``"yaml"`` dump ``data`` unchanged. Any other value prints
    a text summary: validity, source location, then adapter id, title,
    creators and lossiness when present, followed by one line per diagnostic.

    Sections of ``data`` that are missing *or* explicitly ``None`` are treated
    as empty, so a result without e.g. a matched adapter still renders.
    """
    if output_format == "json":
        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
        return
    if output_format == "yaml":
        click.echo(yaml.safe_dump(data, sort_keys=False))
        return

    click.echo("valid" if data.get("valid") else "invalid")

    # ``dict.get(key, {})`` only falls back when the key is absent; ``or {}``
    # also covers a key that is present with a None value.
    asset = data.get("asset") or {}
    adapter = data.get("adapter") or {}
    metadata = data.get("metadata") or {}
    quality = data.get("quality") or {}

    location = asset.get("path") or asset.get("uri") or "<unknown>"
    click.echo(f"source: {location}")
    if adapter.get("id"):
        click.echo(f"adapter: {adapter['id']}")
    if metadata.get("title"):
        click.echo(f"title: {metadata['title']}")
    if metadata.get("creators"):
        # Coerce to str so a non-string creator entry cannot break the join.
        click.echo("creators: " + ", ".join(map(str, metadata["creators"])))
    if quality.get("lossiness"):
        click.echo(f"lossiness: {quality['lossiness']}")
    for diagnostic in data.get("diagnostics") or []:
        click.echo(
            f"! [{diagnostic['severity']}] {diagnostic['code']}: "
            f"{diagnostic['message']}"
        )
def _emit_jsonish(data: dict, output_format: str) -> None:
if output_format == "yaml":
click.echo(yaml.safe_dump(data, sort_keys=False))

View File

@@ -5,6 +5,10 @@ from __future__ import annotations
from markitect_tool.extension.registry import ExtensionDescriptor, ExtensionRegistry
from markitect_tool.extension.processing import ProcessingCapability
from markitect_tool.query import default_query_engine_registry
from markitect_tool.source import (
default_source_adapter_registry,
source_adapter_registry_descriptor,
)
def builtin_extension_registry() -> ExtensionRegistry:
@@ -22,8 +26,11 @@ def builtin_extension_registry() -> ExtensionRegistry:
_local_label_policy_descriptor(),
_document_function_descriptor(),
_agent_memory_descriptor(),
source_adapter_registry_descriptor(),
]:
registry.register(descriptor)
for descriptor in default_source_adapter_registry().extension_descriptors():
registry.register(descriptor)
return registry

View File

@@ -0,0 +1,55 @@
"""Source adapter contracts and normalization helpers."""
from markitect_tool.source.engine import (
SOURCE_ADAPTER_ENTRY_POINT_GROUP,
NORMALIZED_SOURCE_SCHEMA_VERSION,
NormalizationQuality,
NormalizedMarkdownDocument,
NormalizedMarkdownSegment,
SourceAdapterDescriptor,
SourceAdapterError,
SourceAdapterMatch,
SourceAdapterMatchRequest,
SourceAdapterRegistry,
SourceAsset,
SourceInspectRequest,
SourceInspectResult,
SourceMetadata,
SourceProvenance,
SourceReadAdapter,
SourceReadRequest,
SourceReadResult,
default_source_adapter_registry,
discover_source_adapters,
inspect_source,
normalization_cache_key,
normalize_source,
source_adapter_registry_descriptor,
)
# Public API of the package: every name imported from
# markitect_tool.source.engine above is re-exported here, in the same order.
__all__ = [
    "SOURCE_ADAPTER_ENTRY_POINT_GROUP",
    "NORMALIZED_SOURCE_SCHEMA_VERSION",
    "NormalizationQuality",
    "NormalizedMarkdownDocument",
    "NormalizedMarkdownSegment",
    "SourceAdapterDescriptor",
    "SourceAdapterError",
    "SourceAdapterMatch",
    "SourceAdapterMatchRequest",
    "SourceAdapterRegistry",
    "SourceAsset",
    "SourceInspectRequest",
    "SourceInspectResult",
    "SourceMetadata",
    "SourceProvenance",
    "SourceReadAdapter",
    "SourceReadRequest",
    "SourceReadResult",
    "default_source_adapter_registry",
    "discover_source_adapters",
    "inspect_source",
    "normalization_cache_key",
    "normalize_source",
    "source_adapter_registry_descriptor",
]

File diff suppressed because it is too large Load Diff