generated from coulomb/repo-seed
source adapter framework
This commit is contained in:
@@ -262,6 +262,32 @@ from markitect_tool.schema import (
|
||||
validate_markdown_file,
|
||||
validate_schema,
|
||||
)
|
||||
from markitect_tool.source import (
|
||||
NORMALIZED_SOURCE_SCHEMA_VERSION,
|
||||
SOURCE_ADAPTER_ENTRY_POINT_GROUP,
|
||||
NormalizationQuality,
|
||||
NormalizedMarkdownDocument,
|
||||
NormalizedMarkdownSegment,
|
||||
SourceAdapterDescriptor,
|
||||
SourceAdapterError,
|
||||
SourceAdapterMatch,
|
||||
SourceAdapterMatchRequest,
|
||||
SourceAdapterRegistry,
|
||||
SourceAsset,
|
||||
SourceInspectRequest,
|
||||
SourceInspectResult,
|
||||
SourceMetadata,
|
||||
SourceProvenance,
|
||||
SourceReadAdapter,
|
||||
SourceReadRequest,
|
||||
SourceReadResult,
|
||||
default_source_adapter_registry,
|
||||
discover_source_adapters,
|
||||
inspect_source,
|
||||
normalization_cache_key,
|
||||
normalize_source,
|
||||
source_adapter_registry_descriptor,
|
||||
)
|
||||
from markitect_tool.template import (
|
||||
MissingTemplateVariable,
|
||||
TemplateAnalysis,
|
||||
@@ -295,6 +321,30 @@ __all__ = [
|
||||
"validate_document",
|
||||
"validate_markdown_file",
|
||||
"validate_schema",
|
||||
"NORMALIZED_SOURCE_SCHEMA_VERSION",
|
||||
"SOURCE_ADAPTER_ENTRY_POINT_GROUP",
|
||||
"NormalizationQuality",
|
||||
"NormalizedMarkdownDocument",
|
||||
"NormalizedMarkdownSegment",
|
||||
"SourceAdapterDescriptor",
|
||||
"SourceAdapterError",
|
||||
"SourceAdapterMatch",
|
||||
"SourceAdapterMatchRequest",
|
||||
"SourceAdapterRegistry",
|
||||
"SourceAsset",
|
||||
"SourceInspectRequest",
|
||||
"SourceInspectResult",
|
||||
"SourceMetadata",
|
||||
"SourceProvenance",
|
||||
"SourceReadAdapter",
|
||||
"SourceReadRequest",
|
||||
"SourceReadResult",
|
||||
"default_source_adapter_registry",
|
||||
"discover_source_adapters",
|
||||
"inspect_source",
|
||||
"normalization_cache_key",
|
||||
"normalize_source",
|
||||
"source_adapter_registry_descriptor",
|
||||
"ContractCheckResult",
|
||||
"ContractValidationResult",
|
||||
"DocumentContract",
|
||||
|
||||
@@ -99,6 +99,11 @@ from markitect_tool.reference import (
|
||||
)
|
||||
from markitect_tool.runtime import evaluate_form_state, load_runtime_context_file
|
||||
from markitect_tool.schema import load_schema_file, validate_markdown_file, validate_schema
|
||||
from markitect_tool.source import (
|
||||
default_source_adapter_registry,
|
||||
inspect_source,
|
||||
normalize_source,
|
||||
)
|
||||
from markitect_tool.template import (
|
||||
MissingTemplateVariable,
|
||||
TemplateError,
|
||||
@@ -197,6 +202,123 @@ def extension_commands(output_format: str) -> None:
|
||||
_emit_extension_catalog({"count": len(specs), "commands": specs}, output_format)
|
||||
|
||||
|
||||
@main.group("source")
def source_group() -> None:
    """Inspect source-format adapters and normalize sources."""
    # Container group only: the `adapters`, `inspect` and `normalize`
    # subcommands register themselves via @source_group.command(...).
|
||||
|
||||
|
||||
@source_group.command("adapters")
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="text",
    show_default=True,
)
def source_adapters(output_format: str) -> None:
    """List discovered read-only source adapters."""

    # Serialize the default registry once and hand the plain dict to the
    # shared renderer, which picks JSON, YAML or text output.
    registry = default_source_adapter_registry()
    listing = registry.to_dict()
    _emit_source_adapters(listing, output_format)
|
||||
|
||||
|
||||
@source_group.command("inspect")
@click.argument("source_path", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--adapter", "adapter_id", help="Explicit source adapter id.")
@click.option(
    "--option",
    "option_values",
    multiple=True,
    metavar="KEY=VALUE",
    help="Adapter-specific option. May be repeated.",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml", "text"], case_sensitive=False),
    default="json",
    show_default=True,
)
def source_inspect(
    source_path: Path,
    adapter_id: str | None,
    option_values: tuple[str, ...],
    output_format: str,
) -> None:
    """Inspect a local source without full Markdown conversion."""

    # Registry construction and option parsing stay inside the try block so
    # that a ValueError raised by any of them is reported as a Click error.
    try:
        inspection = inspect_source(
            source_path,
            registry=default_source_adapter_registry(),
            adapter_id=adapter_id,
            options=_parse_key_value_options(option_values),
        )
    except ValueError as exc:
        raise click.ClickException(str(exc)) from exc

    _emit_source_inspect(inspection.to_dict(), output_format)

    # The report is always printed; the process exit status carries validity.
    exit_code = 0 if inspection.is_valid else 1
    raise click.exceptions.Exit(exit_code)
|
||||
|
||||
|
||||
@source_group.command("normalize")
@click.argument("source_path", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--adapter", "adapter_id", help="Explicit source adapter id.")
@click.option(
    "--option",
    "option_values",
    multiple=True,
    metavar="KEY=VALUE",
    help="Adapter-specific option. May be repeated.",
)
@click.option(
    "--output",
    type=click.Path(dir_okay=False, path_type=Path),
    help="Write normalized output to this file.",
)
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["markdown", "json", "yaml"], case_sensitive=False),
    default="markdown",
    show_default=True,
)
def source_normalize(
    source_path: Path,
    adapter_id: str | None,
    option_values: tuple[str, ...],
    output: Path | None,
    output_format: str,
) -> None:
    """Normalize a local source into canonical Markdown."""

    # Parameters (bound by the Click decorators above):
    #   source_path   - existing local file to normalize.
    #   adapter_id    - explicit adapter id, or None when --adapter is omitted.
    #   option_values - raw KEY=VALUE strings, parsed by _parse_key_value_options.
    #   output        - destination file, or None to print on stdout.
    #   output_format - "markdown", "json" or "yaml".
    # Always terminates by raising click.exceptions.Exit: status 0 when the
    # result is valid, 1 otherwise. A ValueError raised while normalizing is
    # converted into a click.ClickException.
    try:
        result = normalize_source(
            source_path,
            registry=default_source_adapter_registry(),
            adapter_id=adapter_id,
            options=_parse_key_value_options(option_values),
        )
    except ValueError as exc:
        raise click.ClickException(str(exc)) from exc

    data = result.to_dict()

    if output_format == "markdown":
        if not result.is_valid or result.document is None:
            # There is no Markdown to emit, so report the diagnostics on
            # stderr and fail without touching stdout or the output file.
            for diagnostic in data.get("diagnostics", []):
                click.echo(
                    f"[{diagnostic['severity']}] {diagnostic['code']}: "
                    f"{diagnostic['message']}",
                    err=True,
                )
            raise click.exceptions.Exit(1)
        markdown = result.document.markdown
        if output is not None:
            output.write_text(markdown, encoding="utf-8")
        else:
            # nl=False: print the document exactly as produced.
            click.echo(markdown, nl=False)
    elif output is not None:
        # Previously --output was silently ignored for json/yaml and the
        # result went to stdout; honour the option for every format.
        # NOTE(review): JSON layout mirrors the other emitters in this module
        # (indent=2, ensure_ascii=False) - confirm it matches _emit_jsonish.
        if output_format == "yaml":
            serialized = yaml.safe_dump(data, sort_keys=False)
        else:
            serialized = json.dumps(data, indent=2, ensure_ascii=False) + "\n"
        output.write_text(serialized, encoding="utf-8")
    else:
        _emit_jsonish(data, output_format)

    raise click.exceptions.Exit(0 if result.is_valid else 1)
|
||||
|
||||
|
||||
@main.group("docs")
|
||||
def docs_group() -> None:
|
||||
"""Generate CLI and API reference documentation."""
|
||||
@@ -2892,6 +3014,46 @@ def _emit_extension_catalog(data: dict, output_format: str) -> None:
|
||||
click.echo(f"- {extension['id']} ({extension['kind']})")
|
||||
|
||||
|
||||
def _emit_source_adapters(data: dict, output_format: str) -> None:
    """Print the source adapter listing in the requested format.

    ``"json"`` and ``"yaml"`` dump ``data`` as-is; any other value produces
    the plain-text listing: a count line followed by one line per adapter.
    """
    if output_format == "json":
        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
        return
    if output_format == "yaml":
        click.echo(yaml.safe_dump(data, sort_keys=False))
        return

    click.echo(f"adapters: {data.get('count', 0)}")
    for adapter in data.get("adapters", []):
        operations = ", ".join(adapter.get("operations", []))
        extensions = ", ".join(adapter.get("extensions", []))
        click.echo(f"- {adapter['id']} [{operations}] {extensions}")
        # Only adapters that report missing dependencies get the extra line.
        missing = adapter.get("dependency_check", {}).get("missing")
        if missing:
            click.echo(" missing: " + ", ".join(missing))
|
||||
|
||||
|
||||
def _emit_source_inspect(data: dict, output_format: str) -> None:
    """Print a source inspection result in the requested format.

    ``"json"`` and ``"yaml"`` dump ``data`` unchanged. Any other value
    renders a short text report: validity, source location, then adapter id,
    title, creators and lossiness when present, followed by one line per
    diagnostic.

    Sections that are absent *or* explicitly ``None`` are treated as empty.
    ``dict.get(key, {})`` only falls back when the key is missing, so a
    serialized ``"adapter": None`` used to raise ``AttributeError`` in the
    text branch.
    """
    if output_format == "json":
        click.echo(json.dumps(data, indent=2, ensure_ascii=False))
    elif output_format == "yaml":
        click.echo(yaml.safe_dump(data, sort_keys=False))
    else:
        click.echo("valid" if data.get("valid") else "invalid")
        asset = data.get("asset") or {}
        adapter = data.get("adapter") or {}
        metadata = data.get("metadata") or {}
        # Prefer the path, then the URI; a None/empty URI must not be printed
        # as the literal text "None".
        click.echo(f"source: {asset.get('path') or asset.get('uri') or '<unknown>'}")
        if adapter.get("id"):
            click.echo(f"adapter: {adapter['id']}")
        if metadata.get("title"):
            click.echo(f"title: {metadata['title']}")
        if metadata.get("creators"):
            click.echo("creators: " + ", ".join(metadata["creators"]))
        quality = data.get("quality") or {}
        if quality.get("lossiness"):
            click.echo(f"lossiness: {quality['lossiness']}")
        for diagnostic in data.get("diagnostics") or []:
            click.echo(f"! [{diagnostic['severity']}] {diagnostic['code']}: {diagnostic['message']}")
|
||||
|
||||
|
||||
def _emit_jsonish(data: dict, output_format: str) -> None:
|
||||
if output_format == "yaml":
|
||||
click.echo(yaml.safe_dump(data, sort_keys=False))
|
||||
|
||||
@@ -5,6 +5,10 @@ from __future__ import annotations
|
||||
from markitect_tool.extension.registry import ExtensionDescriptor, ExtensionRegistry
|
||||
from markitect_tool.extension.processing import ProcessingCapability
|
||||
from markitect_tool.query import default_query_engine_registry
|
||||
from markitect_tool.source import (
|
||||
default_source_adapter_registry,
|
||||
source_adapter_registry_descriptor,
|
||||
)
|
||||
|
||||
|
||||
def builtin_extension_registry() -> ExtensionRegistry:
|
||||
@@ -22,8 +26,11 @@ def builtin_extension_registry() -> ExtensionRegistry:
|
||||
_local_label_policy_descriptor(),
|
||||
_document_function_descriptor(),
|
||||
_agent_memory_descriptor(),
|
||||
source_adapter_registry_descriptor(),
|
||||
]:
|
||||
registry.register(descriptor)
|
||||
for descriptor in default_source_adapter_registry().extension_descriptors():
|
||||
registry.register(descriptor)
|
||||
return registry
|
||||
|
||||
|
||||
|
||||
55
src/markitect_tool/source/__init__.py
Normal file
55
src/markitect_tool/source/__init__.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""Source adapter contracts and normalization helpers."""
|
||||
|
||||
from markitect_tool.source.engine import (
|
||||
SOURCE_ADAPTER_ENTRY_POINT_GROUP,
|
||||
NORMALIZED_SOURCE_SCHEMA_VERSION,
|
||||
NormalizationQuality,
|
||||
NormalizedMarkdownDocument,
|
||||
NormalizedMarkdownSegment,
|
||||
SourceAdapterDescriptor,
|
||||
SourceAdapterError,
|
||||
SourceAdapterMatch,
|
||||
SourceAdapterMatchRequest,
|
||||
SourceAdapterRegistry,
|
||||
SourceAsset,
|
||||
SourceInspectRequest,
|
||||
SourceInspectResult,
|
||||
SourceMetadata,
|
||||
SourceProvenance,
|
||||
SourceReadAdapter,
|
||||
SourceReadRequest,
|
||||
SourceReadResult,
|
||||
default_source_adapter_registry,
|
||||
discover_source_adapters,
|
||||
inspect_source,
|
||||
normalization_cache_key,
|
||||
normalize_source,
|
||||
source_adapter_registry_descriptor,
|
||||
)
|
||||
|
||||
# Public names of this package; each one is imported above from
# markitect_tool.source.engine. Order is unchanged, comments only group the
# entries by naming convention.
__all__ = [
    # UPPER_CASE names
    "SOURCE_ADAPTER_ENTRY_POINT_GROUP",
    "NORMALIZED_SOURCE_SCHEMA_VERSION",
    # CamelCase names
    "NormalizationQuality",
    "NormalizedMarkdownDocument",
    "NormalizedMarkdownSegment",
    "SourceAdapterDescriptor",
    "SourceAdapterError",
    "SourceAdapterMatch",
    "SourceAdapterMatchRequest",
    "SourceAdapterRegistry",
    "SourceAsset",
    "SourceInspectRequest",
    "SourceInspectResult",
    "SourceMetadata",
    "SourceProvenance",
    "SourceReadAdapter",
    "SourceReadRequest",
    "SourceReadResult",
    # snake_case names
    "default_source_adapter_registry",
    "discover_source_adapters",
    "inspect_source",
    "normalization_cache_key",
    "normalize_source",
    "source_adapter_registry_descriptor",
]
|
||||
1000
src/markitect_tool/source/engine.py
Normal file
1000
src/markitect_tool/source/engine.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user