diff --git a/README.md b/README.md index 1fb861c..44e5503 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Start with: - `docs/evaluation-and-inspection.md` - `docs/reference-pilot-decision.md` - `docs/markitect-main-scope-assessment.md` +- `docs/markitect-tool-adapter.md` - `docs/orthogonal-successor-roadmap.md` - `docs/legacy-infospace-feature-inventory.md` - `docs/successor-boundary-interface-map.md` diff --git a/docs/markitect-tool-adapter.md b/docs/markitect-tool-adapter.md new file mode 100644 index 0000000..228f7b1 --- /dev/null +++ b/docs/markitect-tool-adapter.md @@ -0,0 +1,68 @@ +# Markitect Tool Adapter + +Date: 2026-05-14 + +## Purpose + +`infospace-bench` delegates Markdown syntax work to `markitect-tool` through a +single narrow adapter: + +```text +src/infospace_bench/markdown_adapter.py +``` + +This keeps the repos orthogonal: + +- `markitect-tool` parses and validates Markdown. +- `infospace-bench` interprets parsed/validated artifacts as parts of concrete + infospaces. + +## Current Adapter Surface + +- `parse_markdown_artifact(path)` +- `extract_section_text(parsed, heading)` +- `validate_markdown_artifact(artifact_id, path, contract_path)` +- `validate_infospace_artifacts(root)` + +The adapter normalizes `markitect-tool` diagnostics into +`MarkdownDiagnostic`, preserving severity, code, message, source, contract, +rule, guidance, and details. + +## Contract Selection + +`validate_infospace_artifacts()` reads `infospace.yaml` and uses the `schemas` +mapping as artifact-kind contract references: + +```yaml +schemas: + source: contracts/source.contract.md + generated: contracts/generated.contract.md +``` + +If no contract is configured for an artifact kind, validation skips that +artifact. If a configured contract path is missing, validation raises a +structured `InfospaceError`. 
+ +## CLI + +```bash +python3 -m infospace_bench validate infospaces/bootstrap-pilot +``` + +The command emits JSON: + +```json +{ + "valid": true, + "results": [] +} +``` + +It exits with `0` when all checked artifacts are valid and `1` when any checked +artifact fails validation. + +## Boundary Rule + +Only `src/infospace_bench/markdown_adapter.py` should import `markitect_tool`. +Other modules should consume adapter results so `infospace-bench` remains an +application layer, not a Markdown toolkit. diff --git a/pyproject.toml b/pyproject.toml index 5196690..fb62221 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,11 +3,14 @@ name = "infospace-bench" version = "0.1.0" description = "Application-layer workspace for concrete structured knowledge spaces." requires-python = ">=3.11" -dependencies = ["PyYAML>=6"] +dependencies = [ + "PyYAML>=6", + "markitect-tool @ file:///home/worsch/markitect-tool", +] [project.scripts] infospace-bench = "infospace_bench.cli:main" [tool.pytest.ini_options] -pythonpath = ["src"] +pythonpath = ["src", "../markitect-tool/src"] testpaths = ["tests"] diff --git a/src/infospace_bench/cli.py b/src/infospace_bench/cli.py index 0b1be0b..d6377eb 100644 --- a/src/infospace_bench/cli.py +++ b/src/infospace_bench/cli.py @@ -7,6 +7,7 @@ from pathlib import Path from .errors import InfospaceError from .lifecycle import add_artifact, create_infospace, load_infospace +from .markdown_adapter import validate_infospace_artifacts def build_parser() -> argparse.ArgumentParser: @@ -31,6 +32,9 @@ def build_parser() -> argparse.ArgumentParser: export = sub.add_parser("export", help="Print the infospace representation") export.add_argument("root") + validate = sub.add_parser("validate", help="Validate infospace artifacts") + validate.add_argument("root") + return parser @@ -58,6 +62,16 @@ def main(argv: list[str] | None = None) -> int: _write_json({"artifact": artifact.to_dict()}) elif args.command == "export": 
_write_json(load_infospace(Path(args.root)).to_dict()) + elif args.command == "validate": + results = validate_infospace_artifacts(Path(args.root)) + valid = all(result.valid for result in results) + _write_json( + { + "valid": valid, + "results": [result.to_dict() for result in results], + } + ) + return 0 if valid else 1 else: parser.error(f"Unhandled command: {args.command}") except InfospaceError as exc: diff --git a/src/infospace_bench/markdown_adapter.py b/src/infospace_bench/markdown_adapter.py new file mode 100644 index 0000000..2532c65 --- /dev/null +++ b/src/infospace_bench/markdown_adapter.py @@ -0,0 +1,169 @@ +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any + +from markitect_tool import Heading, Section, parse_markdown_file +from markitect_tool.contract import check_markdown_file + +from .errors import InfospaceError +from .lifecycle import load_infospace + + +@dataclass(frozen=True) +class MarkdownDiagnostic: + severity: str + code: str + message: str + source: dict[str, Any] | None = None + contract: dict[str, Any] | None = None + rule_id: str | None = None + guidance: str | None = None + details: dict[str, Any] = field(default_factory=dict) + + @classmethod + def from_markitect(cls, diagnostic: Any) -> "MarkdownDiagnostic": + data = diagnostic.to_dict() + return cls( + severity=str(data.get("severity") or ""), + code=str(data.get("code") or ""), + message=str(data.get("message") or ""), + source=data.get("source"), + contract=data.get("contract"), + rule_id=data.get("rule_id"), + guidance=data.get("guidance"), + details=dict(data.get("details") or {}), + ) + + def to_dict(self) -> dict[str, Any]: + data: dict[str, Any] = { + "severity": self.severity, + "code": self.code, + "message": self.message, + } + if self.source: + data["source"] = self.source + if self.contract: + data["contract"] = self.contract + if self.rule_id: + data["rule_id"] = self.rule_id + if 
self.guidance: + data["guidance"] = self.guidance + if self.details: + data["details"] = self.details + return data + + +@dataclass(frozen=True) +class ParsedMarkdownArtifact: + path: Path + frontmatter: dict[str, Any] + headings: list[Heading] + sections: list[Section] + + def to_dict(self) -> dict[str, Any]: + return { + "path": str(self.path), + "frontmatter": self.frontmatter, + "headings": [heading.to_dict() for heading in self.headings], + "sections": [section.to_dict() for section in self.sections], + } + + +@dataclass(frozen=True) +class ArtifactValidationResult: + artifact_id: str + path: str + contract_path: str + valid: bool + diagnostics: list[MarkdownDiagnostic] = field(default_factory=list) + metrics: dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict[str, Any]: + return { + "artifact_id": self.artifact_id, + "path": self.path, + "contract_path": self.contract_path, + "valid": self.valid, + "diagnostics": [diagnostic.to_dict() for diagnostic in self.diagnostics], + "metrics": self.metrics, + } + + +def parse_markdown_artifact(path: str | Path) -> ParsedMarkdownArtifact: + artifact_path = Path(path) + document = parse_markdown_file(artifact_path) + return ParsedMarkdownArtifact( + path=artifact_path, + frontmatter=document.frontmatter, + headings=document.headings, + sections=document.sections, + ) + + +def extract_section_text( + parsed: ParsedMarkdownArtifact, + heading: str, +) -> str: + expected = _normalize_heading(heading) + for section in parsed.sections: + if _normalize_heading(section.heading.text) == expected: + return "\n".join(block.text for block in section.blocks if block.text).strip() + return "" + + +def validate_markdown_artifact( + artifact_id: str, + path: str | Path, + contract_path: str | Path, +) -> ArtifactValidationResult: + artifact_path = Path(path) + contract = Path(contract_path) + result = check_markdown_file(artifact_path, contract) + return ArtifactValidationResult( + artifact_id=artifact_id, + 
path=str(artifact_path), + contract_path=str(contract), + valid=result.valid, + diagnostics=[ + MarkdownDiagnostic.from_markitect(diagnostic) + for diagnostic in result.diagnostics + ], + metrics=result.metrics, + ) + + +def validate_infospace_artifacts(root: str | Path) -> list[ArtifactValidationResult]: + infospace = load_infospace(root) + results: list[ArtifactValidationResult] = [] + for artifact in infospace.artifacts: + contract_ref = ( + infospace.config.schemas.get(artifact.kind) + or infospace.config.schemas.get("artifact") + ) + if not contract_ref: + continue + artifact_path = infospace.root / artifact.path + contract_path = infospace.root / contract_ref + if not contract_path.is_file(): + raise InfospaceError( + "missing_contract", + f"Configured contract does not exist: {contract_path}", + { + "artifact_id": artifact.id, + "contract_path": str(contract_path), + }, + ) + results.append( + validate_markdown_artifact( + artifact.id, + artifact_path, + contract_path, + ) + ) + return results + + +def _normalize_heading(value: str) -> str: + return " ".join(value.strip().lower().split()) diff --git a/tests/test_cli.py b/tests/test_cli.py index 9585ae2..ab50ad8 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -7,7 +7,7 @@ from pathlib import Path def run_cli(*args: str) -> subprocess.CompletedProcess[str]: env = os.environ.copy() - env["PYTHONPATH"] = "src" + env["PYTHONPATH"] = "src:/home/worsch/markitect-tool/src" return subprocess.run( [sys.executable, "-m", "infospace_bench", *args], check=False, diff --git a/tests/test_markdown_adapter.py b/tests/test_markdown_adapter.py new file mode 100644 index 0000000..4a0aaa8 --- /dev/null +++ b/tests/test_markdown_adapter.py @@ -0,0 +1,150 @@ +import json +import os +import subprocess +import sys +from pathlib import Path + +from infospace_bench import add_artifact, create_infospace +from infospace_bench.markdown_adapter import ( + extract_section_text, + parse_markdown_artifact, + 
def cli_env() -> dict[str, str]:
    """Return the environment used to run the ``infospace_bench`` CLI in a subprocess.

    The result is a copy of ``os.environ`` (the live process environment is
    not modified) whose ``PYTHONPATH`` is replaced by two entries: the local
    ``src`` directory and the markitect-tool source directory.

    The markitect-tool location defaults to the original development checkout
    path and can be overridden by setting ``MARKITECT_TOOL_SRC``.
    """
    env = os.environ.copy()
    markitect_src = (
        env.get("MARKITECT_TOOL_SRC") or "/home/worsch/markitect-tool/src"
    )
    # ``os.pathsep`` is ":" on POSIX and ";" on Windows; a literal ":" would
    # produce a single bogus entry on Windows.
    env["PYTHONPATH"] = os.pathsep.join(["src", markitect_src])
    return env
def test_validate_infospace_artifacts_uses_configured_contract(tmp_path: Path) -> None:
    """Artifacts of kind ``source`` are checked against the contract named in ``schemas``."""
    infospace = create_infospace(tmp_path, "pilot", name="Pilot")
    root = infospace.root

    # Put the contract file where the patched config will point.
    contract_file = root / "contracts" / "source.contract.md"
    contract_file.parent.mkdir()
    contract_file.write_text(CONTRACT, encoding="utf-8")

    # Bind the ``source`` kind to that contract. This relies on the freshly
    # created config containing the literal line ``schemas: {}``.
    config_file = root / "infospace.yaml"
    original_yaml = config_file.read_text(encoding="utf-8")
    patched_yaml = original_yaml.replace(
        "schemas: {}\n", "schemas:\n source: contracts/source.contract.md\n"
    )
    config_file.write_text(patched_yaml, encoding="utf-8")

    # Stage both Markdown files first, then register them in the same order.
    fixtures = (
        ("valid.md", VALID_SOURCE, "Valid"),
        ("invalid.md", INVALID_SOURCE, "Invalid"),
    )
    staged = []
    for filename, text, title in fixtures:
        staged_path = tmp_path / filename
        staged_path.write_text(text, encoding="utf-8")
        staged.append((staged_path, title))
    for staged_path, title in staged:
        add_artifact(root, staged_path, kind="source", title=title)

    outcomes = {
        outcome.artifact_id: outcome
        for outcome in validate_infospace_artifacts(root)
    }

    assert outcomes["source/valid.md"].valid is True
    failing = outcomes["source/invalid.md"]
    assert failing.valid is False
    assert failing.diagnostics[0].code == "contract.section.missing"
result.returncode == 1 + payload = json.loads(result.stdout) + assert payload["valid"] is False + assert payload["results"][0]["diagnostics"][0]["code"] == ( + "contract.section.missing" + ) diff --git a/workplans/IB-WP-0006-markitect-tool-adapter.md b/workplans/IB-WP-0006-markitect-tool-adapter.md index ac6561d..5028587 100644 --- a/workplans/IB-WP-0006-markitect-tool-adapter.md +++ b/workplans/IB-WP-0006-markitect-tool-adapter.md @@ -4,7 +4,7 @@ type: workplan title: "Markitect Tool Adapter And Markdown Artifact Validation" domain: markitect repo: infospace-bench -status: planned +status: done owner: markitect topic_slug: markitect created: "2026-05-14" @@ -26,7 +26,7 @@ a thin application adapter. ```task id: IB-WP-0006-T01 -status: todo +status: done priority: high state_hub_task_id: "b438be61-6d2a-44f9-b235-2b415e9f4bca" ``` @@ -40,7 +40,7 @@ state_hub_task_id: "b438be61-6d2a-44f9-b235-2b415e9f4bca" ```task id: IB-WP-0006-T02 -status: todo +status: done priority: high state_hub_task_id: "d37d6be3-ea04-4279-a113-086ab4fb617d" ``` @@ -54,7 +54,7 @@ state_hub_task_id: "d37d6be3-ea04-4279-a113-086ab4fb617d" ```task id: IB-WP-0006-T03 -status: todo +status: done priority: high state_hub_task_id: "74a78679-4ef2-4b6b-a80c-b40119d9f90e" ``` @@ -67,7 +67,7 @@ state_hub_task_id: "74a78679-4ef2-4b6b-a80c-b40119d9f90e" ```task id: IB-WP-0006-T04 -status: todo +status: done priority: medium state_hub_task_id: "99b4e2ca-d9be-40a5-8b10-0b0f3c18fb19" ```