"""CLI-level tests for the wealth-vsm generation pilot infospace."""

import json
import os
import shutil
import subprocess
import sys
from pathlib import Path

import pytest

from infospace_bench import InfospaceError, load_infospace
from infospace_bench.evaluation_io import read_entity_evaluation
from infospace_bench.generation import parse_entity_bundle
from infospace_bench.history import read_metrics_file
from infospace_bench.markdown_adapter import validate_infospace_artifacts
from infospace_bench.semantics import list_entities, list_relations

REPO_ROOT = Path(__file__).resolve().parents[1]
PILOT = REPO_ROOT / "infospaces" / "wealth-vsm-generation-pilot"
FIXTURES = PILOT / "workflows" / "fixtures" / "wealth-vsm-fake-responses.yaml"


def cli_env() -> dict[str, str]:
    """Return a copy of the current environment with PYTHONPATH set for the CLI.

    The repository's ``src`` directory comes first, followed by the
    markitect-tool source directory, followed by whatever PYTHONPATH the
    caller already had (if any). Entries are joined with ``os.pathsep`` so
    the value is valid on every platform.
    """
    env = os.environ.copy()
    search_path = [
        str(REPO_ROOT / "src"),
        # NOTE(review): machine-specific absolute path; it only resolves on the
        # original author's workstation. Kept unchanged for compatibility.
        "/home/worsch/markitect-tool/src",
    ]
    inherited = env.get("PYTHONPATH")
    if inherited:
        # Keep caller-provided entries instead of silently discarding them.
        search_path.append(inherited)
    env["PYTHONPATH"] = os.pathsep.join(search_path)
    return env


def run_cli(*args: str, cwd: Path | None = None) -> subprocess.CompletedProcess[str]:
    """Run ``python -m infospace_bench`` with *args* and capture its output.

    Args:
        *args: Command-line arguments passed to the ``infospace_bench`` module.
        cwd: Working directory for the subprocess; defaults to ``REPO_ROOT``.

    Returns:
        The completed process with text ``stdout``/``stderr`` captured. A
        non-zero exit status does not raise (``check=False``); callers assert
        on ``returncode`` themselves.
    """
    return subprocess.run(
        [sys.executable, "-m", "infospace_bench", *args],
        check=False,
        cwd=cwd or REPO_ROOT,
        env=cli_env(),
        text=True,
        capture_output=True,
    )


def copy_pilot(tmp_path: Path) -> Path:
    """Copy the pilot infospace tree into *tmp_path* and return the copy's path."""
    target = tmp_path / PILOT.name
    shutil.copytree(PILOT, target)
    return target


def test_wealth_vsm_generation_plan_is_explicit_and_assisted() -> None:
    """Check the workflow listing and the plan emitted for entity extraction."""
    plan = run_cli("workflow", "plan", str(PILOT), "wealth-vsm-extract-entities")
    workflows = run_cli("workflow", "inspect", str(PILOT))

    assert plan.returncode == 0, plan.stderr
    assert workflows.returncode == 0, workflows.stderr

    plan_payload = json.loads(plan.stdout)
    workflow_ids = [
        workflow["id"] for workflow in json.loads(workflows.stdout)["workflows"]
    ]

    assert workflow_ids == [
        "wealth-vsm-extract-entities",
        "wealth-vsm-map-and-analyze",
        "wealth-vsm-evaluate-entities",
    ]
    assert plan_payload["status"] == "planned"
    assert plan_payload["assisted_requests"][0]["stage_id"] == "extract-entities"
    assert plan_payload["stages"][1]["kind"] == "split_entities"
    assert plan_payload["stages"][1]["status"] == "waiting_for_assisted_output"


def test_wealth_vsm_generation_pipeline_runs_with_fixture_adapter(
    tmp_path: Path,
) -> None:
    """Run all three workflows on a copy of the pilot using fixture responses.

    The extraction workflow is run twice with the same arguments; the
    assertions below then require each entity artifact to appear exactly once.
    """
    root = copy_pilot(tmp_path)
    fixture = root / "workflows" / "fixtures" / "wealth-vsm-fake-responses.yaml"

    extraction = run_cli(
        "workflow",
        "run",
        str(root),
        "wealth-vsm-extract-entities",
        "--fixture-responses",
        str(fixture),
    )
    second_extraction = run_cli(
        "workflow",
        "run",
        str(root),
        "wealth-vsm-extract-entities",
        "--fixture-responses",
        str(fixture),
    )
    mapping = run_cli(
        "workflow",
        "run",
        str(root),
        "wealth-vsm-map-and-analyze",
        "--fixture-responses",
        str(fixture),
    )
    evaluation = run_cli(
        "workflow",
        "run",
        str(root),
        "wealth-vsm-evaluate-entities",
        "--fixture-responses",
        str(fixture),
    )
    check = run_cli("check", str(root))
    validation = run_cli("validate", str(root))

    assert extraction.returncode == 0, extraction.stderr
    assert second_extraction.returncode == 0, second_extraction.stderr
    assert mapping.returncode == 0, mapping.stderr
    assert evaluation.returncode == 0, evaluation.stderr
    assert check.returncode == 0, check.stderr
    assert validation.returncode == 0, validation.stderr

    loaded = load_infospace(root)
    artifact_ids = [artifact.id for artifact in loaded.artifacts]
    entities = list_entities(root)
    relations = list_relations(root)
    metrics = read_metrics_file(root / "output" / "metrics" / "metrics.yaml")
    division_eval = read_entity_evaluation(
        root / "output" / "evaluations" / "division-of-labour.md"
    )
    validation_results = validate_infospace_artifacts(root)

    assert artifact_ids.count("entity/division-of-labour.md") == 1
    assert artifact_ids.count("entity/market-extent.md") == 1
    assert "generated/book-1-chapter-03-analysis.md" in artifact_ids
    assert [entity.slug for entity in entities] == [
        "division-of-labour",
        "market-extent",
    ]
    # Fail with an assertion (not an IndexError) when no relation was produced.
    assert relations
    assert relations[0].subject_entity_id == "entity/division-of-labour.md"
    assert relations[0].object_entity_id == "entity/market-extent.md"
    assert division_eval.artifact_id == "entity/division-of-labour.md"
    # Compare the float with a tolerance rather than exact equality.
    assert metrics["per_artifact_mean"] == pytest.approx(4.3)
    assert all(result.valid for result in validation_results)


def test_entity_bundle_parser_rejects_malformed_and_duplicate_bundles() -> None:
    """Check the error codes raised for a bundle without an H1 and for a repeated H1."""
    with pytest.raises(InfospaceError) as missing_h1:
        parse_entity_bundle("## Definition\n\nNo top-level entity heading.")
    with pytest.raises(InfospaceError) as duplicate:
        parse_entity_bundle(
            "# Market Extent\n\n## Definition\n\nOne.\n\n"
            "# Market Extent\n\n## Definition\n\nTwo.\n"
        )

    assert missing_h1.value.code == "invalid_entity_bundle"
    assert duplicate.value.code == "duplicate_entity_bundle_item"


def test_wealth_vsm_generation_docs_capture_scale_up_risks() -> None:
    """Check that the pipeline doc and the pilot report contain the expected phrases."""
    doc = (REPO_ROOT / "docs" / "wealth-vsm-generation-pipeline.md").read_text(
        encoding="utf-8"
    )
    report = (PILOT / "reports" / "generation-pilot-report.md").read_text(
        encoding="utf-8"
    )

    assert "Legacy pipeline decomposition" in doc
    assert "One-chapter pilot" in doc
    assert "Full corpus scale-up" in doc
    assert "live provider-backed generation" in doc
    assert "one-chapter regeneration" in report
    assert "not the legacy process script" in report