"""Tests for source intake, the ``generate`` CLI, resumable generation, and the OpenRouter adapter."""

import json
import os
import subprocess
import sys
import zipfile
from pathlib import Path

import yaml

from infospace_bench.generator import (
    init_generation_infospace,
    run_generation,
    status_generation,
)
from infospace_bench.openrouter import OpenRouterAssistedGenerationAdapter
from infospace_bench.source_intake import normalize_source


def cli_env() -> dict[str, str]:
    """Return a copy of the current environment with ``PYTHONPATH`` set for CLI subprocesses.

    The two project source directories come first; any ``PYTHONPATH`` inherited
    from the parent process is kept after them instead of being discarded.
    Entries are joined with ``os.pathsep`` so the value is valid on every
    platform.

    Returns:
        A new environment mapping suitable for ``subprocess.run(env=...)``.
    """
    env = os.environ.copy()
    # "src" is relative, so the subprocess must be started from the repository
    # root. The absolute entry points at a sibling checkout on the author's
    # machine.
    search_path = ["src", "/home/worsch/markitect-tool/src"]
    inherited = env.get("PYTHONPATH")
    if inherited:
        search_path.append(inherited)
    env["PYTHONPATH"] = os.pathsep.join(search_path)
    return env


def _run_cli(*args: str) -> "subprocess.CompletedProcess[str]":
    """Run ``python -m infospace_bench`` with *args* and capture its text output.

    ``check=False`` is deliberate: callers assert on ``returncode`` themselves
    so that ``stderr`` can be shown as the assertion message.
    """
    return subprocess.run(
        [sys.executable, "-m", "infospace_bench", *args],
        check=False,
        env=cli_env(),
        text=True,
        capture_output=True,
    )


def fixture_responses(path: Path) -> None:
    """Write a YAML file of canned stage responses to *path*.

    Each response is keyed by ``stage_id`` and uses ``"*"`` as its
    ``input_artifact_id``. The file is what the tests pass as
    ``--fixture-responses`` / ``fixture_responses=``.

    Args:
        path: Destination file; it is created or overwritten.
    """
    summary_markdown = (
        "# Source Summary\n\nThe source describes reusable knowledge work.\n"
    )
    entities_markdown = (
        "# Knowledge Artifact\n\n"
        "## Definition\n\n"
        "A durable unit of structured knowledge derived from a source.\n\n"
        "## Context\n\n"
        "Generated from a generic source workflow.\n\n"
        "# Source Claim\n\n"
        "## Definition\n\n"
        "A claim preserved from the source for later review.\n\n"
        "## Context\n\n"
        "Used to keep provenance visible.\n"
    )
    relations_markdown = (
        "# Knowledge Artifact Supports Source Claim\n\n"
        "## Subject\n\n"
        "Knowledge Artifact\n\n"
        "## Predicate\n\n"
        "supports\n\n"
        "## Object\n\n"
        "Source Claim\n\n"
        "## Relation Type\n\n"
        "support\n\n"
        "## Evidence\n\n"
        "The source links durable artifacts to explicit claims.\n"
    )
    # The front matter must be valid YAML: the keys of each ``scores`` item are
    # nested under its ``- name`` entry.
    evaluation_markdown = (
        "---\n"
        "artifact_id: entity/knowledge-artifact.md\n"
        "evaluator: fixture\n"
        "evaluated_at: '2026-05-14T00:00:00'\n"
        "scores:\n"
        "  - name: groundedness\n"
        "    value: 4.0\n"
        "    max_value: 5.0\n"
        "  - name: usefulness\n"
        "    value: 4.0\n"
        "    max_value: 5.0\n"
        "---\n"
        "\n"
        "# Evaluation: entity/knowledge-artifact.md\n"
    )
    data = {
        "responses": [
            {
                "stage_id": "summarize-source",
                "input_artifact_id": "*",
                "markdown": summary_markdown,
            },
            {
                "stage_id": "extract-entities",
                "input_artifact_id": "*",
                "markdown": entities_markdown,
            },
            {
                "stage_id": "extract-relations",
                "input_artifact_id": "*",
                "markdown": relations_markdown,
            },
            {
                "stage_id": "evaluate-entity",
                "input_artifact_id": "*",
                "markdown": evaluation_markdown,
            },
        ]
    }
    # sort_keys=False keeps the keys of each response in the order written above.
    path.write_text(yaml.safe_dump(data, sort_keys=False), encoding="utf-8")


def write_epub_fixture(path: Path) -> None:
    """Create a minimal two-chapter EPUB-style zip archive at *path*.

    Args:
        path: Destination archive; it is created or overwritten.
    """
    # NOTE(review): the XHTML tags in these literals were reconstructed from the
    # surviving chapter text; confirm the exact markup against the original fixture.
    with zipfile.ZipFile(path, "w") as archive:
        archive.writestr(
            "OEBPS/chapter1.xhtml",
            "<html><body><h1>Chapter One</h1><p>Alpha beta.</p></body></html>",
        )
        archive.writestr(
            "OEBPS/chapter2.xhtml",
            "<html><body><h1>Chapter Two</h1><p>Gamma delta.</p></body></html>",
        )


def test_source_intake_accepts_article_ebook_and_folder(tmp_path: Path) -> None:
    """``normalize_source`` handles an HTML article, an EPUB archive, and a folder."""
    article = tmp_path / "article.html"
    # NOTE(review): markup reconstructed from the surviving text and the title
    # assertion below; confirm against the original fixture.
    article.write_text(
        "<html><head><title>Article Title</title></head>"
        "<body><h1>Article Title</h1><p>One two three.</p></body></html>",
        encoding="utf-8",
    )
    ebook = tmp_path / "book.epub"
    write_epub_fixture(ebook)
    folder = tmp_path / "collection"
    folder.mkdir()
    (folder / "note.md").write_text("# Note\n\nMarkdown source.", encoding="utf-8")
    (folder / "memo.txt").write_text("Plain text source.", encoding="utf-8")

    article_chunks = normalize_source(article)
    ebook_chunks = normalize_source(ebook)
    folder_chunks = normalize_source(folder)

    first_article = article_chunks[0]
    assert first_article.source_type == "html"
    assert first_article.title == "Article Title"
    assert first_article.chunk_id == "article-title"
    # Normalizing the same file twice must yield the same digest.
    assert first_article.digest == normalize_source(article)[0].digest
    assert [chunk.source_type for chunk in ebook_chunks] == ["epub", "epub"]
    assert {chunk.source_type for chunk in folder_chunks} == {"markdown", "text"}
    assert all(chunk.markdown.startswith("# ") for chunk in folder_chunks)


def test_generate_from_source_cli_fixture_builds_infospace(tmp_path: Path) -> None:
    """``generate from-source`` builds an infospace that ``generate status`` reports on."""
    source = tmp_path / "article.md"
    source.write_text(
        "# Reusable Knowledge\n\nA source about claims and durable artifacts.",
        encoding="utf-8",
    )
    fixture = tmp_path / "responses.yaml"
    fixture_responses(fixture)

    result = _run_cli(
        "generate",
        "from-source",
        str(source),
        "--workspace",
        str(tmp_path),
        "--slug",
        "article-space",
        "--name",
        "Article Space",
        "--fixture-responses",
        str(fixture),
        "--apply",
    )
    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
    root = Path(payload["root"])

    status = _run_cli("generate", "status", str(root))
    assert status.returncode == 0, status.stderr
    status_payload = json.loads(status.stdout)

    artifacts = root / "artifacts"
    assert payload["status"] == "completed"
    assert (artifacts / "sources" / "reusable-knowledge.md").is_file()
    assert (artifacts / "entities" / "knowledge-artifact.md").is_file()
    assert (artifacts / "relations" / "reusable-knowledge-relations.md").is_file()
    assert (root / "output" / "metrics" / "metrics.yaml").is_file()
    assert status_payload["source_chunk_count"] == 1
    assert status_payload["entity_count"] == 2
    assert status_payload["relation_count"] == 1
    assert status_payload["stale"] is False


def test_generate_from_ebook_and_folder_fixtures(tmp_path: Path) -> None:
    """``generate from-source`` accepts an EPUB and a folder, each yielding two source chunks."""
    fixture = tmp_path / "responses.yaml"
    fixture_responses(fixture)
    ebook = tmp_path / "book.epub"
    write_epub_fixture(ebook)
    folder = tmp_path / "folder"
    folder.mkdir()
    (folder / "first.md").write_text("# First\n\nOne source.", encoding="utf-8")
    (folder / "second.txt").write_text("Second source.", encoding="utf-8")

    cases = (
        (ebook, "book-space", 2),
        (folder, "folder-space", 2),
    )
    for source, slug, expected_sources in cases:
        result = _run_cli(
            "generate",
            "from-source",
            str(source),
            "--workspace",
            str(tmp_path),
            "--slug",
            slug,
            "--name",
            slug.replace("-", " ").title(),
            "--fixture-responses",
            str(fixture),
            "--apply",
        )
        assert result.returncode == 0, result.stderr
        payload = json.loads(result.stdout)
        status = status_generation(Path(payload["root"]))
        assert status["source_chunk_count"] == expected_sources
        assert status["entity_count"] == 2
        # The relation count is expected to equal the number of source chunks.
        assert status["relation_count"] == expected_sources
        assert status["history_snapshot_count"] == 1


def test_generator_resume_is_idempotent_and_detects_stale_source(tmp_path: Path) -> None:
    """A resumed run is skipped, and editing a generated source marks the infospace stale."""
    source = tmp_path / "note.md"
    source.write_text("# Note\n\nInitial source.", encoding="utf-8")
    fixture = tmp_path / "responses.yaml"
    fixture_responses(fixture)

    infospace = init_generation_infospace(
        tmp_path, source, "note-space", name="Note Space"
    )
    root = infospace.root
    first = run_generation(root, fixture_responses=fixture)
    second = run_generation(root, fixture_responses=fixture, resume=True)

    # Change the generated source artifact after the runs so status sees drift.
    generated_source = root / "artifacts" / "sources" / "note.md"
    generated_source.write_text("# Note\n\nChanged source.", encoding="utf-8")
    stale_status = status_generation(root)

    assert first.status == "completed"
    assert second.status == "skipped"
    assert second.skipped is True
    assert stale_status["stale"] is True
    assert stale_status["stale_sources"] == ["source/note.md"]


def test_openrouter_adapter_uses_model_and_records_metadata() -> None:
    """The adapter sends the configured model and API key and records response metadata."""
    requests: list[dict] = []

    def transport(payload: dict, headers: dict[str, str], endpoint: str) -> dict:
        """Record the outgoing call and return a canned completion response."""
        requests.append({"payload": payload, "headers": headers, "endpoint": endpoint})
        return {
            "id": "or-request-1",
            "choices": [{"message": {"content": "# Generated\n\nContent."}}],
            "usage": {"prompt_tokens": 5, "completion_tokens": 3},
        }

    adapter = OpenRouterAssistedGenerationAdapter(
        api_key="test-key",
        model="openai/gpt-4o-mini",
        transport=transport,
        retry_limit=0,
    )
    # An ad-hoc class stands in for the real request type; the test gives it
    # only these attributes.
    request_type = type(
        "Request",
        (),
        {
            "prompt": "Generate markdown.",
            "stage_id": "extract-entities",
            "workflow_id": "generic-source-extract",
            "input_artifact_id": "source/example.md",
            "provider_hint": "openrouter",
            "metadata": {},
        },
    )
    result = adapter.generate(request_type())

    sent = requests[0]
    assert sent["payload"]["model"] == "openai/gpt-4o-mini"
    assert sent["headers"]["Authorization"] == "Bearer test-key"
    assert result.markdown == "# Generated\n\nContent."
    assert result.provider == "openrouter"
    assert result.metadata["model"] == "openai/gpt-4o-mini"
    assert result.metadata["request_id"] == "or-request-1"
    assert result.metadata["usage"]["completion_tokens"] == 3


def test_generic_generator_docs_cover_openrouter_resume_and_cost_caps() -> None:
    """The generator documentation mentions the API key, model, chunk cap, resume, and fixtures."""
    # The path is relative, so this test must be run from the repository root.
    text = Path("docs/generic-source-generator.md").read_text(encoding="utf-8")
    assert "OPENROUTER_API_KEY" in text
    assert "--model" in text
    assert "--max-chunks" in text
    assert "resume" in text.lower()
    assert "fixture-responses" in text