generated from coulomb/repo-seed
generic source-to-infospace generator
This commit is contained in:
301
tests/test_generic_generator.py
Normal file
301
tests/test_generic_generator.py
Normal file
@@ -0,0 +1,301 @@
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from infospace_bench.generator import (
|
||||
init_generation_infospace,
|
||||
run_generation,
|
||||
status_generation,
|
||||
)
|
||||
from infospace_bench.openrouter import OpenRouterAssistedGenerationAdapter
|
||||
from infospace_bench.source_intake import normalize_source
|
||||
|
||||
|
||||
def cli_env() -> dict[str, str]:
    """Return a copy of the process environment for running the CLI in a subprocess.

    ``PYTHONPATH`` is set so that the local ``src`` directory and the
    markitect-tool checkout come first on the import path. A ``PYTHONPATH``
    already present in the environment is kept after those entries instead of
    being discarded, and entries are joined with ``os.pathsep`` so the value is
    well-formed on every platform.

    Returns:
        A new ``dict``; ``os.environ`` itself is not modified.
    """
    env = os.environ.copy()
    # NOTE(review): the markitect-tool location is machine-specific; confirm
    # whether it should come from configuration instead of being hard-coded.
    search_path = ["src", "/home/worsch/markitect-tool/src"]
    inherited = env.get("PYTHONPATH")
    if inherited:
        # Required entries stay first so they take precedence over inherited ones.
        search_path.append(inherited)
    env["PYTHONPATH"] = os.pathsep.join(search_path)
    return env
|
||||
|
||||
|
||||
def fixture_responses(path: Path) -> None:
    """Write the canned per-stage responses file used by the fixture-driven runs.

    The YAML document written to *path* has a single ``responses`` list with
    one entry per stage, in this order: ``summarize-source``,
    ``extract-entities``, ``extract-relations``, ``evaluate-entity``. Every
    entry carries ``"*"`` as its ``input_artifact_id`` and the stage's markdown
    body under ``markdown``.
    """
    summary_md = "# Source Summary\n\nThe source describes reusable knowledge work.\n"

    # Two entity sections, each with a Definition and a Context heading.
    entities_md = (
        "# Knowledge Artifact\n\n"
        "## Definition\n\n"
        "A durable unit of structured knowledge derived from a source.\n\n"
        "## Context\n\n"
        "Generated from a generic source workflow.\n\n"
        "# Source Claim\n\n"
        "## Definition\n\n"
        "A claim preserved from the source for later review.\n\n"
        "## Context\n\n"
        "Used to keep provenance visible.\n"
    )

    # One relation section linking the two entities above.
    relations_md = (
        "# Knowledge Artifact Supports Source Claim\n\n"
        "## Subject\n\n"
        "Knowledge Artifact\n\n"
        "## Predicate\n\n"
        "supports\n\n"
        "## Object\n\n"
        "Source Claim\n\n"
        "## Relation Type\n\n"
        "support\n\n"
        "## Evidence\n\n"
        "The source links durable artifacts to explicit claims.\n"
    )

    # Front matter followed by a heading.
    # NOTE(review): the leading whitespace of the score lines is kept exactly
    # as found; confirm it matches the intended YAML nesting.
    evaluation_md = (
        "---\n"
        "artifact_id: entity/knowledge-artifact.md\n"
        "evaluator: fixture\n"
        "evaluated_at: '2026-05-14T00:00:00'\n"
        "scores:\n"
        " - name: groundedness\n"
        " value: 4.0\n"
        " max_value: 5.0\n"
        " - name: usefulness\n"
        " value: 4.0\n"
        " max_value: 5.0\n"
        "---\n"
        "\n"
        "# Evaluation: entity/knowledge-artifact.md\n"
    )

    stages = (
        ("summarize-source", summary_md),
        ("extract-entities", entities_md),
        ("extract-relations", relations_md),
        ("evaluate-entity", evaluation_md),
    )
    document = {
        "responses": [
            {"stage_id": stage_id, "input_artifact_id": "*", "markdown": markdown}
            for stage_id, markdown in stages
        ]
    }
    # sort_keys=False keeps the key order used when building each entry.
    path.write_text(yaml.safe_dump(document, sort_keys=False), encoding="utf-8")
|
||||
|
||||
|
||||
def write_epub_fixture(path: Path) -> None:
    """Create a zip archive at *path* holding two small XHTML chapter files."""
    chapters = (
        ("OEBPS/chapter1.xhtml", "<h1>Chapter One</h1><p>Alpha beta.</p>"),
        ("OEBPS/chapter2.xhtml", "<h1>Chapter Two</h1><p>Gamma delta.</p>"),
    )
    with zipfile.ZipFile(path, "w") as epub:
        # Members are written in chapter order.
        for member, markup in chapters:
            epub.writestr(member, markup)
|
||||
|
||||
|
||||
def test_source_intake_accepts_article_ebook_and_folder(tmp_path: Path) -> None:
    """Normalize an HTML article, an EPUB and a folder, then check the chunks."""
    html_path = tmp_path / "article.html"
    html_path.write_text(
        "<html><head><title>Article Title</title></head>"
        "<body><h1>Article Title</h1><p>One two three.</p></body></html>",
        encoding="utf-8",
    )

    epub_path = tmp_path / "book.epub"
    write_epub_fixture(epub_path)

    collection = tmp_path / "collection"
    collection.mkdir()
    folder_files = (
        ("note.md", "# Note\n\nMarkdown source."),
        ("memo.txt", "Plain text source."),
    )
    for filename, content in folder_files:
        (collection / filename).write_text(content, encoding="utf-8")

    from_article = normalize_source(html_path)
    from_ebook = normalize_source(epub_path)
    from_folder = normalize_source(collection)

    lead = from_article[0]
    assert lead.source_type == "html"
    assert lead.title == "Article Title"
    assert lead.chunk_id == "article-title"
    # Normalizing the same file a second time must give the same digest.
    assert lead.digest == normalize_source(html_path)[0].digest

    assert [part.source_type for part in from_ebook] == ["epub", "epub"]
    assert {part.source_type for part in from_folder} == {"markdown", "text"}
    assert all(part.markdown.startswith("# ") for part in from_folder)
|
||||
|
||||
|
||||
def test_generate_from_source_cli_fixture_builds_infospace(tmp_path: Path) -> None:
    """Drive ``generate from-source`` and ``generate status`` through the CLI.

    Both commands run as subprocesses of the current interpreter; their JSON
    stdout is parsed and checked together with the files written under the
    reported infospace root.
    """
    article = tmp_path / "article.md"
    article.write_text(
        "# Reusable Knowledge\n\nA source about claims and durable artifacts.",
        encoding="utf-8",
    )
    responses = tmp_path / "responses.yaml"
    fixture_responses(responses)

    cli_prefix = [sys.executable, "-m", "infospace_bench", "generate"]
    run_options = {"check": False, "text": True, "capture_output": True}

    generate_cmd = cli_prefix + [
        "from-source",
        str(article),
        "--workspace",
        str(tmp_path),
        "--slug",
        "article-space",
        "--name",
        "Article Space",
        "--fixture-responses",
        str(responses),
        "--apply",
    ]
    generated = subprocess.run(generate_cmd, env=cli_env(), **run_options)
    assert generated.returncode == 0, generated.stderr
    payload = json.loads(generated.stdout)
    root = Path(payload["root"])

    status_cmd = cli_prefix + ["status", str(root)]
    reported = subprocess.run(status_cmd, env=cli_env(), **run_options)
    assert reported.returncode == 0, reported.stderr
    status_payload = json.loads(reported.stdout)

    assert payload["status"] == "completed"

    expected_files = (
        ("artifacts", "sources", "reusable-knowledge.md"),
        ("artifacts", "entities", "knowledge-artifact.md"),
        ("artifacts", "relations", "reusable-knowledge-relations.md"),
        ("output", "metrics", "metrics.yaml"),
    )
    for parts in expected_files:
        assert root.joinpath(*parts).is_file()

    assert status_payload["source_chunk_count"] == 1
    assert status_payload["entity_count"] == 2
    assert status_payload["relation_count"] == 1
    assert status_payload["stale"] is False
|
||||
|
||||
|
||||
def test_generate_from_ebook_and_folder_fixtures(tmp_path: Path) -> None:
    """Generate infospaces from an EPUB and from a folder via the CLI.

    Each source is run through ``generate from-source`` with the fixture
    responses, then the status of the reported root is read in-process and its
    counts are checked.
    """
    responses = tmp_path / "responses.yaml"
    fixture_responses(responses)

    epub_path = tmp_path / "book.epub"
    write_epub_fixture(epub_path)

    folder = tmp_path / "folder"
    folder.mkdir()
    (folder / "first.md").write_text("# First\n\nOne source.", encoding="utf-8")
    (folder / "second.txt").write_text("Second source.", encoding="utf-8")

    cases = (
        (epub_path, "book-space", 2),
        (folder, "folder-space", 2),
    )
    for origin, slug, expected_sources in cases:
        # The display name is derived from the slug.
        display_name = slug.replace("-", " ").title()
        command = [
            sys.executable,
            "-m",
            "infospace_bench",
            "generate",
            "from-source",
            str(origin),
            "--workspace",
            str(tmp_path),
            "--slug",
            slug,
            "--name",
            display_name,
            "--fixture-responses",
            str(responses),
            "--apply",
        ]
        completed = subprocess.run(
            command,
            check=False,
            env=cli_env(),
            text=True,
            capture_output=True,
        )
        assert completed.returncode == 0, completed.stderr

        reported_root = json.loads(completed.stdout)["root"]
        report = status_generation(Path(reported_root))
        assert report["source_chunk_count"] == expected_sources
        assert report["entity_count"] == 2
        assert report["relation_count"] == expected_sources
        assert report["history_snapshot_count"] == 1
|
||||
|
||||
|
||||
def test_generator_resume_is_idempotent_and_detects_stale_source(tmp_path: Path) -> None:
    """Check that a resumed run is skipped and an edited stored source is stale.

    After one normal run and one run with ``resume=True``, the source copy
    under ``artifacts/sources`` is overwritten and the status is expected to
    flag ``source/note.md`` as stale.
    """
    note = tmp_path / "note.md"
    note.write_text("# Note\n\nInitial source.", encoding="utf-8")
    responses = tmp_path / "responses.yaml"
    fixture_responses(responses)

    infospace = init_generation_infospace(tmp_path, note, "note-space", name="Note Space")
    root = infospace.root

    initial_run = run_generation(root, fixture_responses=responses)
    resumed_run = run_generation(root, fixture_responses=responses, resume=True)

    # Change the copy held inside the infospace, not the original input file.
    stored_copy = root / "artifacts" / "sources" / "note.md"
    stored_copy.write_text("# Note\n\nChanged source.", encoding="utf-8")
    report = status_generation(root)

    assert initial_run.status == "completed"
    assert resumed_run.status == "skipped"
    assert resumed_run.skipped is True
    assert report["stale"] is True
    assert report["stale_sources"] == ["source/note.md"]
|
||||
|
||||
|
||||
def test_openrouter_adapter_uses_model_and_records_metadata() -> None:
    """Call the adapter with a stub transport and inspect the request and result."""
    captured: list[dict] = []

    def fake_transport(payload: dict, headers: dict[str, str], endpoint: str) -> dict:
        # Record what the adapter sent and hand back a canned response.
        captured.append({"payload": payload, "headers": headers, "endpoint": endpoint})
        return {
            "id": "or-request-1",
            "choices": [{"message": {"content": "# Generated\n\nContent."}}],
            "usage": {"prompt_tokens": 5, "completion_tokens": 3},
        }

    adapter = OpenRouterAssistedGenerationAdapter(
        api_key="test-key",
        model="openai/gpt-4o-mini",
        transport=fake_transport,
        retry_limit=0,
    )

    # Throwaway class built on the fly; the request is an instance exposing
    # these values as attributes.
    request_attrs = {
        "prompt": "Generate markdown.",
        "stage_id": "extract-entities",
        "workflow_id": "generic-source-extract",
        "input_artifact_id": "source/example.md",
        "provider_hint": "openrouter",
        "metadata": {},
    }
    request = type("Request", (), request_attrs)()
    result = adapter.generate(request)

    sent = captured[0]
    assert sent["payload"]["model"] == "openai/gpt-4o-mini"
    assert sent["headers"]["Authorization"] == "Bearer test-key"
    assert result.markdown == "# Generated\n\nContent."
    assert result.provider == "openrouter"
    assert result.metadata["model"] == "openai/gpt-4o-mini"
    assert result.metadata["request_id"] == "or-request-1"
    assert result.metadata["usage"]["completion_tokens"] == 3
|
||||
|
||||
|
||||
def test_generic_generator_docs_cover_openrouter_resume_and_cost_caps() -> None:
    """Check that the generator guide mentions the expected options and terms.

    The guide is read from ``docs/generic-source-generator.md`` relative to the
    current working directory.
    """
    guide = Path("docs/generic-source-generator.md").read_text(encoding="utf-8")

    for marker in ("OPENROUTER_API_KEY", "--model", "--max-chunks"):
        assert marker in guide
    # "resume" is matched case-insensitively; the other markers are exact.
    assert "resume" in guide.lower()
    assert "fixture-responses" in guide
|
||||
Reference in New Issue
Block a user