# Generated from coulomb/repo-seed (repository viewer reported: 302 lines, 10 KiB, Python).
import json
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
import zipfile
|
|
from pathlib import Path
|
|
|
|
import yaml
|
|
|
|
from infospace_bench.generator import (
|
|
init_generation_infospace,
|
|
run_generation,
|
|
status_generation,
|
|
)
|
|
from infospace_bench.openrouter import OpenRouterAssistedGenerationAdapter
|
|
from infospace_bench.source_intake import normalize_source
|
|
|
|
|
|
def cli_env() -> dict[str, str]:
    """Return a copy of the process environment for launching CLI subprocesses.

    ``PYTHONPATH`` in the returned mapping starts with the relative ``src``
    directory and the markitect-tool source checkout.  A ``PYTHONPATH`` that is
    already set in the parent environment is appended after those two entries
    instead of being discarded, so dependencies supplied that way stay
    importable in the child process.

    Returns:
        A new dict; ``os.environ`` itself is left unmodified.
    """
    env = os.environ.copy()
    # NOTE(review): the second entry is a machine-specific absolute path;
    # it is kept as-is because callers currently rely on it.
    search_path = ["src", "/home/worsch/markitect-tool/src"]
    inherited = env.get("PYTHONPATH")
    if inherited:
        search_path.append(inherited)
    # os.pathsep is ":" on POSIX (matching the previous literal) and ";" on Windows.
    env["PYTHONPATH"] = os.pathsep.join(search_path)
    return env
|
|
|
|
|
|
def fixture_responses(path: Path) -> None:
    """Write the canned stage responses used by the fixture-driven tests.

    The YAML document written to *path* has one top-level ``responses`` list.
    Each entry carries ``stage_id``, ``input_artifact_id`` and ``markdown``
    (in that key order) for one of the four stages: ``summarize-source``,
    ``extract-entities``, ``extract-relations`` and ``evaluate-entity``.

    Args:
        path: Destination file; it is written as UTF-8 text.
    """
    summary_markdown = "# Source Summary\n\nThe source describes reusable knowledge work.\n"

    # Two entity sections: "Knowledge Artifact" and "Source Claim".
    entities_markdown = (
        "# Knowledge Artifact\n\n"
        "## Definition\n\n"
        "A durable unit of structured knowledge derived from a source.\n\n"
        "## Context\n\n"
        "Generated from a generic source workflow.\n\n"
        "# Source Claim\n\n"
        "## Definition\n\n"
        "A claim preserved from the source for later review.\n\n"
        "## Context\n\n"
        "Used to keep provenance visible.\n"
    )

    # One relation linking the two entities above.
    relations_markdown = (
        "# Knowledge Artifact Supports Source Claim\n\n"
        "## Subject\n\n"
        "Knowledge Artifact\n\n"
        "## Predicate\n\n"
        "supports\n\n"
        "## Object\n\n"
        "Source Claim\n\n"
        "## Relation Type\n\n"
        "support\n\n"
        "## Evidence\n\n"
        "The source links durable artifacts to explicit claims.\n"
    )

    # The front matter between the "---" fences must itself be valid YAML:
    # each score's fields are nested under its "- name" list item, so the
    # continuation lines are indented deeper than the dash.
    evaluation_markdown = (
        "---\n"
        "artifact_id: entity/knowledge-artifact.md\n"
        "evaluator: fixture\n"
        "evaluated_at: '2026-05-14T00:00:00'\n"
        "scores:\n"
        "  - name: groundedness\n"
        "    value: 4.0\n"
        "    max_value: 5.0\n"
        "  - name: usefulness\n"
        "    value: 4.0\n"
        "    max_value: 5.0\n"
        "---\n"
        "\n"
        "# Evaluation: entity/knowledge-artifact.md\n"
    )

    stage_markdown = (
        ("summarize-source", summary_markdown),
        ("extract-entities", entities_markdown),
        ("extract-relations", relations_markdown),
        ("evaluate-entity", evaluation_markdown),
    )
    data = {
        "responses": [
            {
                "stage_id": stage_id,
                # presumably "*" lets the response match any input artifact — TODO confirm
                "input_artifact_id": "*",
                "markdown": markdown,
            }
            for stage_id, markdown in stage_markdown
        ]
    }
    # sort_keys=False keeps the key order of each response entry as written above.
    path.write_text(yaml.safe_dump(data, sort_keys=False), encoding="utf-8")
|
|
|
|
|
|
def write_epub_fixture(path: Path) -> None:
    """Create a small zip archive at *path* holding two XHTML chapter files.

    The archive contains only ``OEBPS/chapter1.xhtml`` and
    ``OEBPS/chapter2.xhtml``, written in that order.
    """
    chapters = (
        ("OEBPS/chapter1.xhtml", "<h1>Chapter One</h1><p>Alpha beta.</p>"),
        ("OEBPS/chapter2.xhtml", "<h1>Chapter Two</h1><p>Gamma delta.</p>"),
    )
    with zipfile.ZipFile(path, "w") as epub:
        for member_name, markup in chapters:
            epub.writestr(member_name, markup)
|
|
|
|
|
|
def test_source_intake_accepts_article_ebook_and_folder(tmp_path: Path) -> None:
    """normalize_source accepts an HTML article, an EPUB archive and a folder of notes."""
    # Arrange: one source of each supported shape.
    html_path = tmp_path / "article.html"
    html_path.write_text(
        "<html><head><title>Article Title</title></head>"
        "<body><h1>Article Title</h1><p>One two three.</p></body></html>",
        encoding="utf-8",
    )

    epub_path = tmp_path / "book.epub"
    write_epub_fixture(epub_path)

    collection = tmp_path / "collection"
    collection.mkdir()
    folder_files = (
        ("note.md", "# Note\n\nMarkdown source."),
        ("memo.txt", "Plain text source."),
    )
    for filename, body in folder_files:
        (collection / filename).write_text(body, encoding="utf-8")

    # Act
    article_chunks = normalize_source(html_path)
    ebook_chunks = normalize_source(epub_path)
    folder_chunks = normalize_source(collection)

    # Assert: the article yields a titled HTML chunk with a reproducible digest.
    first_article_chunk = article_chunks[0]
    assert first_article_chunk.source_type == "html"
    assert first_article_chunk.title == "Article Title"
    assert first_article_chunk.chunk_id == "article-title"
    assert first_article_chunk.digest == normalize_source(html_path)[0].digest

    # One chunk per chapter file in the archive.
    ebook_types = [chunk.source_type for chunk in ebook_chunks]
    assert ebook_types == ["epub", "epub"]

    # Folder members keep their own source type and all start with a heading.
    folder_types = {chunk.source_type for chunk in folder_chunks}
    assert folder_types == {"markdown", "text"}
    assert all(chunk.markdown.startswith("# ") for chunk in folder_chunks)
|
|
|
|
|
|
def test_generate_from_source_cli_fixture_builds_infospace(tmp_path: Path) -> None:
    """Run ``generate from-source`` and ``generate status`` through the CLI with fixture responses."""
    source = tmp_path / "article.md"
    source.write_text(
        "# Reusable Knowledge\n\nA source about claims and durable artifacts.",
        encoding="utf-8",
    )
    fixture = tmp_path / "responses.yaml"
    fixture_responses(fixture)

    cli_prefix = [sys.executable, "-m", "infospace_bench", "generate"]
    # Non-zero exits are asserted explicitly below so stderr shows up in the failure message.
    run_options = {"check": False, "text": True, "capture_output": True}

    generate_argv = [
        *cli_prefix,
        "from-source",
        str(source),
        "--workspace",
        str(tmp_path),
        "--slug",
        "article-space",
        "--name",
        "Article Space",
        "--fixture-responses",
        str(fixture),
        "--apply",
    ]
    result = subprocess.run(generate_argv, env=cli_env(), **run_options)
    assert result.returncode == 0, result.stderr
    payload = json.loads(result.stdout)
    root = Path(payload["root"])

    status_argv = [*cli_prefix, "status", str(root)]
    status = subprocess.run(status_argv, env=cli_env(), **run_options)
    assert status.returncode == 0, status.stderr
    status_payload = json.loads(status.stdout)

    assert payload["status"] == "completed"

    expected_files = (
        ("artifacts", "sources", "reusable-knowledge.md"),
        ("artifacts", "entities", "knowledge-artifact.md"),
        ("artifacts", "relations", "reusable-knowledge-relations.md"),
        ("output", "metrics", "metrics.yaml"),
    )
    for parts in expected_files:
        assert root.joinpath(*parts).is_file()

    expected_counts = (
        ("source_chunk_count", 1),
        ("entity_count", 2),
        ("relation_count", 1),
    )
    for key, expected in expected_counts:
        assert status_payload[key] == expected
    assert status_payload["stale"] is False
|
|
|
|
|
|
def test_generate_from_ebook_and_folder_fixtures(tmp_path: Path) -> None:
    """Generate from an EPUB and from a folder via the CLI, then check status counts."""
    fixture = tmp_path / "responses.yaml"
    fixture_responses(fixture)

    ebook = tmp_path / "book.epub"
    write_epub_fixture(ebook)

    folder = tmp_path / "folder"
    folder.mkdir()
    (folder / "first.md").write_text("# First\n\nOne source.", encoding="utf-8")
    (folder / "second.txt").write_text("Second source.", encoding="utf-8")

    # (source path, infospace slug, expected number of source chunks)
    cases = [
        (ebook, "book-space", 2),
        (folder, "folder-space", 2),
    ]
    for source, slug, expected_sources in cases:
        display_name = slug.replace("-", " ").title()
        argv = [
            sys.executable,
            "-m",
            "infospace_bench",
            "generate",
            "from-source",
            str(source),
            "--workspace",
            str(tmp_path),
            "--slug",
            slug,
            "--name",
            display_name,
            "--fixture-responses",
            str(fixture),
            "--apply",
        ]
        result = subprocess.run(
            argv,
            check=False,
            env=cli_env(),
            text=True,
            capture_output=True,
        )
        assert result.returncode == 0, result.stderr

        generated_root = Path(json.loads(result.stdout)["root"])
        status = status_generation(generated_root)

        assert status["source_chunk_count"] == expected_sources
        assert status["entity_count"] == 2
        # The relation count is expected to equal the source chunk count.
        assert status["relation_count"] == expected_sources
        assert status["history_snapshot_count"] == 1
|
|
|
|
|
|
def test_generator_resume_is_idempotent_and_detects_stale_source(tmp_path: Path) -> None:
    """A resumed run is skipped, and editing the generated source marks the infospace stale."""
    source = tmp_path / "note.md"
    source.write_text("# Note\n\nInitial source.", encoding="utf-8")

    fixture = tmp_path / "responses.yaml"
    fixture_responses(fixture)

    infospace = init_generation_infospace(tmp_path, source, "note-space", name="Note Space")
    root = infospace.root

    initial_run = run_generation(root, fixture_responses=fixture)
    resumed_run = run_generation(root, fixture_responses=fixture, resume=True)

    # Change the copy stored inside the infospace after both runs have finished.
    stored_source = root / "artifacts" / "sources" / "note.md"
    stored_source.write_text("# Note\n\nChanged source.", encoding="utf-8")
    status_after_edit = status_generation(root)

    assert initial_run.status == "completed"
    assert resumed_run.status == "skipped"
    assert resumed_run.skipped is True
    assert status_after_edit["stale"] is True
    assert status_after_edit["stale_sources"] == ["source/note.md"]
|
|
|
|
|
|
def test_openrouter_adapter_uses_model_and_records_metadata() -> None:
    """The adapter sends the configured model and API key, and reports response metadata."""
    requests: list[dict] = []

    def transport(payload: dict, headers: dict[str, str], endpoint: str) -> dict:
        # Record every outgoing call and answer with a fixed response body.
        call = {"payload": payload, "headers": headers, "endpoint": endpoint}
        requests.append(call)
        canned_response = {
            "id": "or-request-1",
            "choices": [{"message": {"content": "# Generated\n\nContent."}}],
            "usage": {"prompt_tokens": 5, "completion_tokens": 3},
        }
        return canned_response

    adapter = OpenRouterAssistedGenerationAdapter(
        api_key="test-key",
        model="openai/gpt-4o-mini",
        transport=transport,
        retry_limit=0,
    )

    # An ad-hoc class stands in for the request object; its fields are class attributes.
    request_fields = {
        "prompt": "Generate markdown.",
        "stage_id": "extract-entities",
        "workflow_id": "generic-source-extract",
        "input_artifact_id": "source/example.md",
        "provider_hint": "openrouter",
        "metadata": {},
    }
    request_type = type("Request", (), request_fields)
    result = adapter.generate(request_type())

    sent = requests[0]
    assert sent["payload"]["model"] == "openai/gpt-4o-mini"
    assert sent["headers"]["Authorization"] == "Bearer test-key"

    assert result.markdown == "# Generated\n\nContent."
    assert result.provider == "openrouter"

    metadata = result.metadata
    assert metadata["model"] == "openai/gpt-4o-mini"
    assert metadata["request_id"] == "or-request-1"
    assert metadata["usage"]["completion_tokens"] == 3
|
|
|
|
|
|
def test_generic_generator_docs_cover_openrouter_resume_and_cost_caps() -> None:
    """The generator guide mentions the API key, model and chunk-cap flags, resume and fixtures."""
    # The path is relative, so it is resolved against the current working directory.
    docs_path = Path("docs/generic-source-generator.md")
    text = docs_path.read_text(encoding="utf-8")

    for marker in ("OPENROUTER_API_KEY", "--model", "--max-chunks"):
        assert marker in text
    # "resume" is matched case-insensitively; the other markers must appear verbatim.
    lowered = text.lower()
    assert "resume" in lowered
    assert "fixture-responses" in text
|