"""Tests for the facts ``DeterministicScanner`` reports on small repositories."""

from repo_registry.repo_scanning.scanner import DeterministicScanner
from tests.fixtures import (
    write_misleading_docs_repo,
    write_python_cli_repo,
    write_readme_only_repo,
)


def _fact_triples(scan_result):
    """Return ``(kind, name, path)`` for each fact of *scan_result*, in order."""
    return [(item.kind, item.name, item.path) for item in scan_result.facts]


def test_deterministic_scanner_extracts_structural_facts(tmp_path):
    """Scan a hand-built repo and check the file count and reported facts.

    The repo holds a README, a ``pyproject.toml`` listing ``fastapi`` and
    ``pytest``, one source module with a route decorator, and one test module.
    """
    root = tmp_path / "sample"
    root.mkdir()
    (root / "README.md").write_text("# MailRouter\n", encoding="utf-8")
    (root / "pyproject.toml").write_text(
        '[project]\ndependencies = ["fastapi", "pytest"]\n',
        encoding="utf-8",
    )

    src_dir = root / "src"
    src_dir.mkdir()
    (src_dir / "routes.py").write_text(
        "from fastapi import APIRouter\n"
        "router = APIRouter()\n"
        '@router.post("/classify-email")\n'
        "def classify_email():\n"
        " return {}\n",
        encoding="utf-8",
    )

    tests_dir = root / "tests"
    tests_dir.mkdir()
    (tests_dir / "test_routes.py").write_text("def test_ok(): pass\n", encoding="utf-8")

    scan = DeterministicScanner().scan(root)
    seen = set(_fact_triples(scan))

    assert scan.file_count == 4

    # Checked one by one, in this order, so a failure names the missing fact.
    expected_facts = (
        ("documentation", "README", "README.md"),
        ("manifest", "pyproject.toml", "pyproject.toml"),
        ("test", "test_routes.py", "tests/test_routes.py"),
        ("framework", "FastAPI", "pyproject.toml"),
        ("framework", "pytest", "pyproject.toml"),
        ("interface", "python route decorator", "src/routes.py"),
    )
    for triple in expected_facts:
        assert triple in seen

    # Map each language fact's name to the file count stored in its metadata.
    language_counts = {}
    for item in scan.facts:
        if item.kind == "language":
            language_counts[item.name] = item.metadata["file_count"]
    assert language_counts == {"Python": 2}


def test_scanner_readme_only_fixture_records_docs_without_interfaces(tmp_path):
    """The readme-only fixture yields a README fact and only documentation kinds."""
    root = write_readme_only_repo(tmp_path)

    scan = DeterministicScanner().scan(root)
    seen = set(_fact_triples(scan))

    assert ("documentation", "README", "README.md") in seen

    kinds = {item.kind for item in scan.facts}
    assert kinds == {"documentation"}


def test_scanner_python_cli_fixture_records_cli_and_framework_hints(tmp_path):
    """The Python CLI fixture yields framework, CLI-interface and test facts."""
    root = write_python_cli_repo(tmp_path)

    scan = DeterministicScanner().scan(root)
    seen = set(_fact_triples(scan))

    expected_facts = (
        ("framework", "Click", "requirements.txt"),
        ("framework", "pytest", "requirements.txt"),
        ("interface", "python CLI command decorator", "cli.py"),
        ("test", "test_cli.py", "tests/test_cli.py"),
    )
    for triple in expected_facts:
        assert triple in seen


def test_scanner_misleading_docs_fixture_stays_observational(tmp_path):
    """The misleading-docs fixture yields exactly one fact: the README itself."""
    root = write_misleading_docs_repo(tmp_path)

    scan = DeterministicScanner().scan(root)

    # Compared as a list so that extra or duplicated facts fail the test.
    assert _fact_triples(scan) == [("documentation", "README", "README.md")]