from repo_scoping.repo_scanning.scanner import DeterministicScanner
from tests.fixtures import (
    write_javascript_typescript_package_repo,
    write_misleading_docs_repo,
    write_python_cli_repo,
    write_readme_only_repo,
)


def _new_repo(tmp_path, name):
    """Create an empty directory called ``name`` under ``tmp_path`` and return it."""
    root = tmp_path / name
    root.mkdir()
    return root


def _write(path, text):
    """Write ``text`` to ``path`` as UTF-8."""
    path.write_text(text, encoding="utf-8")


def _scan(repo):
    """Scan ``repo`` with a fresh ``DeterministicScanner`` and return the result."""
    return DeterministicScanner().scan(repo)


def _fact_keys(result):
    """Return the ``(kind, name, path)`` triple of every fact in ``result`` as a set."""
    return {(item.kind, item.name, item.path) for item in result.facts}


def _first_fact_of_kind(result, kind):
    """Return the first fact in ``result`` whose ``kind`` matches.

    Raises ``StopIteration`` when no fact of that kind is present.
    """
    return next(item for item in result.facts if item.kind == kind)


def test_deterministic_scanner_extracts_structural_facts(tmp_path):
    """A four-file Python repo produces doc, manifest, test, framework, interface and language facts."""
    repo = _new_repo(tmp_path, "sample")
    _write(repo / "README.md", "# MailRouter\n")
    _write(
        repo / "pyproject.toml",
        '[project]\ndependencies = ["fastapi", "pytest"]\n',
    )
    (repo / "src").mkdir()
    _write(
        repo / "src" / "routes.py",
        "from fastapi import APIRouter\n"
        "router = APIRouter()\n"
        '@router.post("/classify-email")\n'
        "def classify_email():\n"
        " return {}\n",
    )
    (repo / "tests").mkdir()
    _write(repo / "tests" / "test_routes.py", "def test_ok(): pass\n")

    result = _scan(repo)
    keys = _fact_keys(result)

    assert result.file_count == 4
    for expected in (
        ("documentation", "README", "README.md"),
        ("manifest", "pyproject.toml", "pyproject.toml"),
        ("test", "test_routes.py", "tests/test_routes.py"),
        ("framework", "FastAPI", "pyproject.toml"),
        ("framework", "pytest", "pyproject.toml"),
        ("interface", "python route decorator", "src/routes.py"),
    ):
        assert expected in keys

    # Map each language fact's name to the file count recorded in its metadata.
    languages = {}
    for item in result.facts:
        if item.kind == "language":
            languages[item.name] = item.metadata["file_count"]
    assert languages == {"Python": 2}


def test_scanner_records_intent_and_scope_with_distinct_source_roles(tmp_path):
    """INTENT.md and SCOPE.md become separate facts with different ``source_role`` values."""
    repo = _new_repo(tmp_path, "sample")
    _write(
        repo / "INTENT.md",
        "# INTENT\n\nProvides planned OIDC profile enforcement.\n",
    )
    _write(
        repo / "SCOPE.md",
        "# SCOPE\n\n## One-liner\n\nCurrently provides OIDC profile enforcement.\n",
    )

    result = _scan(repo)

    intent_fact = _first_fact_of_kind(result, "intent")
    assert intent_fact.name == "INTENT"
    assert intent_fact.path == "INTENT.md"
    assert intent_fact.metadata["source_role"] == "intent_summary"

    scope_fact = _first_fact_of_kind(result, "scope")
    assert scope_fact.name == "SCOPE"
    assert scope_fact.path == "SCOPE.md"
    assert scope_fact.metadata["source_role"] == "derived_scope"


def test_scanner_readme_only_fixture_records_docs_without_interfaces(tmp_path):
    """The README-only fixture yields documentation facts and no other fact kind."""
    result = _scan(write_readme_only_repo(tmp_path))

    assert ("documentation", "README", "README.md") in _fact_keys(result)
    kinds = {item.kind for item in result.facts}
    assert kinds == {"documentation"}


def test_scanner_python_cli_fixture_records_cli_and_framework_hints(tmp_path):
    """The Python CLI fixture yields Click/pytest framework, CLI interface and test facts."""
    result = _scan(write_python_cli_repo(tmp_path))
    keys = _fact_keys(result)

    for expected in (
        ("framework", "Click", "requirements.txt"),
        ("framework", "pytest", "requirements.txt"),
        ("interface", "python CLI command decorator", "cli.py"),
        ("test", "test_cli.py", "tests/test_cli.py"),
    ):
        assert expected in keys


def test_scanner_misleading_docs_fixture_stays_observational(tmp_path):
    """The misleading-docs fixture produces exactly one fact: the README documentation."""
    result = _scan(write_misleading_docs_repo(tmp_path))

    # Compared as an ordered list so any extra or duplicated fact fails the test.
    observed = [(item.kind, item.name, item.path) for item in result.facts]
    assert observed == [("documentation", "README", "README.md")]


def test_scanner_javascript_typescript_package_records_package_facts(tmp_path):
    """The JS/TS package fixture yields language, manifest, framework, interface and test facts."""
    result = _scan(write_javascript_typescript_package_repo(tmp_path))
    keys = _fact_keys(result)

    for expected in (
        ("language", "TypeScript", ""),
        ("manifest", "package.json", "package.json"),
        ("framework", "React", "package.json"),
        ("framework", "Vite", "package.json"),
        ("framework", "Vitest", "package.json"),
        ("interface", "possible API surface", "src/api/routes.ts"),
        ("test", "routes.spec.ts", "src/api/routes.spec.ts"),
    ):
        assert expected in keys


def test_scanner_ignores_runtime_var_checkouts(tmp_path):
    """Files under ``var/checkouts`` are neither counted nor turned into facts."""
    repo = _new_repo(tmp_path, "repo-scoping-like")
    _write(repo / "README.md", "# Repo Scoping\n")

    checkout = repo / "var" / "checkouts" / "llm-connect"
    checkout.mkdir(parents=True)
    _write(
        checkout / "README.md",
        "# LLM Connect\nSupports OpenRouter fallback.\n",
    )
    _write(
        checkout / "providers.py",
        "provider_registry = {'openrouter': OpenRouterAdapter}\n",
    )

    result = _scan(repo)
    keys = _fact_keys(result)

    assert result.file_count == 1
    assert ("documentation", "README", "README.md") in keys
    assert not any(item.path.startswith("var/") for item in result.facts)
    nested_provider = (
        "llm_provider",
        "OpenRouter",
        "var/checkouts/llm-connect/README.md",
    )
    assert nested_provider not in keys


def test_scanner_records_llm_provider_and_fallback_facts(tmp_path):
    """Provider, credential, registry and fallback facts carry the expected relationships."""
    repo = _new_repo(tmp_path, "llm-connect-like")
    _write(
        repo / "README.md",
        "# LLM Connect\nSupports OpenRouter and Claude fallback.\n",
    )
    _write(
        repo / ".env.example",
        "OPENROUTER_API_KEY=\nANTHROPIC_API_KEY=\n",
    )
    _write(
        repo / "providers.py",
        "provider_registry = {'openrouter': OpenRouterAdapter, 'anthropic': ClaudeAdapter}\n"
        "fallback_provider = 'claude'\n",
    )

    result = _scan(repo)
    keys = _fact_keys(result)

    for expected in (
        ("llm_provider", "OpenRouter", "README.md"),
        ("llm_provider", "Claude", "README.md"),
        ("llm_provider", "Anthropic", ".env.example"),
        ("credential_config", "OpenRouter API key", ".env.example"),
        ("credential_config", "Anthropic API key", ".env.example"),
        ("provider_registry", "LLM provider registry", "providers.py"),
        ("fallback_policy", "LLM provider fallback policy", "README.md"),
    ):
        assert expected in keys

    # Index facts by their identifying triple; a later duplicate replaces an earlier one.
    by_key = {}
    for item in result.facts:
        by_key[(item.kind, item.name, item.path)] = item

    expected_relationships = (
        (("llm_provider", "OpenRouter", "README.md"), "mention"),
        (("llm_provider", "OpenRouter", "providers.py"), "adapter"),
        (("credential_config", "OpenRouter API key", ".env.example"), "configure"),
        (("provider_registry", "LLM provider registry", "providers.py"), "adapter"),
    )
    for key, relationship in expected_relationships:
        assert by_key[key].metadata["utility_relationship"] == relationship


def test_scanner_does_not_treat_agent_guidance_as_llm_provider(tmp_path):
    """A CLAUDE.md guidance file and README mentions of it produce no LLM provider facts."""
    repo = _new_repo(tmp_path, "key-cape-like")
    _write(
        repo / "README.md",
        "# KeyCape\n\n"
        "Backend adapters live in src/internal/adapters.\n\n"
        "See `CLAUDE.md` for agent session protocol.\n",
    )
    _write(
        repo / "CLAUDE.md",
        "# CLAUDE.md\n\n"
        "This file provides guidance to Claude Code when working in this repo.\n",
    )
    (repo / "src").mkdir()
    _write(repo / "src" / "go.mod", "module keycape\n")

    keys = _fact_keys(_scan(repo))

    for unexpected in (
        ("llm_provider", "Claude", "CLAUDE.md"),
        ("llm_provider", "Claude", "README.md"),
        ("provider_registry", "LLM provider registry", "README.md"),
    ):
        assert unexpected not in keys