"""Tests for the facts emitted by ``DeterministicScanner.scan``."""

from repo_registry.repo_scanning.scanner import DeterministicScanner
from tests.fixtures import (
    write_javascript_typescript_package_repo,
    write_misleading_docs_repo,
    write_python_cli_repo,
    write_readme_only_repo,
)


def _fact_triples(scan_result):
    """Return the ``(kind, name, path)`` triple of every fact as a set."""
    return {(fact.kind, fact.name, fact.path) for fact in scan_result.facts}


def _write_tree(root, files):
    """Create ``root`` and write each ``relative path -> text`` entry as UTF-8.

    Parent directories of nested entries are created on demand. Returns
    ``root`` so the call can be used inline.
    """
    root.mkdir()
    for relative_path, content in files.items():
        target = root / relative_path
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content, encoding="utf-8")
    return root


def test_deterministic_scanner_extracts_structural_facts(tmp_path):
    """Scan a hand-built four-file repo and check the facts it yields."""
    # The single-space indent on the last line is reproduced exactly as the
    # fixture text is written in this test.
    routes_source = (
        "from fastapi import APIRouter\n"
        "router = APIRouter()\n"
        '@router.post("/classify-email")\n'
        "def classify_email():\n"
        " return {}\n"
    )
    repo_root = _write_tree(
        tmp_path / "sample",
        {
            "README.md": "# MailRouter\n",
            "pyproject.toml": '[project]\ndependencies = ["fastapi", "pytest"]\n',
            "src/routes.py": routes_source,
            "tests/test_routes.py": "def test_ok(): pass\n",
        },
    )

    scan = DeterministicScanner().scan(repo_root)
    observed = _fact_triples(scan)

    assert scan.file_count == 4
    expected = {
        ("documentation", "README", "README.md"),
        ("manifest", "pyproject.toml", "pyproject.toml"),
        ("test", "test_routes.py", "tests/test_routes.py"),
        ("framework", "FastAPI", "pyproject.toml"),
        ("framework", "pytest", "pyproject.toml"),
        ("interface", "python route decorator", "src/routes.py"),
    }
    assert expected <= observed

    # Map each language fact to the file count carried in its metadata.
    language_counts = {}
    for fact in scan.facts:
        if fact.kind == "language":
            language_counts[fact.name] = fact.metadata["file_count"]
    assert language_counts == {"Python": 2}


def test_scanner_readme_only_fixture_records_docs_without_interfaces(tmp_path):
    """The README-only fixture must yield documentation facts and no other kind."""
    scan = DeterministicScanner().scan(write_readme_only_repo(tmp_path))

    assert ("documentation", "README", "README.md") in _fact_triples(scan)
    kinds = {fact.kind for fact in scan.facts}
    assert kinds == {"documentation"}


def test_scanner_python_cli_fixture_records_cli_and_framework_hints(tmp_path):
    """The Python CLI fixture must yield framework, interface and test facts."""
    scan = DeterministicScanner().scan(write_python_cli_repo(tmp_path))

    expected = {
        ("framework", "Click", "requirements.txt"),
        ("framework", "pytest", "requirements.txt"),
        ("interface", "python CLI command decorator", "cli.py"),
        ("test", "test_cli.py", "tests/test_cli.py"),
    }
    assert expected <= _fact_triples(scan)


def test_scanner_misleading_docs_fixture_stays_observational(tmp_path):
    """The misleading-docs fixture must yield exactly one README fact."""
    scan = DeterministicScanner().scan(write_misleading_docs_repo(tmp_path))

    # Compared as a list on purpose: the check covers count and order too.
    observed = [(fact.kind, fact.name, fact.path) for fact in scan.facts]
    assert observed == [("documentation", "README", "README.md")]


def test_scanner_javascript_typescript_package_records_package_facts(tmp_path):
    """The JS/TS package fixture must yield language, manifest and framework facts."""
    scan = DeterministicScanner().scan(
        write_javascript_typescript_package_repo(tmp_path)
    )

    expected = {
        ("language", "TypeScript", ""),
        ("manifest", "package.json", "package.json"),
        ("framework", "React", "package.json"),
        ("framework", "Vite", "package.json"),
        ("framework", "Vitest", "package.json"),
        ("interface", "possible API surface", "src/api/routes.ts"),
        ("test", "routes.spec.ts", "src/api/routes.spec.ts"),
    }
    assert expected <= _fact_triples(scan)


def test_scanner_records_llm_provider_and_fallback_facts(tmp_path):
    """Scan a hand-built repo that mentions LLM providers and check its facts."""
    providers_source = (
        "provider_registry = {'openrouter': OpenRouterAdapter, 'anthropic': ClaudeAdapter}\n"
        "fallback_provider = 'claude'\n"
    )
    repo_root = _write_tree(
        tmp_path / "llm-connect-like",
        {
            "README.md": "# LLM Connect\nSupports OpenRouter and Claude fallback.\n",
            ".env.example": "OPENROUTER_API_KEY=\nANTHROPIC_API_KEY=\n",
            "providers.py": providers_source,
        },
    )

    scan = DeterministicScanner().scan(repo_root)

    expected = {
        ("llm_provider", "OpenRouter", "README.md"),
        ("llm_provider", "Claude", "README.md"),
        ("llm_provider", "Anthropic", ".env.example"),
        ("credential_config", "OpenRouter API key", ".env.example"),
        ("credential_config", "Anthropic API key", ".env.example"),
        ("provider_registry", "LLM provider registry", "providers.py"),
        ("fallback_policy", "LLM provider fallback policy", "README.md"),
    }
    assert expected <= _fact_triples(scan)