from repo_registry.repo_scanning.scanner import DeterministicScanner


def test_deterministic_scanner_extracts_structural_facts(tmp_path):
    """Scan a small synthetic repository and check the facts that come back.

    The fixture repository contains a README, a ``pyproject.toml`` listing
    ``fastapi`` and ``pytest`` as dependencies, one module with a route
    decorator, and one test module. The assertions cover the reported file
    count, a set of (kind, name, path) fact triples, and the per-language
    file counts carried in fact metadata.
    """
    # Lay out the directory skeleton first; files are written afterwards.
    repo = tmp_path / "sample"
    src_dir = repo / "src"
    tests_dir = repo / "tests"
    for directory in (repo, src_dir, tests_dir):
        directory.mkdir()

    route_source = "".join(
        (
            "from fastapi import APIRouter\n",
            "router = APIRouter()\n",
            '@router.post("/classify-email")\n',
            "def classify_email():\n",
            " return {}\n",
        )
    )
    fixture_files = {
        repo / "README.md": "# MailRouter\n",
        repo / "pyproject.toml": '[project]\ndependencies = ["fastapi", "pytest"]\n',
        src_dir / "routes.py": route_source,
        tests_dir / "test_routes.py": "def test_ok(): pass\n",
    }
    for file_path, content in fixture_files.items():
        file_path.write_text(content, encoding="utf-8")

    scan_result = DeterministicScanner().scan(repo)
    observed = {(fact.kind, fact.name, fact.path) for fact in scan_result.facts}

    assert scan_result.file_count == 4

    expected_triples = (
        ("documentation", "README", "README.md"),
        ("manifest", "pyproject.toml", "pyproject.toml"),
        ("test", "test_routes.py", "tests/test_routes.py"),
        ("framework", "FastAPI", "pyproject.toml"),
        ("framework", "pytest", "pyproject.toml"),
        ("interface", "python route decorator", "src/routes.py"),
    )
    for triple in expected_triples:
        assert triple in observed

    # Collect the file count recorded on each "language" fact, keyed by name.
    language_counts = {}
    for fact in scan_result.facts:
        if fact.kind == "language":
            language_counts[fact.name] = fact.metadata["file_count"]
    assert language_counts == {"Python": 2}