import json import logging import subprocess from repo_registry.core.logging import LOGGER_NAME from repo_registry.core.service import RegistryService from repo_registry.llm_extraction import ExtractedAbility, ExtractedCapability from repo_registry.repo_ingestion.git import GitIngestionService from repo_registry.semantic import HashingEmbeddingProvider from repo_registry.storage.sqlite import NotFoundError, RegistryStore from tests.fixtures import ( write_empty_repo, write_javascript_typescript_package_repo, write_misleading_docs_repo, write_python_cli_repo, write_readme_only_repo, ) def make_service(tmp_path): store = RegistryStore(tmp_path / "registry.sqlite3") store.initialize() return RegistryService(store, ingestion=GitIngestionService(tmp_path / "checkouts")) class FakeLLMExtractor: def __init__(self, abilities): self.abilities = abilities self.calls = [] def extract(self, repository, chunks): self.calls.append((repository, chunks)) return self.abilities class FailingLLMExtractor: def extract(self, repository, chunks): raise RuntimeError("provider unavailable") def test_manual_registry_builds_ability_map(tmp_path): service = make_service(tmp_path) repository = service.register_repository( name="MailRouter", url="https://example.com/mail-router.git", description="Routes incoming customer email", ) ability_id = service.add_ability( repository.id, name="Business Email Routing", description="Route inbound messages to the right department.", confidence=0.92, ) capability_id = service.add_capability( repository.id, ability_id, name="Classify Incoming Email", description="Classify messages into intent categories.", inputs=["subject", "body"], outputs=["intent", "confidence"], confidence=0.88, ) service.add_feature( repository.id, capability_id, name="POST /api/classify-email", type="REST endpoint", location="src/routes/classify_email.py", confidence=0.84, ) service.add_evidence( repository.id, capability_id, type="unit_test", 
reference="tests/test_email_classification.py", strength="strong", ) ability_map = service.ability_map(repository.id) assert ability_map.repository.name == "MailRouter" assert ability_map.abilities[0].name == "Business Email Routing" capability = ability_map.abilities[0].capabilities[0] assert capability.name == "Classify Incoming Email" assert capability.inputs == ["subject", "body"] assert capability.features[0].location == "src/routes/classify_email.py" assert capability.evidence[0].strength == "strong" def test_manual_registry_updates_and_deletes_approved_entries(tmp_path): service = make_service(tmp_path) repository = service.register_repository( name="Manual", url="https://example.com/manual.git", description="Manual registry fixture.", ) ability_id = service.add_ability(repository.id, name="Original Ability") capability_id = service.add_capability( repository.id, ability_id, name="Original Capability", ) feature_id = service.add_feature( repository.id, capability_id, name="Original Feature", type="API", ) evidence_id = service.add_evidence( repository.id, capability_id, type="test", reference="tests/test_original.py", ) service.update_ability(repository.id, ability_id, name="Updated Ability") service.update_capability( repository.id, capability_id, name="Updated Capability", inputs=["request"], outputs=["response"], ) service.update_feature(repository.id, feature_id, location="src/api.py") ability_map = service.update_evidence( repository.id, evidence_id, strength="strong", ) ability = ability_map.abilities[0] capability = ability.capabilities[0] assert ability.name == "Updated Ability" assert capability.name == "Updated Capability" assert capability.inputs == ["request"] assert capability.outputs == ["response"] assert capability.features[0].location == "src/api.py" assert capability.evidence[0].strength == "strong" service.delete_feature(repository.id, feature_id) service.delete_evidence(repository.id, evidence_id) ability_map = 
service.delete_capability(repository.id, capability_id) assert ability_map.abilities[0].capabilities == [] ability_map = service.delete_ability(repository.id, ability_id) assert ability_map.abilities == [] def test_repository_update_and_delete(tmp_path): service = make_service(tmp_path) repository = service.register_repository( name="Original", url="https://example.com/original.git", description="Original description.", ) ability_id = service.add_ability(repository.id, name="Original Ability") updated = service.update_repository( repository.id, name="Updated", description="Updated description.", branch="develop", ) assert updated.name == "Updated" assert updated.description == "Updated description." assert updated.branch == "develop" assert service.ability_map(repository.id).abilities[0].id == ability_id service.delete_repository(repository.id) try: service.get_repository(repository.id) except NotFoundError as exc: assert "repository" in str(exc) else: raise AssertionError("expected a NotFoundError") def test_search_matches_approved_abilities_and_capabilities(tmp_path): service = make_service(tmp_path) repository = service.register_repository( name="MailRouter", url="https://example.com/mail-router.git", description="Manual test repository.", ) ability_id = service.add_ability( repository.id, name="Business Email Routing", description="Route inbound messages.", ) service.add_capability( repository.id, ability_id, name="Classify Incoming Email", description="Classify messages into intent categories.", ) results = service.search("classify") assert len(results) == 1 assert results[0].repository_name == "MailRouter" assert results[0].match_type == "capability" assert results[0].match_name == "Classify Incoming Email" abilities = service.list_abilities() capabilities = service.list_capabilities() assert abilities[0].repository_name == "MailRouter" assert abilities[0].name == "Business Email Routing" assert capabilities[0].ability_name == "Business Email Routing" assert 
capabilities[0].name == "Classify Incoming Email" def test_search_matches_features_and_evidence_with_context(tmp_path): service = make_service(tmp_path) repository = service.register_repository( name="MailRouter", url="https://example.com/mail-router-feature.git", description="Manual test repository.", ) ability_id = service.add_ability(repository.id, name="Business Email Routing") capability_id = service.add_capability( repository.id, ability_id, name="Classify Incoming Email", ) service.add_feature( repository.id, capability_id, name="POST /api/classify-email", type="REST endpoint", location="src/routes/classify_email.py", ) service.add_evidence( repository.id, capability_id, type="unit_test", reference="tests/test_email_classification.py", strength="strong", ) feature_results = service.search("classify_email") evidence_results = service.search("unit_test") assert feature_results[0].match_type == "feature" assert feature_results[0].matched_field == "location" assert feature_results[0].ability_name == "Business Email Routing" assert feature_results[0].capability_name == "Classify Incoming Email" assert feature_results[0].source_reference == "src/routes/classify_email.py" assert evidence_results[0].match_type == "evidence" assert evidence_results[0].evidence_level == "strong" assert evidence_results[0].confidence == 0.9 def test_search_filters_by_status_language_and_framework(tmp_path): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text("# Filterable\n", encoding="utf-8") (source / "requirements.txt").write_text("fastapi\n", encoding="utf-8") (source / "app.py").write_text( "from fastapi import FastAPI\n" "app = FastAPI()\n" '@app.get("/health")\n' "def health():\n" " return {}\n", encoding="utf-8", ) service = make_service(tmp_path) repository = service.register_repository(name="Filterable", url=str(source)) summary = service.analyze_repository(repository.id) service.approve_candidate_graph(repository.id, summary.analysis_run.id) results = 
service.search( "repository", status="indexed", language="Python", framework="FastAPI", ability="Support Filterable", capability="Repository Structure", ) wrong_language_results = service.search( "repository", status="indexed", language="TypeScript", framework="FastAPI", ) wrong_capability_results = service.search( "repository", status="indexed", language="Python", framework="FastAPI", capability="Email Routing", ) assert results assert {result.repository_name for result in results} == {"Filterable"} assert wrong_language_results == [] assert wrong_capability_results == [] def test_fixture_breadth_readme_only_repo_stays_conservative(tmp_path): source = write_readme_only_repo(tmp_path) service = make_service(tmp_path) repository = service.register_repository(name="Readme Only", url=str(source)) summary = service.analyze_repository(repository.id) graph = service.candidate_graph(repository.id, summary.analysis_run.id) assert summary.analysis_run.status == "completed" assert graph.abilities[0].confidence == 0.45 assert graph.abilities[0].capabilities == [] assert service.ability_map(repository.id).abilities == [] def test_fixture_breadth_python_cli_repo_extracts_reviewable_cli_claims(tmp_path): source = write_python_cli_repo(tmp_path) service = make_service(tmp_path) repository = service.register_repository(name="Python CLI", url=str(source)) summary = service.analyze_repository(repository.id) graph = service.candidate_graph(repository.id, summary.analysis_run.id) capability = graph.abilities[0].capabilities[0] assert summary.analysis_run.status == "completed" assert capability.name == "Expose Repository Interface" assert capability.features[0].type == "CLI" assert capability.features[0].name.startswith("CLI command surface:") assert capability.evidence[0].reference == "tests/test_cli.py" assert service.ability_map(repository.id).abilities == [] def test_fixture_breadth_javascript_typescript_package_extracts_structure_and_api(tmp_path): source = 
write_javascript_typescript_package_repo(tmp_path) service = make_service(tmp_path) repository = service.register_repository(name="JS TS Package", url=str(source)) summary = service.analyze_repository(repository.id) graph = service.candidate_graph(repository.id, summary.analysis_run.id) fact_names = {(fact.kind, fact.name, fact.path) for fact in summary.facts} capability_names = { capability.name for ability in graph.abilities for capability in ability.capabilities } feature_types = { feature.type for ability in graph.abilities for capability in ability.capabilities for feature in capability.features } assert ("language", "TypeScript", "") in fact_names assert ("framework", "React", "package.json") in fact_names assert ("framework", "Vitest", "package.json") in fact_names assert "Expose Repository Interface" in capability_names assert "Describe Repository Structure" in capability_names assert "API" in feature_types assert service.ability_map(repository.id).abilities == [] def test_fixture_breadth_misleading_docs_do_not_become_approved_truth(tmp_path): source = write_misleading_docs_repo(tmp_path) service = make_service(tmp_path) repository = service.register_repository(name="Misleading Docs", url=str(source)) summary = service.analyze_repository(repository.id) graph = service.candidate_graph(repository.id, summary.analysis_run.id) ability_map = service.ability_map(repository.id) assert summary.analysis_run.status == "completed" assert graph.abilities[0].confidence == 0.45 assert graph.abilities[0].capabilities == [] assert ability_map.abilities == [] def test_fixture_breadth_empty_repo_produces_no_candidate_claims(tmp_path): source = write_empty_repo(tmp_path) service = make_service(tmp_path) repository = service.register_repository(name="Empty Repo", url=str(source)) summary = service.analyze_repository(repository.id) graph = service.candidate_graph(repository.id, summary.analysis_run.id) assert summary.analysis_run.status == "completed" assert summary.snapshot is 
not None assert summary.snapshot.file_count == 0 assert summary.facts == [] assert graph.abilities == [] assert service.ability_map(repository.id).abilities == [] def test_semantic_search_adds_hybrid_matches_without_changing_text_default(tmp_path): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text( "# Queue Worker\n\nHandles postponed customer jobs.\n", encoding="utf-8", ) text_service = make_service(tmp_path) repository = text_service.register_repository( name="Queue Worker", url=str(source), description="Processes deferred customer work.", ) ability_id = text_service.add_ability( repository.id, name="Background Job Processing", description="Run deferred work outside request handling.", confidence=0.8, ) capability_id = text_service.add_capability( repository.id, ability_id, name="Process Customer Tasks", description="Execute queued customer tasks asynchronously.", confidence=0.7, ) text_service.add_feature( repository.id, capability_id, name="worker task loop", type="background worker", location="worker.py", confidence=0.6, ) text_service.analyze_repository(repository.id) assert text_service.search("customer queued") == [] semantic_service = RegistryService( text_service.store, ingestion=GitIngestionService(tmp_path / "checkouts"), embedding_provider=HashingEmbeddingProvider(), ) results = semantic_service.search("customer queued") assert results assert results[0].match_type in {"capability", "content_chunk"} assert results[0].matched_field == "semantic" assert results[0].vector_score > 0 assert results[0].hybrid_score >= results[0].vector_score * 0.35 assert any(result.match_type == "content_chunk" for result in results) def test_register_repository_imports_metadata_when_name_is_omitted(tmp_path): source = tmp_path / "metadata-source" source.mkdir() (source / "pyproject.toml").write_text( '[project]\nname = "metadata-source"\ndescription = "Imported description."\n', encoding="utf-8", ) service = make_service(tmp_path) repository = 
service.register_repository(url=str(source)) assert repository.name == "metadata-source" assert repository.description == "Imported description." def test_operational_logging_records_analysis_and_review_events(tmp_path, caplog): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text("# Logged\n", encoding="utf-8") (source / "app.py").write_text( "from fastapi import FastAPI\n" "app = FastAPI()\n" '@app.get("/health")\n' "def health():\n" " return {}\n", encoding="utf-8", ) service = make_service(tmp_path) with caplog.at_level(logging.INFO, logger=LOGGER_NAME): repository = service.register_repository(name="Logged", url=str(source)) summary = service.analyze_repository(repository.id) service.approve_candidate_graph(repository.id, summary.analysis_run.id) events = [json.loads(record.message)["event"] for record in caplog.records] assert "repository_registered" in events assert "analysis_started" in events assert "analysis_completed" in events assert "review_decision_recorded" in events def test_capability_must_belong_to_repository(tmp_path): service = make_service(tmp_path) first = service.register_repository( name="First", url="https://example.com/first.git", description="Manual first repository.", ) second = service.register_repository( name="Second", url="https://example.com/second.git", description="Manual second repository.", ) ability_id = service.add_ability(first.id, name="Document Classification") try: service.add_capability(second.id, ability_id, name="Classify Document") except NotFoundError as exc: assert "ability" in str(exc) else: raise AssertionError("expected a NotFoundError") def test_analyze_repository_records_snapshot_and_observed_facts(tmp_path): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text("# Example\n", encoding="utf-8") (source / "requirements.txt").write_text("fastapi\n", encoding="utf-8") (source / "app.py").write_text( "from fastapi import FastAPI\n" "app = FastAPI()\n" 
'@app.get("/health")\n' "def health():\n" " return {}\n", encoding="utf-8", ) service = make_service(tmp_path) repository = service.register_repository( name="Example", url=str(source), description="A local fixture repository", ) summary = service.analyze_repository(repository.id) assert summary.analysis_run.status == "completed" assert summary.snapshot is not None assert summary.snapshot.file_count == 3 assert service.get_repository(repository.id).status == "analyzed" fact_names = {(fact.kind, fact.name, fact.path) for fact in summary.facts} assert ("documentation", "README", "README.md") in fact_names assert ("framework", "FastAPI", "requirements.txt") in fact_names assert ("interface", "python route decorator", "app.py") in fact_names chunks = service.list_content_chunks(repository.id, summary.analysis_run.id) chunk_sources = {(chunk.kind, chunk.path) for chunk in chunks} assert ("documentation", "README.md") in chunk_sources assert ("manifest", "requirements.txt") in chunk_sources assert ("interface", "app.py") in chunk_sources candidate_graph = service.candidate_graph(repository.id, summary.analysis_run.id) assert candidate_graph.repository.name == "Example" assert candidate_graph.abilities assert "Example" in candidate_graph.abilities[0].description assert "@app.get" in candidate_graph.abilities[0].capabilities[0].description assert candidate_graph.abilities[0].capabilities[0].features[0].name == "GET /health" capability_names = { capability.name for ability in candidate_graph.abilities for capability in ability.capabilities } assert "Expose Repository Interface" in capability_names def test_analyze_repository_can_use_optional_llm_extractor(tmp_path): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text( "# Email Router\nRoutes incoming customer email.\n", encoding="utf-8", ) store = RegistryStore(tmp_path / "registry.sqlite3") store.initialize() extractor = FakeLLMExtractor( [ ExtractedAbility( name="Business Email Routing", 
description="Route incoming messages.", source_paths=["README.md"], capabilities=[ ExtractedCapability( name="Classify Incoming Email", description="Classify messages by intent.", source_paths=["README.md"], ) ], ) ] ) service = RegistryService( store, ingestion=GitIngestionService(tmp_path / "checkouts"), llm_extractor=extractor, ) repository = service.register_repository(name="Email Router", url=str(source)) summary = service.analyze_repository(repository.id) graph = service.candidate_graph(repository.id, summary.analysis_run.id) decisions = service.list_review_decisions(repository.id, summary.analysis_run.id) assert extractor.calls assert extractor.calls[0][1] assert graph.abilities[0].name == "Business Email Routing" assert graph.abilities[0].capabilities[0].name == "Classify Incoming Email" assert graph.abilities[0].source_refs[0].path == "README.md" assert decisions[0].action == "llm_extraction_used" assert "1 candidate ability" in decisions[0].notes def test_analyze_repository_falls_back_when_optional_llm_extractor_returns_no_candidates(tmp_path): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text("# Fallback\n", encoding="utf-8") store = RegistryStore(tmp_path / "registry.sqlite3") store.initialize() service = RegistryService( store, ingestion=GitIngestionService(tmp_path / "checkouts"), llm_extractor=FakeLLMExtractor([]), ) repository = service.register_repository(name="Fallback", url=str(source)) summary = service.analyze_repository(repository.id) graph = service.candidate_graph(repository.id, summary.analysis_run.id) assert graph.abilities[0].name == "Support Fallback" def test_analyze_repository_records_llm_failure_and_falls_back(tmp_path): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text("# Failing LLM\n", encoding="utf-8") store = RegistryStore(tmp_path / "registry.sqlite3") store.initialize() service = RegistryService( store, ingestion=GitIngestionService(tmp_path / "checkouts"), 
llm_extractor=FailingLLMExtractor(), ) repository = service.register_repository(name="Failing LLM", url=str(source)) summary = service.analyze_repository(repository.id) graph = service.candidate_graph(repository.id, summary.analysis_run.id) decisions = service.list_review_decisions(repository.id, summary.analysis_run.id) assert summary.analysis_run.status == "completed" assert graph.abilities[0].name == "Support Failing LLM" assert decisions[0].action == "llm_extraction_failed" assert "provider unavailable" in decisions[0].notes def test_approve_candidate_graph_publishes_ability_map_once(tmp_path): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text("# Example\n", encoding="utf-8") (source / "app.py").write_text( "from fastapi import FastAPI\n" "app = FastAPI()\n" '@app.get("/health")\n' "def health():\n" " return {}\n", encoding="utf-8", ) (source / "cli.py").write_text( "import click\n\n" "@click.command()\n" "def health():\n" " click.echo('ok')\n", encoding="utf-8", ) service = make_service(tmp_path) repository = service.register_repository(name="Example", url=str(source)) summary = service.analyze_repository(repository.id) ability_map = service.approve_candidate_graph( repository.id, summary.analysis_run.id, notes="Looks good for the first pass.", ) second_approval = service.approve_candidate_graph( repository.id, summary.analysis_run.id, ) assert service.get_repository(repository.id).status == "indexed" assert len(ability_map.abilities) == 1 assert len(second_approval.abilities) == 1 assert ability_map.abilities[0].name == "Support Example" assert ability_map.abilities[0].capabilities[0].features[0].location == "app.py" assert ability_map.abilities[0].capabilities[0].features[0].source_refs assert ability_map.abilities[0].capabilities[0].features[0].source_refs[0].line == 3 assert ability_map.abilities[0].capabilities[0].evidence[0].source_refs candidate_graph = service.candidate_graph(repository.id, summary.analysis_run.id) assert 
candidate_graph.abilities[0].status == "approved" decisions = service.list_review_decisions(repository.id, summary.analysis_run.id) assert decisions[0].action == "approve_candidate_graph" assert decisions[0].notes == "Looks good for the first pass." def test_analysis_run_diff_keeps_approved_map_stable_until_change_approval(tmp_path): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text("# Example\n", encoding="utf-8") app_file = source / "app.py" app_file.write_text( "from fastapi import FastAPI\n" "app = FastAPI()\n" '@app.get("/health")\n' "def health():\n" " return {}\n", encoding="utf-8", ) service = make_service(tmp_path) repository = service.register_repository(name="Example", url=str(source)) first_summary = service.analyze_repository(repository.id) approved_before = service.approve_candidate_graph( repository.id, first_summary.analysis_run.id, ) app_file.write_text( "from fastapi import FastAPI\n" "app = FastAPI()\n" '@app.get("/ready")\n' "def ready():\n" " return {}\n", encoding="utf-8", ) second_summary = service.analyze_repository(repository.id) approved_after_analysis = service.ability_map(repository.id) diff = service.diff_analysis_runs( repository.id, first_summary.analysis_run.id, second_summary.analysis_run.id, ) assert approved_after_analysis.abilities[0].capabilities[0].features[0].name == ( approved_before.abilities[0].capabilities[0].features[0].name ) assert any(item.item_type == "feature" for item in diff.candidates.added) assert any(item.item_type == "feature" for item in diff.candidates.removed) assert any(item.item_type == "feature" for item in diff.approved_entries.added) assert any(item.item_type == "feature" for item in diff.approved_entries.removed) approved_after_review = service.approve_analysis_run_changes( repository.id, second_summary.analysis_run.id, notes="Accept route change.", ) assert approved_after_review.abilities[0].capabilities[0].features[0].name == ( "GET /ready" ) decisions = 
service.list_review_decisions( repository.id, second_summary.analysis_run.id, ) assert decisions[0].action == "approve_analysis_run_changes" assert decisions[0].notes == "Accept route change." def test_reject_candidate_ability_excludes_it_from_approval(tmp_path): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text("# Rejectable\n", encoding="utf-8") (source / "app.py").write_text( "from fastapi import FastAPI\n" "app = FastAPI()\n" '@app.get("/health")\n' "def health():\n" " return {}\n", encoding="utf-8", ) service = make_service(tmp_path) repository = service.register_repository(name="Rejectable", url=str(source)) summary = service.analyze_repository(repository.id) graph = service.candidate_graph(repository.id, summary.analysis_run.id) candidate = graph.abilities[0] rejected_graph = service.reject_candidate_ability( repository.id, summary.analysis_run.id, candidate.id, notes="Too generic.", ) ability_map = service.approve_candidate_graph( repository.id, summary.analysis_run.id, ) assert service.get_repository(repository.id).status == "reviewing" assert rejected_graph.abilities[0].status == "rejected" assert rejected_graph.abilities[0].capabilities[0].status == "rejected" assert rejected_graph.abilities[0].capabilities[0].features[0].status == "rejected" assert ability_map.abilities == [] def test_edit_candidate_graph_values_before_approval(tmp_path): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text("# Editable\n", encoding="utf-8") (source / "app.py").write_text( "from fastapi import FastAPI\n" "app = FastAPI()\n" '@app.get("/health")\n' "def health():\n" " return {}\n", encoding="utf-8", ) service = make_service(tmp_path) repository = service.register_repository(name="Editable", url=str(source)) summary = service.analyze_repository(repository.id) graph = service.candidate_graph(repository.id, summary.analysis_run.id) candidate_ability = graph.abilities[0] candidate_capability = candidate_ability.capabilities[0] 
service.edit_candidate_ability( repository.id, summary.analysis_run.id, candidate_ability.id, name="Service Health Monitoring", description="Expose health state for operational monitoring.", confidence=0.91, notes="Curator renamed the generic ability.", ) service.edit_candidate_capability( repository.id, summary.analysis_run.id, candidate_capability.id, name="Report HTTP Health", description="Return a lightweight health response over HTTP.", confidence=0.87, ) ability_map = service.approve_candidate_graph(repository.id, summary.analysis_run.id) assert service.get_repository(repository.id).status == "indexed" assert ability_map.abilities[0].name == "Service Health Monitoring" assert ability_map.abilities[0].description == ( "Expose health state for operational monitoring." ) assert ability_map.abilities[0].confidence == 0.91 assert ability_map.abilities[0].capabilities[0].name == "Report HTTP Health" assert ability_map.abilities[0].capabilities[0].confidence == 0.87 def test_reject_candidate_capability_excludes_it_from_approval(tmp_path): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text("# Capability Reject\n", encoding="utf-8") (source / "app.py").write_text( "from fastapi import FastAPI\n" "app = FastAPI()\n" '@app.get("/health")\n' "def health():\n" " return {}\n", encoding="utf-8", ) service = make_service(tmp_path) repository = service.register_repository(name="Capability Reject", url=str(source)) summary = service.analyze_repository(repository.id) graph = service.candidate_graph(repository.id, summary.analysis_run.id) candidate_capability = graph.abilities[0].capabilities[0] rejected_graph = service.reject_candidate_capability( repository.id, summary.analysis_run.id, candidate_capability.id, notes="Interface is not relevant.", ) ability_map = service.approve_candidate_graph(repository.id, summary.analysis_run.id) assert rejected_graph.abilities[0].capabilities[0].status == "rejected" assert 
rejected_graph.abilities[0].capabilities[0].features[0].status == "rejected" approved_capability_names = { capability.name for capability in ability_map.abilities[0].capabilities } assert candidate_capability.name not in approved_capability_names def test_reject_candidate_feature_and_evidence_excludes_only_those_leaves(tmp_path): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text("# Leaf Reject\n", encoding="utf-8") (source / "tests").mkdir() (source / "tests" / "test_health.py").write_text( "def test_health(): pass\n", encoding="utf-8", ) (source / "app.py").write_text( "from fastapi import FastAPI\n" "app = FastAPI()\n" '@app.get("/health")\n' "def health():\n" " return {}\n", encoding="utf-8", ) service = make_service(tmp_path) repository = service.register_repository(name="Leaf Reject", url=str(source)) summary = service.analyze_repository(repository.id) graph = service.candidate_graph(repository.id, summary.analysis_run.id) capability = graph.abilities[0].capabilities[0] service.reject_candidate_feature( repository.id, summary.analysis_run.id, capability.features[0].id, notes="Feature is incidental.", ) service.reject_candidate_evidence( repository.id, summary.analysis_run.id, capability.evidence[0].id, notes="Evidence is too weak.", ) ability_map = service.approve_candidate_graph(repository.id, summary.analysis_run.id) approved_capability = ability_map.abilities[0].capabilities[0] assert approved_capability.name == capability.name assert approved_capability.features == [] assert len(approved_capability.evidence) == len(capability.evidence) - 1 def test_relink_candidate_capability_to_another_ability_before_approval(tmp_path): source = tmp_path / "repo" source.mkdir() (source / "README.md").write_text("# Relink Capability\n", encoding="utf-8") (source / "app.py").write_text( "from fastapi import FastAPI\n" "app = FastAPI()\n" '@app.get("/health")\n' "def health():\n" " return {}\n", encoding="utf-8", ) service = make_service(tmp_path) 
# NOTE(review): the statements down to the first `def` below are the tail of a
# test whose `def` line precedes this chunk. They are reproduced unchanged and
# belong inside that test's body — confirm indentation against the full file.
repository = service.register_repository(name="Relink Capability", url=str(source))
summary = service.analyze_repository(repository.id)
graph = service.candidate_graph(repository.id, summary.analysis_run.id)
capability = graph.abilities[0].capabilities[0]
with service.store.connect() as connection:
    cursor = connection.execute(
        """ INSERT INTO candidate_abilities (repository_id, analysis_run_id, name, description, confidence) VALUES (?, ?, ?, ?, ?) """,
        (
            repository.id,
            summary.analysis_run.id,
            "Operations Visibility",
            "Curator-created target ability.",
            0.72,
        ),
    )
    target_ability_id = int(cursor.lastrowid)
relinked_graph = service.relink_candidate_capability(
    repository.id,
    summary.analysis_run.id,
    capability.id,
    target_ability_id=target_ability_id,
    notes="Move interface under the operational ability.",
)
ability_map = service.approve_candidate_graph(repository.id, summary.analysis_run.id)
target_candidate = [
    ability for ability in relinked_graph.abilities if ability.id == target_ability_id
][0]
assert target_candidate.capabilities[0].id == capability.id
approved_target = [
    ability for ability in ability_map.abilities if ability.name == "Operations Visibility"
][0]
assert approved_target.capabilities[0].name == capability.name


def test_relink_candidate_feature_and_evidence_to_another_capability(tmp_path):
    """Relinked feature and evidence appear under the target capability once approved.

    The first feature and first evidence item of the first candidate capability
    are moved onto the second capability of the same ability; the approved
    ability map is then inspected by capability name.
    """
    repo_dir = tmp_path / "repo"
    repo_dir.mkdir()
    (repo_dir / "README.md").write_text("# Relink Leaves\n", encoding="utf-8")
    (repo_dir / "requirements.txt").write_text("fastapi\n", encoding="utf-8")
    tests_dir = repo_dir / "tests"
    tests_dir.mkdir()
    (tests_dir / "test_health.py").write_text(
        "def test_health(): pass\n",
        encoding="utf-8",
    )
    app_code = (
        "from fastapi import FastAPI\n"
        "app = FastAPI()\n"
        '@app.get("/health")\n'
        "def health():\n"
        " return {}\n"
    )
    (repo_dir / "app.py").write_text(app_code, encoding="utf-8")

    service = make_service(tmp_path)
    repository = service.register_repository(name="Relink Leaves", url=str(repo_dir))
    run_id = service.analyze_repository(repository.id).analysis_run.id
    first_ability = service.candidate_graph(repository.id, run_id).abilities[0]
    source_capability = first_ability.capabilities[0]
    target_capability = first_ability.capabilities[1]
    moved_feature = source_capability.features[0]
    moved_evidence = source_capability.evidence[0]

    service.relink_candidate_feature(
        repository.id,
        run_id,
        moved_feature.id,
        target_capability_id=target_capability.id,
    )
    service.relink_candidate_evidence(
        repository.id,
        run_id,
        moved_evidence.id,
        target_capability_id=target_capability.id,
    )

    ability_map = service.approve_candidate_graph(repository.id, run_id)
    approved_by_name = {}
    for approved in ability_map.abilities[0].capabilities:
        approved_by_name[approved.name] = approved

    # The source capability must have lost its only feature.
    assert approved_by_name[source_capability.name].features == []
    target_feature_names = {
        item.name for item in approved_by_name[target_capability.name].features
    }
    assert moved_feature.name in target_feature_names
    target_evidence_refs = {
        item.reference for item in approved_by_name[target_capability.name].evidence
    }
    assert moved_evidence.reference in target_evidence_refs


def test_merge_candidate_ability_moves_capabilities_to_target(tmp_path):
    """Merging an ability into a hand-inserted one leaves only the target approved.

    A second candidate ability row is inserted directly through the store, the
    analyzed ability is merged into it, and the graph is approved.
    """
    repo_dir = tmp_path / "repo"
    repo_dir.mkdir()
    (repo_dir / "README.md").write_text("# Merge Ability\n", encoding="utf-8")
    app_code = (
        "from fastapi import FastAPI\n"
        "app = FastAPI()\n"
        '@app.get("/health")\n'
        "def health():\n"
        " return {}\n"
    )
    (repo_dir / "app.py").write_text(app_code, encoding="utf-8")

    service = make_service(tmp_path)
    repository = service.register_repository(name="Merge Ability", url=str(repo_dir))
    run_id = service.analyze_repository(repository.id).analysis_run.id
    source_ability = service.candidate_graph(repository.id, run_id).abilities[0]

    insert_sql = """ INSERT INTO candidate_abilities (repository_id, analysis_run_id, name, description, confidence) VALUES (?, ?, ?, ?, ?) """
    new_row = (
        repository.id,
        run_id,
        "Merged Operational Ability",
        "Preferred duplicate ability.",
        0.83,
    )
    # NOTE(review): assumes leaving the `with` block makes the row visible to
    # the service calls that follow — confirm against RegistryStore.connect().
    with service.store.connect() as connection:
        cursor = connection.execute(insert_sql, new_row)
        target_ability_id = int(cursor.lastrowid)

    merged_graph = service.merge_candidate_ability(
        repository.id,
        run_id,
        source_ability.id,
        target_ability_id=target_ability_id,
    )
    ability_map = service.approve_candidate_graph(repository.id, run_id)

    source_matches = [
        candidate for candidate in merged_graph.abilities if candidate.id == source_ability.id
    ]
    merged_source = source_matches[0]
    target_matches = [
        candidate for candidate in merged_graph.abilities if candidate.id == target_ability_id
    ]
    target = target_matches[0]

    assert merged_source.status == "merged"
    assert target.capabilities
    approved_names = [approved.name for approved in ability_map.abilities]
    assert approved_names == ["Merged Operational Ability"]
    assert ability_map.abilities[0].capabilities


def test_merge_candidate_capability_moves_children_to_target(tmp_path):
    """Merging the first capability into the second keeps only the target when approved."""
    repo_dir = tmp_path / "repo"
    repo_dir.mkdir()
    (repo_dir / "README.md").write_text("# Merge Capability\n", encoding="utf-8")
    (repo_dir / "requirements.txt").write_text("fastapi\n", encoding="utf-8")
    app_code = (
        "from fastapi import FastAPI\n"
        "app = FastAPI()\n"
        '@app.get("/health")\n'
        "def health():\n"
        " return {}\n"
    )
    (repo_dir / "app.py").write_text(app_code, encoding="utf-8")

    service = make_service(tmp_path)
    repository = service.register_repository(name="Merge Capability", url=str(repo_dir))
    run_id = service.analyze_repository(repository.id).analysis_run.id
    first_ability = service.candidate_graph(repository.id, run_id).abilities[0]
    source_capability = first_ability.capabilities[0]
    target_capability = first_ability.capabilities[1]

    merged_graph = service.merge_candidate_capability(
        repository.id,
        run_id,
        source_capability.id,
        target_capability_id=target_capability.id,
    )
    ability_map = service.approve_candidate_graph(repository.id, run_id)

    # Flatten once, in ability order, then pick the first match for each id.
    all_capabilities = [
        candidate
        for ability in merged_graph.abilities
        for candidate in ability.capabilities
    ]
    merged_source = [
        candidate for candidate in all_capabilities if candidate.id == source_capability.id
    ][0]
    target = [
        candidate for candidate in all_capabilities if candidate.id == target_capability.id
    ][0]

    assert merged_source.status == "merged"
    assert target.features
    approved_names = [approved.name for approved in ability_map.abilities[0].capabilities]
    assert approved_names == [target_capability.name]
    assert ability_map.abilities[0].capabilities[0].features


def test_merge_candidate_feature_and_evidence_omits_duplicate_leaves(tmp_path):
    """Merging one feature and one evidence item drops exactly one of each on approval."""
    repo_dir = tmp_path / "repo"
    repo_dir.mkdir()
    (repo_dir / "README.md").write_text("# Merge Leaves\n", encoding="utf-8")
    tests_dir = repo_dir / "tests"
    tests_dir.mkdir()
    (tests_dir / "test_health.py").write_text(
        "def test_health(): pass\n",
        encoding="utf-8",
    )
    app_code = (
        "from fastapi import FastAPI\n"
        "app = FastAPI()\n"
        '@app.get("/health")\n'
        "def health():\n"
        " return {}\n"
        '@app.get("/ready")\n'
        "def ready():\n"
        " return {}\n"
    )
    (repo_dir / "app.py").write_text(app_code, encoding="utf-8")
    cli_code = (
        "import click\n\n"
        "@click.command()\n"
        "def health():\n"
        " click.echo('ok')\n"
    )
    (repo_dir / "cli.py").write_text(cli_code, encoding="utf-8")

    service = make_service(tmp_path)
    repository = service.register_repository(name="Merge Leaves", url=str(repo_dir))
    run_id = service.analyze_repository(repository.id).analysis_run.id
    capability = service.candidate_graph(repository.id, run_id).abilities[0].capabilities[0]

    # In each pair, the second leaf is merged into the first.
    service.merge_candidate_feature(
        repository.id,
        run_id,
        capability.features[1].id,
        target_feature_id=capability.features[0].id,
    )
    service.merge_candidate_evidence(
        repository.id,
        run_id,
        capability.evidence[1].id,
        target_evidence_id=capability.evidence[0].id,
    )

    ability_map = service.approve_candidate_graph(repository.id, run_id)
    approved_capability = ability_map.abilities[0].capabilities[0]
    expected_feature_count = len(capability.features) - 1
    expected_evidence_count = len(capability.evidence) - 1
    assert len(approved_capability.features) == expected_feature_count
    assert len(approved_capability.evidence) == expected_evidence_count


def test_analyze_repository_failure_is_recorded(tmp_path):
    """Analyzing a repository whose URL is a missing path records a failed run."""
    missing_path = tmp_path / "does-not-exist"
    service = make_service(tmp_path)
    repository = service.register_repository(
        name="Missing",
        url=str(missing_path),
        description="Manual missing repository.",
    )

    summary = service.analyze_repository(repository.id)
    run = summary.analysis_run

    assert run.status == "failed"
    assert summary.snapshot is None
    assert "does not exist" in (run.error_message or "")
    assert service.get_repository(repository.id).status == "analysis_failed"


def test_analyze_repository_clones_git_url_before_scanning(tmp_path):
    """A `file://` git URL is analyzed from a path under the service's checkouts directory."""
    origin = tmp_path / "git-source"
    origin.mkdir()

    def git(*arguments):
        # Every git command runs inside the origin repository and must succeed.
        subprocess.run(["git", *arguments], cwd=origin, check=True)

    git("init", "-b", "main")
    git("config", "user.email", "tests@example.com")
    git("config", "user.name", "Tests")
    (origin / "README.md").write_text("# Git Source\n", encoding="utf-8")
    (origin / "requirements.txt").write_text("pytest\n", encoding="utf-8")
    git("add", ".")
    git("commit", "-m", "initial")

    service = make_service(tmp_path)
    repository = service.register_repository(name="Git Source", url=origin.as_uri())
    summary = service.analyze_repository(repository.id)

    assert summary.analysis_run.status == "completed"
    assert summary.snapshot is not None
    checkouts_root = str(tmp_path / "checkouts")
    assert checkouts_root in summary.snapshot.source_path
    fact_names = set()
    for fact in summary.facts:
        fact_names.add((fact.kind, fact.name, fact.path))
    assert ("documentation", "README", "README.md") in fact_names
    assert ("framework", "pytest", "requirements.txt") in fact_names


def test_analyze_repository_can_use_cached_checkout_without_fetching(tmp_path, monkeypatch):
    """With `use_cached_checkout=True`, analysis reads the existing checkout and never runs git."""
    service = make_service(tmp_path)
    url = "https://example.com/private/repo.git"
    # NOTE(review): the directory name presumably matches what the ingestion
    # service derives for this URL — confirm against GitIngestionService.
    cached = tmp_path / "checkouts" / "repo-b5d250ec3c59"
    cached.mkdir(parents=True)
    (cached / "README.md").write_text("# Cached Repo\n", encoding="utf-8")

    def forbid_git(*args, **kwargs):
        raise AssertionError("cached analysis should not run git")

    monkeypatch.setattr(service.ingestion, "_run_git", forbid_git)

    repository = service.register_repository(
        name="Cached",
        url=url,
        description="Already cloned.",
    )
    summary = service.analyze_repository(repository.id, use_cached_checkout=True)

    assert summary.analysis_run.status == "completed"
    assert summary.snapshot is not None
    assert str(cached) == summary.snapshot.source_path
    fact_keys = {(fact.kind, fact.name, fact.path) for fact in summary.facts}
    assert ("documentation", "README", "README.md") in fact_keys


def test_operational_logging_records_analysis_and_review_events(caplog, tmp_path):
    """Register, analyze and approve emit JSON log records tagged with the repository id."""
    repo_dir = tmp_path / "repo"
    repo_dir.mkdir()
    (repo_dir / "README.md").write_text("# Logged Service\n", encoding="utf-8")
    (repo_dir / "requirements.txt").write_text("fastapi\n", encoding="utf-8")
    app_code = (
        "from fastapi import FastAPI\n"
        "app = FastAPI()\n"
        '@app.get("/health")\n'
        "def health():\n"
        " return {}\n"
    )
    (repo_dir / "app.py").write_text(app_code, encoding="utf-8")

    service = make_service(tmp_path)
    caplog.set_level(logging.INFO, logger=LOGGER_NAME)
    repository = service.register_repository(name="Logged", url=str(repo_dir))
    summary = service.analyze_repository(repository.id)
    service.approve_candidate_graph(
        repository.id,
        summary.analysis_run.id,
        notes="Logged approval.",
    )

    # Only records from the registry logger are decoded; each message is JSON.
    payloads = []
    for record in caplog.records:
        if record.name == LOGGER_NAME:
            payloads.append(json.loads(record.message))

    events = {entry["event"] for entry in payloads}
    assert "repository_registered" in events
    assert "analysis_started" in events
    assert "analysis_completed" in events
    assert "review_decision_recorded" in events
    assert all(entry["repository_id"] == repository.id for entry in payloads)