Exclude repo-local runtime `var/` state from deterministic scanning
(RREG-WP-0015-T01).

This patch makes three changes:
  * src/repo_registry/repo_scanning/scanner.py: add "var" to IGNORED_DIRS.
  * tests/test_repository_scanner.py: add the regression test
    test_scanner_ignores_runtime_var_checkouts, which builds a repo containing
    a nested checkout under var/checkouts/ and asserts that only the top-level
    README is counted and that no fact path starts with "var/".
  * workplans/RREG-WP-0015-self-assessment-input-hygiene.md: add the workplan
    that tracks this fix (T01, done) and its follow-ups (T02, T03, todo).

diff --git a/src/repo_registry/repo_scanning/scanner.py b/src/repo_registry/repo_scanning/scanner.py
index ed8e7c6..84ba04c 100644
--- a/src/repo_registry/repo_scanning/scanner.py
+++ b/src/repo_registry/repo_scanning/scanner.py
@@ -20,6 +20,7 @@ IGNORED_DIRS = {
     "dist",
     "node_modules",
     "target",
+    "var",
     "vendor",
 }
 
diff --git a/tests/test_repository_scanner.py b/tests/test_repository_scanner.py
index 5a06cd8..103b0ac 100644
--- a/tests/test_repository_scanner.py
+++ b/tests/test_repository_scanner.py
@@ -114,6 +114,34 @@ def test_scanner_javascript_typescript_package_records_package_facts(tmp_path):
     assert ("test", "routes.spec.ts", "src/api/routes.spec.ts") in facts
 
 
+def test_scanner_ignores_runtime_var_checkouts(tmp_path):
+    repo = tmp_path / "repo-scoping-like"
+    repo.mkdir()
+    (repo / "README.md").write_text("# Repo Scoping\n", encoding="utf-8")
+    checkout = repo / "var" / "checkouts" / "llm-connect"
+    checkout.mkdir(parents=True)
+    (checkout / "README.md").write_text(
+        "# LLM Connect\nSupports OpenRouter fallback.\n",
+        encoding="utf-8",
+    )
+    (checkout / "providers.py").write_text(
+        "provider_registry = {'openrouter': OpenRouterAdapter}\n",
+        encoding="utf-8",
+    )
+
+    result = DeterministicScanner().scan(repo)
+
+    facts = {(fact.kind, fact.name, fact.path) for fact in result.facts}
+    assert result.file_count == 1
+    assert ("documentation", "README", "README.md") in facts
+    assert all(not fact.path.startswith("var/") for fact in result.facts)
+    assert (
+        "llm_provider",
+        "OpenRouter",
+        "var/checkouts/llm-connect/README.md",
+    ) not in facts
+
+
 def test_scanner_records_llm_provider_and_fallback_facts(tmp_path):
     repo = tmp_path / "llm-connect-like"
     repo.mkdir()
diff --git a/workplans/RREG-WP-0015-self-assessment-input-hygiene.md b/workplans/RREG-WP-0015-self-assessment-input-hygiene.md
new file mode 100644
index 0000000..a412f19
--- /dev/null
+++ b/workplans/RREG-WP-0015-self-assessment-input-hygiene.md
@@ -0,0 +1,82 @@
+---
+id: RREG-WP-0015
+type: workplan
+title: "Self-Assessment Input Hygiene"
+domain: capabilities
+repo: repo-scoping
+status: active
+owner: codex
+topic_slug: foerster-capabilities
+created: "2026-05-15"
+updated: "2026-05-15"
+---
+
+# Self-Assessment Input Hygiene
+
+The first post-WP0014 self-assessment rerun proved that the acceptance boundary
+is doing useful work, but it also exposed a sharper input problem: repo-scoping
+was scanning its own runtime `var/checkouts/` directory. That pulled checked-out
+copies of `llm-connect`, `markitect`, and other repositories into repo-scoping's
+own candidate graph and recreated the provider-routing false positive from
+foreign source trees.
+
+This workplan keeps the self-improvement loop honest by making the source set
+for self-assessment match the repository, not its local runtime cache.
+
+## T01: Exclude Runtime Checkout State From Scanning
+
+```task
+id: RREG-WP-0015-T01
+status: done
+priority: high
+```
+
+Prevent deterministic scanning from reading repo-local runtime state such as
+`var/checkouts/` when the repository is analyzed from its working tree.
+
+Acceptance criteria:
+- `var/` runtime content is excluded from scanner file traversal.
+- A regression test proves nested checkout files do not produce LLM-provider
+  facts for the parent repo.
+- Normal repository documentation, source, test, and manifest scanning still
+  works.
+
+Implementation note 2026-05-15: added `var` to the deterministic scanner's
+ignored directory set and covered the repo-scoping-like failure with
+`test_scanner_ignores_runtime_var_checkouts`. Runtime checkout files no longer
+contribute documentation, language, or LLM-provider facts to the parent repo.
+
+## T02: Capture Clean Post-Acceptance Self-Assessment
+
+```task
+id: RREG-WP-0015-T02
+status: todo
+priority: high
+```
+
+Rerun `repo-scoping self-assess` after input hygiene is fixed and save a
+reviewable challenger artifact and comparison report.
+
+Acceptance criteria:
+- The artifact is release-bound to the repo-scoping commit that generated it.
+- The artifact no longer includes files from `var/checkouts/`.
+- The comparison report clearly separates remaining candidate-generation
+  quality issues from approved registry truth.
+- The artifact/report names make their relationship to WP0014/WP0015 clear.
+
+## T03: Triage Remaining Generator Quality Gaps
+
+```task
+id: RREG-WP-0015-T03
+status: todo
+priority: medium
+```
+
+Use the clean rerun to identify the next generator-quality workplan.
+
+Acceptance criteria:
+- Remaining missing expected capabilities are summarized.
+- Remaining forbidden or downgraded candidates are summarized with source refs
+  and quality-gate outcomes.
+- The next workplan is scoped around generator improvements, not deterministic
+  acceptance.