Cross repo isolation

This commit is contained in:
2026-05-02 21:55:35 +02:00
parent a462827eda
commit bf2dc4ae98
10 changed files with 695 additions and 14 deletions

View File

@@ -75,12 +75,20 @@ class LLMCandidateExtractor:
chunk_text = "\n\n".join(
(
f"Source: {chunk.path}:{chunk.start_line}-{chunk.end_line} "
f"({chunk.kind})\n{chunk.text}"
f"({chunk.kind}; source_role={self._source_role(chunk)})\n{chunk.text}"
)
for chunk in chunks[:12]
for chunk in self._prompt_chunks(chunks)
)
return (
"Extract a conservative, source-linked repository ability map.\n"
"Use original repository utility only: capabilities the repository "
"owns, intentionally exposes as a facade, or implements as an adapter.\n"
"Prefer source_role=intent_summary, product_documentation, "
"implementation_source, and test_evidence. Do not use SCOPE.md or "
"source_role=derived_scope as primary evidence; it is a derived prior "
"registry view and may be stale. Ignore agent guidance, CI/tooling, "
"dependency-only, and mention-only context unless owned product "
"evidence supports the same claim.\n"
"Return strict JSON only with this shape:\n"
"{\n"
' "abilities": [\n'
@@ -108,6 +116,46 @@ class LLMCandidateExtractor:
f"{chunk_text}\n"
)
def _prompt_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]:
promptable = [
chunk
for chunk in chunks
if self._source_role(chunk) not in {"agent_guidance", "derived_scope"}
]
return sorted(
promptable,
key=lambda chunk: (
self._source_role_priority(self._source_role(chunk)),
chunk.path,
chunk.start_line,
),
)[:12]
def _source_role(self, chunk: ContentChunk) -> str:
role = chunk.metadata.get("source_role")
if isinstance(role, str) and role:
return role
path = chunk.path.lower()
if path.endswith("intent.md"):
return "intent_summary"
if path.endswith("scope.md"):
return "derived_scope"
if path.endswith(("agents.md", "claude.md")) or "/.claude/" in path:
return "agent_guidance"
return ""
def _source_role_priority(self, source_role: str) -> int:
priorities = {
"intent_summary": 0,
"product_documentation": 1,
"implementation_source": 2,
"test_evidence": 3,
"configuration": 4,
"dependency_declaration": 5,
"ci_tooling": 6,
}
return priorities.get(source_role, 7)
def parse_response(self, content: str) -> list[ExtractedAbility]:
try:
payload = json.loads(self._json_text(content))