diff --git a/src/repo_registry/candidate_graph/generator.py b/src/repo_registry/candidate_graph/generator.py
index 496a07e..e9978f6 100644
--- a/src/repo_registry/candidate_graph/generator.py
+++ b/src/repo_registry/candidate_graph/generator.py
@@ -63,7 +63,8 @@ class CandidateGraphGenerator:
             return []
         chunks = chunks or []
 
-        docs = self._facts(facts, "documentation")
+        scope_docs = self._facts(facts, "scope")
+        docs = scope_docs + self._facts(facts, "documentation")
         tests = self._facts(facts, "test")
         examples = self._facts(facts, "example")
         interfaces = self._facts(facts, "interface")
@@ -660,8 +661,6 @@ class CandidateGraphGenerator:
         return f"Support {self._humanize_identifier(repository.name)}"
 
     def _document_purpose_sentence(self, chunks: list[ContentChunk]) -> str:
-        for chunk in chunks:
-            if chunk.kind != "documentation":
-                continue
+        for chunk in self._documentation_chunks(chunks):
             lines = [line.strip() for line in chunk.text.splitlines() if line.strip()]
             paragraph = next((line for line in lines if not line.startswith("#")), "")
@@ -731,9 +730,7 @@ class CandidateGraphGenerator:
         )
 
     def _document_summary(self, chunks: list[ContentChunk]) -> str:
-        for chunk in chunks:
-            if chunk.kind != "documentation":
-                continue
+        for chunk in self._documentation_chunks(chunks):
             lines = [line.strip() for line in chunk.text.splitlines() if line.strip()]
             if not lines:
                 continue
@@ -744,6 +741,18 @@ class CandidateGraphGenerator:
             return heading or paragraph
         return ""
 
+    def _documentation_chunks(self, chunks: list[ContentChunk]) -> list[ContentChunk]:
+        """Return the scope and documentation chunks, scope chunks first.
+
+        Chunks of every other kind are dropped, so callers can iterate the
+        result without re-checking ``kind``. Within each kind the chunks are
+        ordered by path and then by start line.
+        """
+        return sorted(
+            [chunk for chunk in chunks if chunk.kind in {"scope", "documentation"}],
+            key=lambda chunk: (0 if chunk.kind == "scope" else 1, chunk.path, chunk.start_line),
+        )
+
     def _interface_summary(self, chunks: list[ContentChunk]) -> str:
         for chunk in chunks:
             if chunk.kind != "interface":
diff --git a/src/repo_registry/content_indexing/extractor.py b/src/repo_registry/content_indexing/extractor.py
index 1349c6e..ed216ab 100644
--- a/src/repo_registry/content_indexing/extractor.py
+++ b/src/repo_registry/content_indexing/extractor.py
@@ -1,12 +1,13 @@
 from __future__ import annotations
 
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
 
 from repo_registry.core.models import ObservedFact
 
 
 INDEXED_FACT_KINDS = {
+    "scope",
     "documentation",
     "example",
     "test",
@@ -29,6 +30,7 @@ class ContentChunkCandidate:
     start_line: int
     end_line: int
     text: str
+    metadata: dict[str, object] = field(default_factory=dict)
 
 
 class ContentExtractor:
@@ -80,6 +82,7 @@ class ContentExtractor:
                     path,
                     root,
                     fact.kind,
+                    fact.metadata,
                     lines,
                     start_line,
                     end_line,
@@ -91,7 +94,15 @@ class ContentExtractor:
             start_line = start_index + 1
             end_line = min(len(lines), start_index + MAX_CHUNK_LINES)
             chunks.append(
-                self._chunk(path, root, fact.kind, lines, start_line, end_line)
+                self._chunk(
+                    path,
+                    root,
+                    fact.kind,
+                    fact.metadata,
+                    lines,
+                    start_line,
+                    end_line,
+                )
             )
         return chunks
 
@@ -100,6 +111,7 @@ class ContentExtractor:
         path: Path,
         root: Path,
         kind: str,
+        fact_metadata: dict[str, object],
         lines: list[str],
         start_line: int,
         end_line: int,
@@ -110,6 +122,7 @@ class ContentExtractor:
             start_line=start_line,
             end_line=end_line,
             text="\n".join(lines[start_line - 1 : end_line]).strip(),
+            metadata={"source_role": fact_metadata.get("source_role", "")},
         )
 
     def _is_within(self, root: Path, path: Path) -> bool:
diff --git a/src/repo_registry/core/models.py b/src/repo_registry/core/models.py
index 78148b7..db5adf7 100644
--- a/src/repo_registry/core/models.py
+++ b/src/repo_registry/core/models.py
@@ -119,6 +119,7 @@ class ContentChunk:
     start_line: int
     end_line: int
     text: str
+    metadata: dict[str, Any] = field(default_factory=dict)
 
 
 @dataclass(frozen=True)
diff --git a/src/repo_registry/repo_scanning/scanner.py b/src/repo_registry/repo_scanning/scanner.py
index cb841ef..db77d85 100644
--- a/src/repo_registry/repo_scanning/scanner.py
+++ b/src/repo_registry/repo_scanning/scanner.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import subprocess
+import re
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any
@@ -86,6 +87,17 @@ LLM_CREDENTIAL_HINTS = {
     "GOOGLE_API_KEY": "Google API key",
 }
 
+AGENT_GUIDANCE_FILES = {  # lower-cased file names classified as agent guidance
+    "agents.md",
+    "claude.md",
+}
+
+AGENT_GUIDANCE_DIRS = {  # lower-cased names; any matching path segment counts
+    ".claude",
+    ".codex",
+    ".cursor",
+}
+
 
 @dataclass(frozen=True)
 class FactCandidate:
@@ -153,7 +165,7 @@ class DeterministicScanner:
                 kind="language",
                 name=language,
                 value=str(count),
-                metadata={"file_count": count},
+                metadata={"file_count": count, "source_role": "implementation_source"},
             )
             for language, count in counts.items()
         ]
@@ -166,14 +178,45 @@ class DeterministicScanner:
             relative = path.relative_to(root).as_posix()
             lower = relative.lower()
             name = path.name.lower()
+            source_role = self._source_role(relative)
 
-            if name.startswith("readme"):
-                facts.append(FactCandidate("documentation", "README", relative))
+            if name == "scope.md":
+                facts.append(
+                    FactCandidate(
+                        "scope",
+                        "SCOPE",
+                        relative,
+                        metadata={"source_role": "scope_summary"},
+                    )
+                )
+            elif name.startswith("readme"):
+                facts.append(
+                    FactCandidate(
+                        "documentation",
+                        "README",
+                        relative,
+                        metadata={"source_role": "product_documentation"},
+                    )
+                )
             elif lower.startswith("docs/") or lower.startswith("doc/"):
-                facts.append(FactCandidate("documentation", path.name, relative))
+                facts.append(
+                    FactCandidate(
+                        "documentation",
+                        path.name,
+                        relative,
+                        metadata={"source_role": "product_documentation"},
+                    )
+                )
 
             if lower.startswith("examples/") or lower.startswith("example/"):
-                facts.append(FactCandidate("example", path.name, relative))
+                facts.append(
+                    FactCandidate(
+                        "example",
+                        path.name,
+                        relative,
+                        metadata={"source_role": "product_documentation"},
+                    )
+                )
 
             if (
                 lower.startswith("tests/")
@@ -183,7 +226,14 @@ class DeterministicScanner:
                 or name.endswith(".test.ts")
                 or name.endswith(".spec.ts")
             ):
-                facts.append(FactCandidate("test", path.name, relative))
+                facts.append(
+                    FactCandidate(
+                        "test",
+                        path.name,
+                        relative,
+                        metadata={"source_role": "test_evidence"},
+                    )
+                )
 
             if name in MANIFEST_FRAMEWORK_HINTS or name in {
                 "requirements.txt",
@@ -193,10 +243,24 @@ class DeterministicScanner:
                 "yarn.lock",
                 "go.mod",
             }:
-                facts.append(FactCandidate("manifest", path.name, relative))
+                facts.append(
+                    FactCandidate(
+                        "manifest",
+                        path.name,
+                        relative,
+                        metadata={"source_role": "dependency_declaration"},
+                    )
+                )
 
             if lower.endswith((".yaml", ".yml", ".toml", ".ini", ".env.example")):
-                facts.append(FactCandidate("config", path.name, relative))
+                facts.append(
+                    FactCandidate(
+                        "config",
+                        path.name,
+                        relative,
+                        metadata={"source_role": source_role},
+                    )
+                )
 
         return facts
 
@@ -223,7 +287,11 @@ class DeterministicScanner:
                         kind="framework",
                         name=framework,
                         path=path.relative_to(root).as_posix(),
-                        metadata={"source": "manifest_hint", "needle": needle},
+                        metadata={
+                            "source": "manifest_hint",
+                            "needle": needle,
+                            "source_role": "dependency_declaration",
+                        },
                     )
                 )
         return facts
@@ -236,9 +304,23 @@ class DeterministicScanner:
             if path.suffix == ".py":
                 facts.extend(self._python_interface_facts(path, relative))
             if "cli" in lower or lower.endswith("/commands.py"):
-                facts.append(FactCandidate("interface", "possible CLI", relative))
+                facts.append(
+                    FactCandidate(
+                        "interface",
+                        "possible CLI",
+                        relative,
+                        metadata={"source_role": self._source_role(relative)},
+                    )
+                )
             if "routes" in lower or "api" in lower:
-                facts.append(FactCandidate("interface", "possible API surface", relative))
+                facts.append(
+                    FactCandidate(
+                        "interface",
+                        "possible API surface",
+                        relative,
+                        metadata={"source_role": self._source_role(relative)},
+                    )
+                )
         return facts
 
     def _llm_provider_facts(self, files: list[Path], root: Path) -> list[FactCandidate]:
@@ -264,8 +346,11 @@ class DeterministicScanner:
                 continue
             lower_text = text.lower()
             relative = path.relative_to(root).as_posix()
+            source_role = self._source_role(relative)
+            if source_role == "agent_guidance":
+                continue
             for needle, provider in LLM_PROVIDER_HINTS.items():
-                if needle not in lower_text:
+                if not self._has_provider_signal(lower_text, needle):
                     continue
                 self._append_once(
                     facts,
@@ -275,7 +360,10 @@ class DeterministicScanner:
                         name=provider,
                         path=relative,
                         value=needle,
-                        metadata={"source": "provider_hint"},
+                        metadata={
+                            "source": "provider_hint",
+                            "source_role": source_role,
+                        },
                     ),
                 )
             for env_name, label in LLM_CREDENTIAL_HINTS.items():
@@ -289,11 +377,22 @@ class DeterministicScanner:
                         name=label,
                         path=relative,
                         value=env_name,
-                        metadata={"source": "environment_variable"},
+                        metadata={
+                            "source": "environment_variable",
+                            "source_role": source_role,
+                        },
                     ),
                 )
-            if any(term in lower_text for term in ("provider_registry", "providers =", "adapter")):
-                if any(needle in lower_text for needle in LLM_PROVIDER_HINTS):
+            registry_hint = (
+                "provider_registry" in lower_text
+                or "providers =" in lower_text
+                or ("adapter" in lower_text and source_role == "implementation_source")
+            )
+            if registry_hint:
+                if any(
+                    self._has_provider_signal(lower_text, needle)
+                    for needle in LLM_PROVIDER_HINTS
+                ):
                     self._append_once(
                         facts,
                         seen,
@@ -301,11 +400,15 @@ class DeterministicScanner:
                             kind="provider_registry",
                             name="LLM provider registry",
                             path=relative,
-                            metadata={"source": "provider_registry_hint"},
+                            metadata={
+                                "source": "provider_registry_hint",
+                                "source_role": source_role,
+                            },
                         ),
                     )
             if "fallback" in lower_text and any(
-                needle in lower_text for needle in LLM_PROVIDER_HINTS
+                self._has_provider_signal(lower_text, needle)
+                for needle in LLM_PROVIDER_HINTS
             ):
                 self._append_once(
                     facts,
@@ -314,11 +417,78 @@ class DeterministicScanner:
                         kind="fallback_policy",
                         name="LLM provider fallback policy",
                         path=relative,
-                        metadata={"source": "fallback_hint"},
+                        metadata={
+                            "source": "fallback_hint",
+                            "source_role": source_role,
+                        },
                     ),
                 )
         return facts
 
+    def _source_role(self, relative_path: str) -> str:
+        """Classify a repository-relative path into a provenance role.
+
+        The path is lower-cased and split on ``/``. Rules are tried in order and
+        the first match wins, so a YAML file under ``.github/workflows/`` is
+        ``ci_tooling`` rather than ``configuration``. A path matching no rule is
+        ``implementation_source``.
+        """
+        lower = relative_path.lower()
+        parts = lower.split("/")
+        name = parts[-1]
+        if name == "scope.md":
+            return "scope_summary"
+        if name in AGENT_GUIDANCE_FILES or any(part in AGENT_GUIDANCE_DIRS for part in parts):
+            return "agent_guidance"
+        if lower.startswith((".github/workflows/", ".gitea/workflows/")):
+            return "ci_tooling"
+        # The file-fact pass also records *.test.ts / *.spec.ts as tests; match it
+        # here so those files are not classified as implementation source.
+        if (
+            lower.startswith(("tests/", "test/"))
+            or name.startswith("test_")
+            or name.endswith((".test.ts", ".spec.ts"))
+        ):
+            return "test_evidence"
+        if name.startswith("readme") or lower.startswith(("docs/", "doc/", "wiki/")):
+            return "product_documentation"
+        # requirements.txt is tagged as a manifest by the file-fact pass but has
+        # neither a .lock nor a .mod suffix, so name it explicitly.
+        if (
+            name in MANIFEST_FRAMEWORK_HINTS
+            or name == "requirements.txt"
+            or name.endswith((".lock", ".mod"))
+        ):
+            return "dependency_declaration"
+        if lower.endswith((".yaml", ".yml", ".toml", ".ini", ".env.example")):
+            return "configuration"
+        return "implementation_source"
+
+    def _has_provider_signal(self, lower_text: str, needle: str) -> bool:
+        """Return True when ``needle`` appears in ``lower_text`` as a standalone token.
+
+        An occurrence counts only if it is not directly preceded or followed by
+        a lower-case letter, digit or hyphen. For the ``claude`` needle an
+        occurrence is skipped when the text within 20 characters of it contains
+        ``claude.md``, ``claude code`` or ``claude.ai/code``, so references to
+        agent guidance are not reported as provider usage. ``lower_text`` is
+        expected to be lower-cased already; ``needle`` is lower-cased here.
+        """
+        token = needle.lower()
+        pattern = re.compile(rf"(?<![a-z0-9-]){re.escape(token)}(?![a-z0-9-])")
+        for match in pattern.finditer(lower_text):
+            context = lower_text[max(0, match.start() - 20) : match.end() + 20]
+            # Compare the normalised token so the exclusion also applies when a
+            # caller passes a needle that is not already lower-case.
+            if token == "claude" and (
+                "claude.md" in context
+                or "claude code" in context
+                or "claude.ai/code" in context
+            ):
+                continue
+            return True
+        return False
+
     def _append_once(
         self,
         facts: list[FactCandidate],
@@ -347,7 +517,10 @@ class DeterministicScanner:
                         name="python route decorator",
                         path=relative,
                         value=stripped,
-                        metadata={"line": line_number},
+                        metadata={
+                            "line": line_number,
+                            "source_role": self._source_role(relative),
+                        },
                     )
                 )
             elif stripped.startswith("@click.command") or stripped.startswith("@app.command"):
@@ -357,7 +530,10 @@ class DeterministicScanner:
                         name="python CLI command decorator",
                         path=relative,
                         value=stripped,
-                        metadata={"line": line_number},
+                        metadata={
+                            "line": line_number,
+                            "source_role": self._source_role(relative),
+                        },
                     )
                 )
         return facts
diff --git a/src/repo_registry/storage/sqlite.py b/src/repo_registry/storage/sqlite.py
index e099368..9965b36 100644
--- a/src/repo_registry/storage/sqlite.py
+++ b/src/repo_registry/storage/sqlite.py
@@ -180,6 +180,14 @@ class RegistryStore:
             )
             """
         )
+        columns = {
+            row["name"]
+            for row in connection.execute("PRAGMA table_info(content_chunks)").fetchall()
+        }
+        if "metadata" not in columns:
+            connection.execute(
+                "ALTER TABLE content_chunks ADD COLUMN metadata TEXT NOT NULL DEFAULT '{}'"
+            )
         connection.execute(
             "CREATE INDEX IF NOT EXISTS idx_content_chunks_repository ON content_chunks(repository_id)"
         )
@@ -1675,8 +1683,8 @@ class RegistryStore:
                 """
                 INSERT INTO content_chunks
                   (repository_id, analysis_run_id, snapshot_id, path, kind,
-                   start_line, end_line, text)
-                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                   start_line, end_line, text, metadata)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                 """,
                 [
                     (
@@ -1688,6 +1696,7 @@ class RegistryStore:
                         chunk.start_line,
                         chunk.end_line,
                         chunk.text,
+                        json.dumps(chunk.metadata),
                     )
                     for chunk in chunks
                 ],
@@ -1709,7 +1718,7 @@ class RegistryStore:
             rows = connection.execute(
                 f"""
                 SELECT id, repository_id, analysis_run_id, snapshot_id, path, kind,
-                       start_line, end_line, text
+                       start_line, end_line, text, metadata
                 FROM content_chunks
                 {where}
                 ORDER BY path ASC, start_line ASC, id ASC
@@ -2842,6 +2851,7 @@ class RegistryStore:
             start_line=row["start_line"],
             end_line=row["end_line"],
             text=row["text"],
+            metadata=json.loads(row["metadata"]),
         )
 
     @staticmethod
diff --git a/src/repo_registry/web_api/schemas.py b/src/repo_registry/web_api/schemas.py
index 0561ab8..b363de3 100644
--- a/src/repo_registry/web_api/schemas.py
+++ b/src/repo_registry/web_api/schemas.py
@@ -462,6 +462,7 @@ class ContentChunkResponse(BaseModel):
     start_line: int
     end_line: int
     text: str
+    metadata: dict[str, Any]
 
 
 class ScanSummaryResponse(BaseModel):
diff --git a/tests/test_content_indexing.py b/tests/test_content_indexing.py
index 7433c94..e7cb07e 100644
--- a/tests/test_content_indexing.py
+++ b/tests/test_content_indexing.py
@@ -2,10 +2,12 @@ from repo_registry.content_indexing.extractor import ContentExtractor
 from repo_registry.core.models import ObservedFact
 
 
-def fact(id, kind, name, path="", line=None):
+def fact(id, kind, name, path="", line=None, source_role=""):
     metadata = {}
     if line is not None:
         metadata["line"] = line
+    if source_role:
+        metadata["source_role"] = source_role
     return ObservedFact(
         id=id,
         repository_id=1,
@@ -82,3 +84,20 @@ def test_content_extractor_chunks_provider_related_config(tmp_path):
     assert len(chunks) == 1
     assert chunks[0].path == ".env.example"
     assert "OPENROUTER_API_KEY" in chunks[0].text
+
+
+def test_content_extractor_preserves_source_role_metadata(tmp_path):
+    """The fact's ``source_role`` metadata is carried onto the extracted chunk."""
+    root = tmp_path / "repo"
+    root.mkdir()
+    scope_file = root / "SCOPE.md"
+    scope_file.write_text("# SCOPE\n\nProvides OIDC.\n", encoding="utf-8")
+
+    scope_fact = fact(1, "scope", "SCOPE", "SCOPE.md", source_role="scope_summary")
+
+    extracted = ContentExtractor().extract(root, [scope_fact])
+
+    assert len(extracted) == 1
+    only_chunk = extracted[0]
+    assert only_chunk.kind == "scope"
+    assert only_chunk.metadata["source_role"] == "scope_summary"
diff --git a/tests/test_repository_scanner.py b/tests/test_repository_scanner.py
index 9f19626..1af2993 100644
--- a/tests/test_repository_scanner.py
+++ b/tests/test_repository_scanner.py
@@ -42,6 +42,22 @@ def test_deterministic_scanner_extracts_structural_facts(tmp_path):
     assert languages == {"Python": 2}
 
 
+def test_scanner_records_scope_with_source_role(tmp_path):
+    """SCOPE.md is reported as a ``scope`` fact carrying the ``scope_summary`` role."""
+    root = tmp_path / "sample"
+    root.mkdir()
+    scope_text = "# SCOPE\n\n## One-liner\n\nProvides OIDC profile enforcement.\n"
+    scope_file = root / "SCOPE.md"
+    scope_file.write_text(scope_text, encoding="utf-8")
+
+    scan = DeterministicScanner().scan(root)
+
+    found = next(item for item in scan.facts if item.kind == "scope")
+    assert found.name == "SCOPE"
+    assert found.path == "SCOPE.md"
+    assert found.metadata["source_role"] == "scope_summary"
+
+
 def test_scanner_readme_only_fixture_records_docs_without_interfaces(tmp_path):
     repo = write_readme_only_repo(tmp_path)
 
@@ -116,3 +132,28 @@ def test_scanner_records_llm_provider_and_fallback_facts(tmp_path):
     assert ("credential_config", "Anthropic API key", ".env.example") in facts
     assert ("provider_registry", "LLM provider registry", "providers.py") in facts
     assert ("fallback_policy", "LLM provider fallback policy", "README.md") in facts
+
+
+def test_scanner_does_not_treat_agent_guidance_as_llm_provider(tmp_path):
+    """Agent-guidance mentions yield no Claude provider fact and no README registry fact."""
+    root = tmp_path / "key-cape-like"
+    root.mkdir()
+    readme_text = (
+        "# KeyCape\n\n"
+        "Backend adapters live in src/internal/adapters.\n\n"
+        "See `CLAUDE.md` for agent session protocol.\n"
+    )
+    guidance_text = (
+        "# CLAUDE.md\n\n"
+        "This file provides guidance to Claude Code when working in this repo.\n"
+    )
+    (root / "README.md").write_text(readme_text, encoding="utf-8")
+    (root / "CLAUDE.md").write_text(guidance_text, encoding="utf-8")
+    (root / "src").mkdir()
+    (root / "src" / "go.mod").write_text("module keycape\n", encoding="utf-8")
+
+    scan = DeterministicScanner().scan(root)
+    seen = {(item.kind, item.name, item.path) for item in scan.facts}
+    assert ("llm_provider", "Claude", "CLAUDE.md") not in seen
+    assert ("llm_provider", "Claude", "README.md") not in seen
+    assert ("provider_registry", "LLM provider registry", "README.md") not in seen
diff --git a/workplans/RREG-WP-0009-provenance-aware-characteristic-rebuild.md b/workplans/RREG-WP-0009-provenance-aware-characteristic-rebuild.md
index ffd4e3d..0fb4f60 100644
--- a/workplans/RREG-WP-0009-provenance-aware-characteristic-rebuild.md
+++ b/workplans/RREG-WP-0009-provenance-aware-characteristic-rebuild.md
@@ -34,7 +34,7 @@ The target behavior is facts-first and provenance-aware:
 
 ```task
 id: RREG-WP-0009-T01
-status: todo
+status: done
 priority: high
 state_hub_task_id: "0c189443-5000-4025-a144-75e5bf1e3be5"
 ```
@@ -68,7 +68,7 @@ Acceptance criteria:
 
 ```task
 id: RREG-WP-0009-T02
-status: todo
+status: in_progress
 priority: high
 state_hub_task_id: "3ef728a0-832f-4441-9ece-16888ef68c47"
 ```