From 3e906c1dd43ad58fe21a69d14e031495dc3344f3 Mon Sep 17 00:00:00 2001
From: tegwick <bernd.worsch@gmail.com>
Date: Sat, 16 May 2026 00:57:44 +0200
Subject: [PATCH] Fix rerun assessment and candidate extraction

---
 src/repo_scoping/candidate_graph/generator.py | 55 ++++++++++++-
 src/repo_scoping/cli.py                       |  2 +-
 src/repo_scoping/core/service.py              |  3 +-
 tests/test_candidate_graph.py                 | 82 ++++++++++++++++++-
 tests/test_cli.py                             | 14 ++++
 tests/test_registry_service.py                | 44 ++++++++++
 ...entic-hierarchy-and-intent-scope-review.md | 39 +++++++--
 7 files changed, 227 insertions(+), 12 deletions(-)

diff --git a/src/repo_scoping/candidate_graph/generator.py b/src/repo_scoping/candidate_graph/generator.py
index 686d86a..d2db2f1 100644
--- a/src/repo_scoping/candidate_graph/generator.py
+++ b/src/repo_scoping/candidate_graph/generator.py
@@ -566,17 +566,41 @@ class CandidateGraphGenerator:
     def _intent_capability_items(self, chunks: list[ContentChunk]) -> list[str]:
         items: list[str] = []
         in_capability_section = False
+        capability_section_level = 0
         for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)):
             for raw_line in chunk.text.splitlines():
                 line = raw_line.strip()
                 if not line:
                     continue
                 if line.startswith("#"):
-                    heading = line.lstrip("#").strip().lower()
-                    in_capability_section = (
+                    level = len(line) - len(line.lstrip("#"))
+                    heading_text = re.sub(r"\\([._-])", r"\1", line.lstrip("#").strip())
+                    heading = re.sub(
+                        r"^\d+(?:\.\d+)*\.?\s+",
+                        "",
+                        heading_text,
+                    ).lower()
+                    if in_capability_section and level > capability_section_level:
+                        item = re.sub(
+                            r"^\d+(?:\.\d+)*\.?\s+",
+                            "",
+                            heading_text,
+                        )
+                        if item and item.lower() not in {"capabilities", "intended capabilities"}:
+                            items.append(item)
+                        continue
+                    opens_capability_section = (
                         "capabilit" in heading
-                        or heading in {"primary utility", "core utility"}
+                        or heading
+                        in {
+                            "outcomes",
+                            "primary outcomes",
+                            "primary utility",
+                            "core utility",
+                        }
                     )
+                    in_capability_section = opens_capability_section
+                    capability_section_level = level if opens_capability_section else 0
                     continue
                 if not in_capability_section:
                     continue
@@ -594,6 +618,16 @@ class CandidateGraphGenerator:
             return "Make Connectivity Observable Auditable And Controllable"
         if "cli tool" in lowered and "mcp" in lowered:
             return "Expose CLI And MCP Accessible Service"
+        capability_outcomes = {
+            "capability discovery": "Support Capability Discovery",
+            "capability modeling": "Model Capabilities",
+            "capability realisation": "Realize Capabilities",
+            "capability realization": "Realize Capabilities",
+            "capability validation": "Validate Capabilities",
+            "capability evolution": "Evolve Capabilities",
+        }
+        if lowered.strip(" .:-") in capability_outcomes:
+            return capability_outcomes[lowered.strip(" .:-")]
         candidate = re.split(r"\s+-\s+|\s*:\s*|[.!?]\s+", text.strip(), maxsplit=1)[0]
         candidate = candidate.strip(" .:-")
         if not candidate:
@@ -601,6 +635,12 @@ class CandidateGraphGenerator:
         words = candidate.split()
         if words:
             words[0] = self._imperative_verb(words[0])
+            if (
+                len(words) > 1
+                and words[0].lower() in {"analyze", "compare", "detect", "explore", "identify", "interpret"}
+                and words[1].lower().strip(",;:") == "of"
+            ):
+                words.pop(1)
         while words and words[-1].lower().strip(",;:") in {"a", "an", "the", "and", "or", "as", "both"}:
             words.pop()
         return self._title_from_words(words[:10])
@@ -1614,6 +1654,7 @@ class CandidateGraphGenerator:
 
     def _imperative_purpose(self, text: str) -> str:
         cleaned = re.sub(r"\s+", " ", text.strip())
+        cleaned = re.split("\\s+(?:-|\\u2013|\\u2014)\\s+", cleaned, maxsplit=1)[0]
         cleaned = re.split(r"[.!?]\s+", cleaned, maxsplit=1)[0]
         cleaned = re.sub(
             r"(?i)^this\s+repository\s+exists\s+to\s+provide\s+(?:an?\s+)?",
@@ -1635,8 +1676,14 @@ class CandidateGraphGenerator:
             return word
         lower = word.lower().strip(",;:")
         irregular = {
+            "analysis": "analyze",
+            "comparison": "compare",
+            "detection": "detect",
             "does": "do",
+            "exploration": "explore",
             "has": "have",
+            "identification": "identify",
+            "interpretation": "interpret",
             "is": "be",
         }
         if lower in irregular:
@@ -1655,7 +1702,7 @@ class CandidateGraphGenerator:
 
     def _title_from_words(self, words: list[str]) -> str:
         cleaned_words = [
-            re.sub(r"[^A-Za-z0-9_/{}-]", "", word)
+            re.sub(r"[^\w/{}-]", "", word, flags=re.UNICODE)
             for word in words
         ]
         return " ".join(
diff --git a/src/repo_scoping/cli.py b/src/repo_scoping/cli.py
index 1271584..6e10cdb 100644
--- a/src/repo_scoping/cli.py
+++ b/src/repo_scoping/cli.py
@@ -334,7 +334,7 @@ def dataset_assessment(service: RegistryService) -> dict[str, object]:
     }
     for repository in service.list_repositories():
         runs = service.list_analysis_runs(repository.id)
-        latest_run = next((run for run in reversed(runs) if run.status == "completed"), None)
+        latest_run = next((run for run in runs if run.status == "completed"), None)
         facts = service.list_observed_facts(repository.id, latest_run.id) if latest_run else []
         chunks = service.list_content_chunks(repository.id, latest_run.id) if latest_run else []
         candidate_counts = {
diff --git a/src/repo_scoping/core/service.py b/src/repo_scoping/core/service.py
index 10dfb5e..185cd6c 100644
--- a/src/repo_scoping/core/service.py
+++ b/src/repo_scoping/core/service.py
@@ -1581,6 +1581,7 @@ class RegistryService:
                     "kind": kind,
                     "layer": self._dependency_layer(kind),
                     "label": detail.get("label")
+                    or detail.get("name")
                     or self._dependency_node_label(repository_id, kind, key, item_id),
                     "reviewState": detail.get("reviewState", "accepted"),
                     "name": detail.get("name")
@@ -2724,7 +2725,7 @@ class RegistryService:
             for run in self.store.list_analysis_runs(repository_id)
             if run.status == "completed"
         ]
-        return completed[-1] if completed else None
+        return completed[0] if completed else None
 
     def _candidate_graph_or_none(
         self,
diff --git a/tests/test_candidate_graph.py b/tests/test_candidate_graph.py
index 857b781..74e9400 100644
--- a/tests/test_candidate_graph.py
+++ b/tests/test_candidate_graph.py
@@ -135,6 +135,85 @@ def test_candidate_generator_extracts_intended_capability_blocks_from_intent_chu
     assert [ref.path for ref in intent_capability.source_refs] == ["INTENT.md"]
 
 
+def test_candidate_generator_preserves_unicode_and_normalizes_analysis_names():
+    repository = Repository(
+        id=1,
+        name="VergabeTeilnahme",
+        url="/tmp/vergabe-teilnahme",
+        description=None,
+        branch="main",
+        status="analyzed",
+    )
+    facts = [
+        fact(
+            1,
+            "intent",
+            "INTENT",
+            "INTENT.md",
+            metadata={"source_role": "intent_summary"},
+        )
+    ]
+    chunks = [
+        chunk(
+            1,
+            "intent",
+            "INTENT.md",
+            "# INTENT\n\n"
+            "Vollständiger Implementierungsplan in 12 Ralph-Loop-Workplans.\n\n"
+            "## Intended Capabilities\n\n"
+            "- Analysis of impact risk and dependency chains.\n",
+        )
+    ]
+
+    graph = CandidateGraphGenerator().generate(repository, facts, chunks)
+
+    assert graph[0].name == "Vollständiger Implementierungsplan In 12 Ralph-Loop-Workplans"
+    assert graph[0].capabilities[0].name == "Analyze Impact Risk And Dependency Chains"
+
+
+def test_candidate_generator_extracts_primary_outcome_subsections_from_intent():
+    repository = Repository(
+        id=1,
+        name="HelixForge",
+        url="/tmp/helix-forge",
+        description=None,
+        branch="main",
+        status="analyzed",
+    )
+    facts = [
+        fact(
+            1,
+            "intent",
+            "INTENT",
+            "INTENT.md",
+            metadata={"source_role": "intent_summary"},
+        )
+    ]
+    chunks = [
+        chunk(
+            1,
+            "intent",
+            "INTENT.md",
+            "# INTENT\n\n"
+            "HelixForge turns intent into structure.\n\n"
+            "## 4\\. Primary outcomes\n\n"
+            "### 4.1 Capability discovery\n\n"
+            "Clarify scope and ownership.\n\n"
+            "### 4.2 Capability validation\n\n"
+            "Validate architecture descriptions structurally and semantically.\n\n"
+            "## Architectural foundation\n\n"
+            "This section should not become a capability.\n",
+        )
+    ]
+
+    graph = CandidateGraphGenerator().generate(repository, facts, chunks)
+
+    capability_names = {capability.name for capability in graph[0].capabilities}
+    assert "Support Capability Discovery" in capability_names
+    assert "Validate Capabilities" in capability_names
+    assert "Architectural Foundation" not in capability_names
+
+
 def test_candidate_generator_prefers_intent_over_derived_scope_for_ability_name():
     repository = Repository(
         id=1,
@@ -238,7 +317,7 @@ def test_candidate_generator_extracts_current_capabilities_from_scope_blocks():
             "scope",
             "SCOPE.md",
             "# SCOPE\n\n## One-liner\n"
-            "S5 Workloads and Experience layer of the Railiance OAS Stack.\n\n"
+            "S5 Workloads and Experience layer of the Railiance OAS Stack -- owns applications.\n\n"
             "## Provided Capabilities\n\n"
             "```capability\n"
             "type: infrastructure\n"
@@ -255,6 +334,7 @@ def test_candidate_generator_extracts_current_capabilities_from_scope_blocks():
 
     ability = graph[0]
     assert ability.name == "S5 Workloads And Experience Layer Of The Railiance OAS Stack"
+    assert ability.name == "S5 Workloads And Experience Layer Of The Railiance OAS Stack"
     capability = ability.capabilities[0]
     assert capability.name == "Application workload deployment"
     assert capability.primary_class == "infrastructure"
diff --git a/tests/test_cli.py b/tests/test_cli.py
index f0923b0..aadf47c 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -226,6 +226,19 @@ def test_assess_dataset_cli_reports_sparse_hierarchy_issues(tmp_path):
     )
     repository = service.register_repository(name="Scope Only", url=str(source))
     service.analyze_repository(repository.id, use_llm_assistance=False)
+    (source / "SCOPE.md").write_text(
+        "# SCOPE\n\n"
+        "## One-liner\n"
+        "Scope-only current behavior.\n\n"
+        "## Provided Capabilities\n\n"
+        "```capability\n"
+        "name: Review Latest Scope Facts\n"
+        "type: scope-review\n"
+        "description: Review the latest scope facts instead of stale runs.\n"
+        "```\n",
+        encoding="utf-8",
+    )
+    latest_summary = service.analyze_repository(repository.id, use_llm_assistance=False)
     output_path = tmp_path / "dataset.json"
 
     exit_code = main(
@@ -247,6 +260,7 @@ def test_assess_dataset_cli_reports_sparse_hierarchy_issues(tmp_path):
     assert exit_code == 0
     assert report["schema_version"] == "repo-scoping-dataset-assessment/v1"
     assert repo_report["name"] == "Scope Only"
+    assert repo_report["latest_analysis_run_id"] == latest_summary.analysis_run.id
     assert repo_report["documents"]["SCOPE.md"] is True
     assert repo_report["candidate_counts"]["capabilities"] >= 1
     assert repo_report["dependency_graph"]["node_count"] > 0
diff --git a/tests/test_registry_service.py b/tests/test_registry_service.py
index e72db22..5f71314 100644
--- a/tests/test_registry_service.py
+++ b/tests/test_registry_service.py
@@ -541,6 +541,50 @@ def test_dependency_graph_renders_candidate_fallback_when_approved_hierarchy_mis
     assert any(edge["dependencyType"] == "draft-supports" for edge in edges)
 
 
+def test_dependency_graph_candidate_fallback_uses_latest_completed_run(tmp_path):
+    service = make_service(tmp_path)
+    source = tmp_path / "latest-scope-candidate"
+    source.mkdir()
+    (source / "SCOPE.md").write_text(
+        "# SCOPE\n\n## One-liner\nOld scope summary.\n",
+        encoding="utf-8",
+    )
+    repository = service.register_repository(name="Latest Scope Candidate", url=str(source))
+    service.analyze_repository(
+        repository.id,
+        source_path=str(source),
+        use_llm_assistance=False,
+    )
+    (source / "SCOPE.md").write_text(
+        "# SCOPE\n\n"
+        "## One-liner\n"
+        "Latest scope summary.\n\n"
+        "## Provided Capabilities\n\n"
+        "```capability\n"
+        "type: review\n"
+        "title: Latest Scope Capability\n"
+        "description: The second run should drive graph fallback.\n"
+        "```\n",
+        encoding="utf-8",
+    )
+    latest = service.analyze_repository(
+        repository.id,
+        source_path=str(source),
+        use_llm_assistance=False,
+    )
+
+    payload = service.dependency_graph_elements(repository.id, use_latest_profile=False)
+
+    labels = {
+        element["data"].get("label")
+        for element in payload["elements"]
+        if "source" not in element["data"]
+    }
+    assert latest.analysis_run.id == service.list_analysis_runs(repository.id)[0].id
+    assert "Latest Scope Capability" in labels
+    assert "Old Scope Summary" not in labels
+
+
 def test_manual_registry_updates_and_deletes_approved_entries(tmp_path):
     service = make_service(tmp_path)
     repository = service.register_repository(
diff --git a/workplans/RREG-WP-0018-agentic-hierarchy-and-intent-scope-review.md b/workplans/RREG-WP-0018-agentic-hierarchy-and-intent-scope-review.md
index 4263bca..9fa2a7d 100644
--- a/workplans/RREG-WP-0018-agentic-hierarchy-and-intent-scope-review.md
+++ b/workplans/RREG-WP-0018-agentic-hierarchy-and-intent-scope-review.md
@@ -235,8 +235,37 @@ dataset:
   scope-vs-intent separation; deterministic gates can require review but do not
   accept registry truth.
 
-The latest local assessment command currently sees nine repositories because
-`vantage-point` has been added. It still reports old sparse Railiance candidate
-counts because those stored analysis runs predate this implementation. T07 stays
-open until the affected repositories are rerun and compared against the sparse
-baseline.
+The latest local assessment command initially saw nine repositories because
+`vantage-point` had been added. It still reported old sparse Railiance candidate
+counts because those stored analysis runs predated this implementation. T07
+stays open until the affected repositories are rerun and compared against the
+sparse baseline.
+
+## Rerun Review 2026-05-16
+
+The local dataset now contains ten repositories and several post-implementation
+reruns. A review found that `assess-dataset` and the dependency graph fallback
+selected the oldest completed analysis run: both read the last completed entry,
+but `list_analysis_runs` is sorted newest-first. That has been corrected.
+
+Corrected assessment results:
+- Dataset total: `10` repos, `430` facts, candidate hierarchy
+  `10/26/36/44`, graph `210/387`.
+- Improved: `railiance-cluster` now has `3` capabilities / `3` features;
+  `railiance-platform` has `3` / `3`; `railiance-enablement` has `2` / `2`;
+  `ops-warden` has repo-specific scope naming and `1` / `2`;
+  `vergabe-teilnahme` has `1` / `4`.
+- Still sparse because they were not rerun after the implementation:
+  `railiance-infra` and `railiance-apps`. Read-only generator preview shows
+  they would now produce `3` and `1` scope-derived capabilities respectively.
+- New sparse repo: `helix-forge`. Its `INTENT.md` uses numbered/escaped
+  `Primary outcomes` sections rather than bullet-based intended capabilities;
+  generator support was added for this shape and preview now yields five
+  outcome-derived capabilities.
+- Naming polish added for reviewability: preserve non-ASCII letters, normalize
+  nominalized capability names such as `Analysis of...` to `Analyze...`, and
+  trim explanatory dash clauses from scope one-liners.
+
+T07 remains in progress until `railiance-infra`, `railiance-apps`, and
+`helix-forge` are rerun and the corrected assessment report is captured as the
+comparison artifact.