Fix rerun assessment and candidate extraction

This commit is contained in:
2026-05-16 00:57:44 +02:00
parent bee770fad7
commit 3e906c1dd4
7 changed files with 227 additions and 12 deletions

View File

@@ -566,17 +566,41 @@ class CandidateGraphGenerator:
def _intent_capability_items(self, chunks: list[ContentChunk]) -> list[str]: def _intent_capability_items(self, chunks: list[ContentChunk]) -> list[str]:
items: list[str] = [] items: list[str] = []
in_capability_section = False in_capability_section = False
capability_section_level = 0
for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)): for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)):
for raw_line in chunk.text.splitlines(): for raw_line in chunk.text.splitlines():
line = raw_line.strip() line = raw_line.strip()
if not line: if not line:
continue continue
if line.startswith("#"): if line.startswith("#"):
heading = line.lstrip("#").strip().lower() level = len(line) - len(line.lstrip("#"))
in_capability_section = ( heading_text = re.sub(r"\\([._-])", r"\1", line.lstrip("#").strip())
"capabilit" in heading heading = re.sub(
or heading in {"primary utility", "core utility"} r"^\d+(?:\.\d+)*\.?\s+",
"",
heading_text,
).lower()
if in_capability_section and level > capability_section_level:
item = re.sub(
r"^\d+(?:\.\d+)*\.?\s+",
"",
heading_text,
) )
if item and item.lower() not in {"capabilities", "intended capabilities"}:
items.append(item)
continue
opens_capability_section = (
"capabilit" in heading
or heading
in {
"outcomes",
"primary outcomes",
"primary utility",
"core utility",
}
)
in_capability_section = opens_capability_section
capability_section_level = level if opens_capability_section else 0
continue continue
if not in_capability_section: if not in_capability_section:
continue continue
@@ -594,6 +618,16 @@ class CandidateGraphGenerator:
return "Make Connectivity Observable Auditable And Controllable" return "Make Connectivity Observable Auditable And Controllable"
if "cli tool" in lowered and "mcp" in lowered: if "cli tool" in lowered and "mcp" in lowered:
return "Expose CLI And MCP Accessible Service" return "Expose CLI And MCP Accessible Service"
capability_outcomes = {
"capability discovery": "Support Capability Discovery",
"capability modeling": "Model Capabilities",
"capability realisation": "Realize Capabilities",
"capability realization": "Realize Capabilities",
"capability validation": "Validate Capabilities",
"capability evolution": "Evolve Capabilities",
}
if lowered.strip(" .:-") in capability_outcomes:
return capability_outcomes[lowered.strip(" .:-")]
candidate = re.split(r"\s+-\s+|\s*:\s*|[.!?]\s+", text.strip(), maxsplit=1)[0] candidate = re.split(r"\s+-\s+|\s*:\s*|[.!?]\s+", text.strip(), maxsplit=1)[0]
candidate = candidate.strip(" .:-") candidate = candidate.strip(" .:-")
if not candidate: if not candidate:
@@ -601,6 +635,12 @@ class CandidateGraphGenerator:
words = candidate.split() words = candidate.split()
if words: if words:
words[0] = self._imperative_verb(words[0]) words[0] = self._imperative_verb(words[0])
if (
len(words) > 1
and words[0].lower() in {"analyze", "compare", "detect", "explore", "identify", "interpret"}
and words[1].lower().strip(",;:") == "of"
):
words.pop(1)
while words and words[-1].lower().strip(",;:") in {"a", "an", "the", "and", "or", "as", "both"}: while words and words[-1].lower().strip(",;:") in {"a", "an", "the", "and", "or", "as", "both"}:
words.pop() words.pop()
return self._title_from_words(words[:10]) return self._title_from_words(words[:10])
@@ -1614,6 +1654,7 @@ class CandidateGraphGenerator:
def _imperative_purpose(self, text: str) -> str: def _imperative_purpose(self, text: str) -> str:
cleaned = re.sub(r"\s+", " ", text.strip()) cleaned = re.sub(r"\s+", " ", text.strip())
cleaned = re.split("\\s+(?:-|\\u2013|\\u2014)\\s+", cleaned, maxsplit=1)[0]
cleaned = re.split(r"[.!?]\s+", cleaned, maxsplit=1)[0] cleaned = re.split(r"[.!?]\s+", cleaned, maxsplit=1)[0]
cleaned = re.sub( cleaned = re.sub(
r"(?i)^this\s+repository\s+exists\s+to\s+provide\s+(?:an?\s+)?", r"(?i)^this\s+repository\s+exists\s+to\s+provide\s+(?:an?\s+)?",
@@ -1635,8 +1676,14 @@ class CandidateGraphGenerator:
return word return word
lower = word.lower().strip(",;:") lower = word.lower().strip(",;:")
irregular = { irregular = {
"analysis": "analyze",
"comparison": "compare",
"detection": "detect",
"does": "do", "does": "do",
"exploration": "explore",
"has": "have", "has": "have",
"identification": "identify",
"interpretation": "interpret",
"is": "be", "is": "be",
} }
if lower in irregular: if lower in irregular:
@@ -1655,7 +1702,7 @@ class CandidateGraphGenerator:
def _title_from_words(self, words: list[str]) -> str: def _title_from_words(self, words: list[str]) -> str:
cleaned_words = [ cleaned_words = [
re.sub(r"[^A-Za-z0-9_/{}-]", "", word) re.sub(r"[^\w/{}-]", "", word, flags=re.UNICODE)
for word in words for word in words
] ]
return " ".join( return " ".join(

View File

@@ -334,7 +334,7 @@ def dataset_assessment(service: RegistryService) -> dict[str, object]:
} }
for repository in service.list_repositories(): for repository in service.list_repositories():
runs = service.list_analysis_runs(repository.id) runs = service.list_analysis_runs(repository.id)
latest_run = next((run for run in reversed(runs) if run.status == "completed"), None) latest_run = next((run for run in runs if run.status == "completed"), None)
facts = service.list_observed_facts(repository.id, latest_run.id) if latest_run else [] facts = service.list_observed_facts(repository.id, latest_run.id) if latest_run else []
chunks = service.list_content_chunks(repository.id, latest_run.id) if latest_run else [] chunks = service.list_content_chunks(repository.id, latest_run.id) if latest_run else []
candidate_counts = { candidate_counts = {

View File

@@ -1581,6 +1581,7 @@ class RegistryService:
"kind": kind, "kind": kind,
"layer": self._dependency_layer(kind), "layer": self._dependency_layer(kind),
"label": detail.get("label") "label": detail.get("label")
or detail.get("name")
or self._dependency_node_label(repository_id, kind, key, item_id), or self._dependency_node_label(repository_id, kind, key, item_id),
"reviewState": detail.get("reviewState", "accepted"), "reviewState": detail.get("reviewState", "accepted"),
"name": detail.get("name") "name": detail.get("name")
@@ -2724,7 +2725,7 @@ class RegistryService:
for run in self.store.list_analysis_runs(repository_id) for run in self.store.list_analysis_runs(repository_id)
if run.status == "completed" if run.status == "completed"
] ]
return completed[-1] if completed else None return completed[0] if completed else None
def _candidate_graph_or_none( def _candidate_graph_or_none(
self, self,

View File

@@ -135,6 +135,85 @@ def test_candidate_generator_extracts_intended_capability_blocks_from_intent_chu
assert [ref.path for ref in intent_capability.source_refs] == ["INTENT.md"] assert [ref.path for ref in intent_capability.source_refs] == ["INTENT.md"]
def test_candidate_generator_preserves_unicode_and_normalizes_analysis_names():
    """Check that generated names keep non-ASCII letters and verbalize ``Analysis of ...``.

    The intent summary contains a German umlaut and the single intended
    capability is phrased as a nominalization; the assertions pin the
    ability name and the first capability name produced by the generator.
    """
    intent_markdown = (
        "# INTENT\n\n"
        "Vollständiger Implementierungsplan in 12 Ralph-Loop-Workplans.\n\n"
        "## Intended Capabilities\n\n"
        "- Analysis of impact risk and dependency chains.\n"
    )
    repo = Repository(
        id=1,
        name="VergabeTeilnahme",
        url="/tmp/vergabe-teilnahme",
        description=None,
        branch="main",
        status="analyzed",
    )
    intent_facts = [
        fact(1, "intent", "INTENT", "INTENT.md", metadata={"source_role": "intent_summary"}),
    ]
    intent_chunks = [chunk(1, "intent", "INTENT.md", intent_markdown)]

    graph = CandidateGraphGenerator().generate(repo, intent_facts, intent_chunks)

    # Unicode letters must not be stripped from the title.
    assert graph[0].name == "Vollständiger Implementierungsplan In 12 Ralph-Loop-Workplans"
    # "Analysis of ..." is expected to be rewritten to the imperative "Analyze ...".
    assert graph[0].capabilities[0].name == "Analyze Impact Risk And Dependency Chains"
def test_candidate_generator_extracts_primary_outcome_subsections_from_intent():
    """Check that numbered sub-headings under ``Primary outcomes`` become capabilities.

    The intent document uses an escaped, numbered ``## 4\\. Primary outcomes``
    heading with ``###`` subsections, followed by an unrelated ``##`` section
    that must not be reported as a capability.
    """
    intent_markdown = (
        "# INTENT\n\n"
        "HelixForge turns intent into structure.\n\n"
        "## 4\\. Primary outcomes\n\n"
        "### 4.1 Capability discovery\n\n"
        "Clarify scope and ownership.\n\n"
        "### 4.2 Capability validation\n\n"
        "Validate architecture descriptions structurally and semantically.\n\n"
        "## Architectural foundation\n\n"
        "This section should not become a capability.\n"
    )
    repo = Repository(
        id=1,
        name="HelixForge",
        url="/tmp/helix-forge",
        description=None,
        branch="main",
        status="analyzed",
    )
    intent_facts = [
        fact(1, "intent", "INTENT", "INTENT.md", metadata={"source_role": "intent_summary"}),
    ]
    intent_chunks = [chunk(1, "intent", "INTENT.md", intent_markdown)]

    graph = CandidateGraphGenerator().generate(repo, intent_facts, intent_chunks)
    found_names = {entry.name for entry in graph[0].capabilities}

    assert "Support Capability Discovery" in found_names
    assert "Validate Capabilities" in found_names
    # The sibling "##" heading closes the outcomes section.
    assert "Architectural Foundation" not in found_names
def test_candidate_generator_prefers_intent_over_derived_scope_for_ability_name(): def test_candidate_generator_prefers_intent_over_derived_scope_for_ability_name():
repository = Repository( repository = Repository(
id=1, id=1,
@@ -238,7 +317,7 @@ def test_candidate_generator_extracts_current_capabilities_from_scope_blocks():
"scope", "scope",
"SCOPE.md", "SCOPE.md",
"# SCOPE\n\n## One-liner\n" "# SCOPE\n\n## One-liner\n"
"S5 Workloads and Experience layer of the Railiance OAS Stack.\n\n" "S5 Workloads and Experience layer of the Railiance OAS Stack -- owns applications.\n\n"
"## Provided Capabilities\n\n" "## Provided Capabilities\n\n"
"```capability\n" "```capability\n"
"type: infrastructure\n" "type: infrastructure\n"
@@ -255,6 +334,7 @@ def test_candidate_generator_extracts_current_capabilities_from_scope_blocks():
ability = graph[0] ability = graph[0]
assert ability.name == "S5 Workloads And Experience Layer Of The Railiance OAS Stack" assert ability.name == "S5 Workloads And Experience Layer Of The Railiance OAS Stack"
assert ability.name == "S5 Workloads And Experience Layer Of The Railiance OAS Stack"
capability = ability.capabilities[0] capability = ability.capabilities[0]
assert capability.name == "Application workload deployment" assert capability.name == "Application workload deployment"
assert capability.primary_class == "infrastructure" assert capability.primary_class == "infrastructure"

View File

@@ -226,6 +226,19 @@ def test_assess_dataset_cli_reports_sparse_hierarchy_issues(tmp_path):
) )
repository = service.register_repository(name="Scope Only", url=str(source)) repository = service.register_repository(name="Scope Only", url=str(source))
service.analyze_repository(repository.id, use_llm_assistance=False) service.analyze_repository(repository.id, use_llm_assistance=False)
(source / "SCOPE.md").write_text(
"# SCOPE\n\n"
"## One-liner\n"
"Scope-only current behavior.\n\n"
"## Provided Capabilities\n\n"
"```capability\n"
"name: Review Latest Scope Facts\n"
"type: scope-review\n"
"description: Review the latest scope facts instead of stale runs.\n"
"```\n",
encoding="utf-8",
)
latest_summary = service.analyze_repository(repository.id, use_llm_assistance=False)
output_path = tmp_path / "dataset.json" output_path = tmp_path / "dataset.json"
exit_code = main( exit_code = main(
@@ -247,6 +260,7 @@ def test_assess_dataset_cli_reports_sparse_hierarchy_issues(tmp_path):
assert exit_code == 0 assert exit_code == 0
assert report["schema_version"] == "repo-scoping-dataset-assessment/v1" assert report["schema_version"] == "repo-scoping-dataset-assessment/v1"
assert repo_report["name"] == "Scope Only" assert repo_report["name"] == "Scope Only"
assert repo_report["latest_analysis_run_id"] == latest_summary.analysis_run.id
assert repo_report["documents"]["SCOPE.md"] is True assert repo_report["documents"]["SCOPE.md"] is True
assert repo_report["candidate_counts"]["capabilities"] >= 1 assert repo_report["candidate_counts"]["capabilities"] >= 1
assert repo_report["dependency_graph"]["node_count"] > 0 assert repo_report["dependency_graph"]["node_count"] > 0

View File

@@ -541,6 +541,50 @@ def test_dependency_graph_renders_candidate_fallback_when_approved_hierarchy_mis
assert any(edge["dependencyType"] == "draft-supports" for edge in edges) assert any(edge["dependencyType"] == "draft-supports" for edge in edges)
def test_dependency_graph_candidate_fallback_uses_latest_completed_run(tmp_path):
    """Check that the candidate fallback graph is built from the newest completed run.

    The repository is analyzed twice with different ``SCOPE.md`` contents; the
    node labels in the resulting graph must come from the second analysis only.
    """
    service = make_service(tmp_path)
    source = tmp_path / "latest-scope-candidate"
    source.mkdir()
    scope_file = source / "SCOPE.md"

    def run_analysis():
        # Both runs use identical arguments; only SCOPE.md differs between them.
        return service.analyze_repository(
            repository.id,
            source_path=str(source),
            use_llm_assistance=False,
        )

    scope_file.write_text(
        "# SCOPE\n\n## One-liner\nOld scope summary.\n",
        encoding="utf-8",
    )
    repository = service.register_repository(name="Latest Scope Candidate", url=str(source))
    run_analysis()

    scope_file.write_text(
        "# SCOPE\n\n"
        "## One-liner\n"
        "Latest scope summary.\n\n"
        "## Provided Capabilities\n\n"
        "```capability\n"
        "type: review\n"
        "title: Latest Scope Capability\n"
        "description: The second run should drive graph fallback.\n"
        "```\n",
        encoding="utf-8",
    )
    latest = run_analysis()

    payload = service.dependency_graph_elements(repository.id, use_latest_profile=False)
    # Elements whose data carries a "source" key are skipped; only the rest are collected.
    labels = set()
    for element in payload["elements"]:
        data = element["data"]
        if "source" not in data:
            labels.add(data.get("label"))

    # The run listing is expected to put the most recent run first.
    assert latest.analysis_run.id == service.list_analysis_runs(repository.id)[0].id
    assert "Latest Scope Capability" in labels
    assert "Old Scope Summary" not in labels
def test_manual_registry_updates_and_deletes_approved_entries(tmp_path): def test_manual_registry_updates_and_deletes_approved_entries(tmp_path):
service = make_service(tmp_path) service = make_service(tmp_path)
repository = service.register_repository( repository = service.register_repository(

View File

@@ -235,8 +235,37 @@ dataset:
scope-vs-intent separation; deterministic gates can require review but do not scope-vs-intent separation; deterministic gates can require review but do not
accept registry truth. accept registry truth.
The latest local assessment command currently sees nine repositories because The latest local assessment command initially saw nine repositories because
`vantage-point` has been added. It still reports old sparse Railiance candidate `vantage-point` had been added. It still reported old sparse Railiance candidate
counts because those stored analysis runs predate this implementation. T07 stays counts because those stored analysis runs predated this implementation. T07
open until the affected repositories are rerun and compared against the sparse stays open until the affected repositories are rerun and compared against the
baseline. sparse baseline.
## Rerun Review 2026-05-16
The local dataset now contains ten repositories and several post-implementation
reruns. A review found that `assess-dataset` and the dependency graph fallback
were incorrectly selecting the oldest completed analysis run because
`list_analysis_runs` is sorted newest-first. That has been corrected.
Corrected assessment results:
- Dataset total: `10` repos, `430` facts, candidate hierarchy
`10/26/36/44`, graph `210/387`.
- Improved: `railiance-cluster` now has `3` capabilities / `3` features;
`railiance-platform` has `3` / `3`; `railiance-enablement` has `2` / `2`;
`ops-warden` has repo-specific scope naming and `1` / `2`;
`vergabe-teilnahme` has `1` / `4`.
- Still sparse because they were not rerun after the implementation:
`railiance-infra` and `railiance-apps`. A read-only generator preview shows
they would now produce `3` and `1` scope-derived capabilities, respectively.
- New sparse repo: `helix-forge`. Its `INTENT.md` uses numbered/escaped
`Primary outcomes` sections rather than bullet-based intended capabilities;
generator support was added for this shape and preview now yields five
outcome-derived capabilities.
- Naming polish added for reviewability: preserve non-ASCII letters, normalize
nominalized capability names such as `Analysis of...` to `Analyze...`, and
trim explanatory dash clauses from scope one-liners.
T07 remains in progress until `railiance-infra`, `railiance-apps`, and
`helix-forge` are rerun and the corrected assessment report is captured as the
comparison artifact.