generated from coulomb/repo-seed
Fix rerun assessment and candidate extraction
This commit is contained in:
@@ -566,17 +566,41 @@ class CandidateGraphGenerator:
|
||||
def _intent_capability_items(self, chunks: list[ContentChunk]) -> list[str]:
|
||||
items: list[str] = []
|
||||
in_capability_section = False
|
||||
capability_section_level = 0
|
||||
for chunk in sorted(chunks, key=lambda item: (item.path, item.start_line)):
|
||||
for raw_line in chunk.text.splitlines():
|
||||
line = raw_line.strip()
|
||||
if not line:
|
||||
continue
|
||||
if line.startswith("#"):
|
||||
heading = line.lstrip("#").strip().lower()
|
||||
in_capability_section = (
|
||||
level = len(line) - len(line.lstrip("#"))
|
||||
heading_text = re.sub(r"\\([._-])", r"\1", line.lstrip("#").strip())
|
||||
heading = re.sub(
|
||||
r"^\d+(?:\.\d+)*\.?\s+",
|
||||
"",
|
||||
heading_text,
|
||||
).lower()
|
||||
if in_capability_section and level > capability_section_level:
|
||||
item = re.sub(
|
||||
r"^\d+(?:\.\d+)*\.?\s+",
|
||||
"",
|
||||
heading_text,
|
||||
)
|
||||
if item and item.lower() not in {"capabilities", "intended capabilities"}:
|
||||
items.append(item)
|
||||
continue
|
||||
opens_capability_section = (
|
||||
"capabilit" in heading
|
||||
or heading in {"primary utility", "core utility"}
|
||||
or heading
|
||||
in {
|
||||
"outcomes",
|
||||
"primary outcomes",
|
||||
"primary utility",
|
||||
"core utility",
|
||||
}
|
||||
)
|
||||
in_capability_section = opens_capability_section
|
||||
capability_section_level = level if opens_capability_section else 0
|
||||
continue
|
||||
if not in_capability_section:
|
||||
continue
|
||||
@@ -594,6 +618,16 @@ class CandidateGraphGenerator:
|
||||
return "Make Connectivity Observable Auditable And Controllable"
|
||||
if "cli tool" in lowered and "mcp" in lowered:
|
||||
return "Expose CLI And MCP Accessible Service"
|
||||
capability_outcomes = {
|
||||
"capability discovery": "Support Capability Discovery",
|
||||
"capability modeling": "Model Capabilities",
|
||||
"capability realisation": "Realize Capabilities",
|
||||
"capability realization": "Realize Capabilities",
|
||||
"capability validation": "Validate Capabilities",
|
||||
"capability evolution": "Evolve Capabilities",
|
||||
}
|
||||
if lowered.strip(" .:-") in capability_outcomes:
|
||||
return capability_outcomes[lowered.strip(" .:-")]
|
||||
candidate = re.split(r"\s+-\s+|\s*:\s*|[.!?]\s+", text.strip(), maxsplit=1)[0]
|
||||
candidate = candidate.strip(" .:-")
|
||||
if not candidate:
|
||||
@@ -601,6 +635,12 @@ class CandidateGraphGenerator:
|
||||
words = candidate.split()
|
||||
if words:
|
||||
words[0] = self._imperative_verb(words[0])
|
||||
if (
|
||||
len(words) > 1
|
||||
and words[0].lower() in {"analyze", "compare", "detect", "explore", "identify", "interpret"}
|
||||
and words[1].lower().strip(",;:") == "of"
|
||||
):
|
||||
words.pop(1)
|
||||
while words and words[-1].lower().strip(",;:") in {"a", "an", "the", "and", "or", "as", "both"}:
|
||||
words.pop()
|
||||
return self._title_from_words(words[:10])
|
||||
@@ -1614,6 +1654,7 @@ class CandidateGraphGenerator:
|
||||
|
||||
def _imperative_purpose(self, text: str) -> str:
|
||||
cleaned = re.sub(r"\s+", " ", text.strip())
|
||||
cleaned = re.split("\\s+(?:-|\\u2013|\\u2014)\\s+", cleaned, maxsplit=1)[0]
|
||||
cleaned = re.split(r"[.!?]\s+", cleaned, maxsplit=1)[0]
|
||||
cleaned = re.sub(
|
||||
r"(?i)^this\s+repository\s+exists\s+to\s+provide\s+(?:an?\s+)?",
|
||||
@@ -1635,8 +1676,14 @@ class CandidateGraphGenerator:
|
||||
return word
|
||||
lower = word.lower().strip(",;:")
|
||||
irregular = {
|
||||
"analysis": "analyze",
|
||||
"comparison": "compare",
|
||||
"detection": "detect",
|
||||
"does": "do",
|
||||
"exploration": "explore",
|
||||
"has": "have",
|
||||
"identification": "identify",
|
||||
"interpretation": "interpret",
|
||||
"is": "be",
|
||||
}
|
||||
if lower in irregular:
|
||||
@@ -1655,7 +1702,7 @@ class CandidateGraphGenerator:
|
||||
|
||||
def _title_from_words(self, words: list[str]) -> str:
|
||||
cleaned_words = [
|
||||
re.sub(r"[^A-Za-z0-9_/{}-]", "", word)
|
||||
re.sub(r"[^\w/{}-]", "", word, flags=re.UNICODE)
|
||||
for word in words
|
||||
]
|
||||
return " ".join(
|
||||
|
||||
@@ -334,7 +334,7 @@ def dataset_assessment(service: RegistryService) -> dict[str, object]:
|
||||
}
|
||||
for repository in service.list_repositories():
|
||||
runs = service.list_analysis_runs(repository.id)
|
||||
latest_run = next((run for run in reversed(runs) if run.status == "completed"), None)
|
||||
latest_run = next((run for run in runs if run.status == "completed"), None)
|
||||
facts = service.list_observed_facts(repository.id, latest_run.id) if latest_run else []
|
||||
chunks = service.list_content_chunks(repository.id, latest_run.id) if latest_run else []
|
||||
candidate_counts = {
|
||||
|
||||
@@ -1581,6 +1581,7 @@ class RegistryService:
|
||||
"kind": kind,
|
||||
"layer": self._dependency_layer(kind),
|
||||
"label": detail.get("label")
|
||||
or detail.get("name")
|
||||
or self._dependency_node_label(repository_id, kind, key, item_id),
|
||||
"reviewState": detail.get("reviewState", "accepted"),
|
||||
"name": detail.get("name")
|
||||
@@ -2724,7 +2725,7 @@ class RegistryService:
|
||||
for run in self.store.list_analysis_runs(repository_id)
|
||||
if run.status == "completed"
|
||||
]
|
||||
return completed[-1] if completed else None
|
||||
return completed[0] if completed else None
|
||||
|
||||
def _candidate_graph_or_none(
|
||||
self,
|
||||
|
||||
Reference in New Issue
Block a user