Fix workplan frontmatter join and prefix inference (STATE-WP-0067)

Repair glued --- delimiters, infer prefixes from frontmatter ids, and support bare WP-* workplan schemes.
2026-06-22 23:16:15 +02:00
parent fcb41e8c25
commit ae2302df64
2 changed files with 37 additions and 8 deletions
--- a/scripts/normalize_attached_repo_workplans.py
+++ b/scripts/normalize_attached_repo_workplans.py
@@ -51,17 +51,36 @@ def choose_repos(repos: list[dict], only_slugs: set[str] | None) -> list[dict]:
    return sorted(by_slug.values(), key=lambda repo: repo["slug"])


+def repair_frontmatter_delimiter(text: str) -> str:
+    """Fix a glued closing --- delimiter introduced by an earlier buggy join."""
+    if not text.startswith("---\n"):
+        return text
+    repaired = re.sub(r'\"---', '"\n---', text, count=1)
+    repaired = re.sub(r"(\d{4}-\d{2}-\d{2})---", r"\1\n---", repaired, count=1)
+    repaired = re.sub(r"([0-9a-f-]{36})---", r"\1\n---", repaired, count=1)
+    if repaired != text and not repaired.split("---", 2)[1].endswith("\n"):
+        repaired = repaired.replace("\n---\n", "\n---\n", 1)
+    return repaired
+
+
 def split_frontmatter(text: str) -> tuple[str | None, str]:
+    text = repair_frontmatter_delimiter(text)
    if not text.startswith("---\n"):
        return None, text
    end = text.find("\n---", 4)
    if end == -1:
        return None, text
-    return text[4:end], text[end + 4 :]
+    body = text[end + 4 :]
+    if body and not body.startswith("\n"):
+        body = "\n" + body
+    return text[4:end], body


 def join_frontmatter(frontmatter: str, body: str) -> str:
-    return f"---\n{frontmatter}---{body}"
+    fm = frontmatter.rstrip("\n") + "\n"
+    if body and not body.startswith("\n"):
+        body = "\n" + body
+    return f"---\n{fm}---{body}"


 def normalize_frontmatter(frontmatter: str, domain_slug: str, topic_slug: str | None) -> tuple[str, bool]:
@@ -142,13 +161,15 @@ def normalize_workplan_file(
    dry_run: bool,
 ) -> bool:
    original = path.read_text(encoding="utf-8")
-    frontmatter, body = split_frontmatter(original)
+    repaired = repair_frontmatter_delimiter(original)
+    frontmatter, body = split_frontmatter(repaired)
    if frontmatter is None:
        return False

    fm, fm_changed = normalize_frontmatter(frontmatter, domain_slug, topic_slug)
    body, body_changed = normalize_task_blocks(body)
-    if not (fm_changed or body_changed):
+    delimiter_changed = repaired != original
+    if not (fm_changed or body_changed or delimiter_changed):
        return False

    updated = join_frontmatter(fm, body)
--- a/scripts/update_agent_instruction_files.py
+++ b/scripts/update_agent_instruction_files.py
@@ -14,7 +14,10 @@ ROOT = Path(__file__).resolve().parent.parent
 TEMPLATE_DIR = ROOT / "scripts" / "project_rules"
 API_BASE = "http://127.0.0.1:8000"
 HOME_ROOT = Path("/home/worsch")
-WP_FILE_RE = re.compile(r"^([A-Z][A-Z0-9-]*-WP)-\d+")
+WP_FILE_RE = re.compile(r"^([A-Za-z][A-Za-z0-9-]*-WP)-\d+", re.IGNORECASE)
+WP_BARE_RE = re.compile(r"^(WP)-\d+")
+ID_PREFIX_RE = re.compile(r"^id:\s*([A-Z][A-Z0-9-]*-WP)-\d+", re.MULTILINE)
+ID_BARE_RE = re.compile(r"^id:\s*(WP)-\d+", re.MULTILINE)


 def fetch(path: str):
@@ -70,9 +73,14 @@ def infer_wp_prefix(repo_path: Path, repo_slug: str) -> str:
        for workplan in workplans_dir.glob("*.md"):
            if workplan.name.startswith("ADHOC"):
                continue
-            match = WP_FILE_RE.match(workplan.name)
-            if match:
-                counts[match.group(1)] += 1
+            text = workplan.read_text(encoding="utf-8", errors="replace")
+            id_match = ID_PREFIX_RE.search(text) or ID_BARE_RE.search(text)
+            if id_match:
+                counts[id_match.group(1)] += 1
+                continue
+            file_match = WP_FILE_RE.match(workplan.name) or WP_BARE_RE.match(workplan.name)
+            if file_match:
+                counts[file_match.group(1).upper()] += 1
    if not counts:
        return default_wp_prefix(repo_slug)
    top_prefix, top_count = counts.most_common(1)[0]