Add STATE-WP-0067 attached-repo agent and workplan normalization

Infer workplan prefixes from on-disk filenames instead of first-token derivation, add a frontmatter normalization script, and wire Make targets for dirty-repo sweeps.
2026-06-22 23:15:15 +02:00
parent e4ab64fa54
commit fcb41e8c25
4 changed files with 512 additions and 47 deletions
--- a/19
+++ b/19
@@ -253,6 +253,25 @@ fix-consistency:
 	  $(if $(REPO_PATH),--repo-path "$(REPO_PATH)",); \
 	  e=$$?; [ $$e -eq 2 ] && exit 0 || exit $$e

+## Normalize workplan frontmatter and task status literals in attached repos.
+## Usage: make normalize-attached-workplans REPO=artifact-store
+##        make normalize-attached-workplans DIRTY=1 [DRY_RUN=1]
+normalize-attached-workplans:
+# Validate arguments first so a bare invocation fails fast with a clear message.
+	@test -n "$(REPO)$(DIRTY)" || (echo "ERROR: set REPO=<slug> or DIRTY=1" >&2; exit 1)
+	$(UV) run python scripts/normalize_attached_repo_workplans.py \
+	  $(if $(REPO),--repo "$(REPO)",) \
+	  $(if $(DIRTY),--dirty,) \
+	  $(if $(DRY_RUN),--dry-run,)
+
+## Regenerate AGENTS.md / CLAUDE.md / .claude/rules from templates.
+## Usage: make update-agent-instructions REPO=artifact-store
+##        make update-agent-instructions DIRTY=1
+update-agent-instructions:
+# Validate arguments first: without a filter the script rewrites every repo.
+	@test -n "$(REPO)$(DIRTY)" || (echo "ERROR: set REPO=<slug> or DIRTY=1" >&2; exit 1)
+	$(UV) run python scripts/update_agent_instruction_files.py \
+	  $(if $(REPO),--repo "$(REPO)",) \
+	  $(if $(DIRTY),--dirty,)
+
 ## Reconcile measured token sources against State Hub.
 ## Usage: make token-reconcile [SINCE=2026-05-19] [APPLY=1] [ZERO_FALLBACKS=1]
 token-reconcile:
--- a/scripts/normalize_attached_repo_workplans.py
+++ b/scripts/normalize_attached_repo_workplans.py
@@ -0,0 +1,222 @@
+#!/usr/bin/env python3
+"""Normalize workplan frontmatter and task status literals in attached repos."""
+from __future__ import annotations
+
+import argparse
+import json
+import re
+import subprocess
+import sys
+import urllib.request
+from collections import Counter
+from pathlib import Path
+
+API_BASE = "http://127.0.0.1:8000"
+HOME_ROOT = Path("/home/worsch")
+WP_FILE_RE = re.compile(r"^([A-Z][A-Z0-9-]*-WP)-\d+")
+TASK_BLOCK_RE = re.compile(r"```task\n(.*?)```", re.DOTALL)
+TASK_STATUS_MAP = {
+    "blocked": "wait",
+    "in_progress": "progress",
+    "cancelled": "cancel",
+    "canceled": "cancel",
+}
+
+
+def fetch(path: str):
+    with urllib.request.urlopen(f"{API_BASE}{path}") as response:
+        return json.load(response)
+
+
+def dirty_repo_slugs(home: Path = HOME_ROOT) -> list[str]:
+    slugs: list[str] = []
+    for path in sorted(home.iterdir()):
+        if not (path / ".git").is_dir():
+            continue
+        result = subprocess.run(
+            ["git", "-C", str(path), "status", "--porcelain"],
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+        if result.stdout.strip():
+            slugs.append(path.name)
+    return slugs
+
+
+def choose_repos(repos: list[dict], only_slugs: set[str] | None) -> list[dict]:
+    by_slug = {repo["slug"]: repo for repo in repos if repo.get("slug")}
+    if only_slugs is not None:
+        return [by_slug[slug] for slug in sorted(only_slugs) if slug in by_slug]
+    return sorted(by_slug.values(), key=lambda repo: repo["slug"])
+
+
+def split_frontmatter(text: str) -> tuple[str | None, str]:
+    if not text.startswith("---\n"):
+        return None, text
+    end = text.find("\n---", 4)
+    if end == -1:
+        return None, text
+    return text[4:end], text[end + 4 :]
+
+
+def join_frontmatter(frontmatter: str, body: str) -> str:
+    return f"---\n{frontmatter}---{body}"
+
+
+def normalize_frontmatter(frontmatter: str, domain_slug: str, topic_slug: str | None) -> tuple[str, bool]:
+    changed = False
+    fm = frontmatter
+
+    if domain_slug:
+        new_fm, count = re.subn(
+            r"^domain:\s*.+$",
+            f"domain: {domain_slug}",
+            fm,
+            count=1,
+            flags=re.MULTILINE,
+        )
+        if count:
+            fm = new_fm
+            changed = True
+        elif "domain:" not in fm:
+            fm = fm.rstrip() + f"\ndomain: {domain_slug}\n"
+            changed = True
+
+    if topic_slug:
+        if re.search(r"^topic_slug:\s", fm, re.MULTILINE):
+            new_fm, count = re.subn(
+                r"^topic_slug:\s*.+$",
+                f"topic_slug: {topic_slug}",
+                fm,
+                count=1,
+                flags=re.MULTILINE,
+            )
+            if count:
+                fm = new_fm
+                changed = True
+        else:
+            if re.search(r"^domain:\s", fm, re.MULTILINE):
+                fm = re.sub(
+                    r"^(domain:\s*.+)$",
+                    rf"\1\ntopic_slug: {topic_slug}",
+                    fm,
+                    count=1,
+                    flags=re.MULTILINE,
+                )
+            else:
+                fm = fm.rstrip() + f"\ntopic_slug: {topic_slug}\n"
+            changed = True
+
+    return fm, changed
+
+
+def normalize_task_blocks(body: str) -> tuple[str, bool]:
+    changed = False
+
+    def repl(match: re.Match[str]) -> str:
+        nonlocal changed
+        block = match.group(1)
+        updated = block
+        for legacy, canon in TASK_STATUS_MAP.items():
+            new_block, count = re.subn(
+                rf"^status:\s*{re.escape(legacy)}\s*$",
+                f"status: {canon}",
+                updated,
+                count=1,
+                flags=re.MULTILINE,
+            )
+            if count:
+                updated = new_block
+                changed = True
+        return f"```task\n{updated}```"
+
+    return TASK_BLOCK_RE.sub(repl, body), changed
+
+
+def normalize_workplan_file(
+    path: Path,
+    domain_slug: str,
+    topic_slug: str | None,
+    *,
+    dry_run: bool,
+) -> bool:
+    original = path.read_text(encoding="utf-8")
+    frontmatter, body = split_frontmatter(original)
+    if frontmatter is None:
+        return False
+
+    fm, fm_changed = normalize_frontmatter(frontmatter, domain_slug, topic_slug)
+    body, body_changed = normalize_task_blocks(body)
+    if not (fm_changed or body_changed):
+        return False
+
+    updated = join_frontmatter(fm, body)
+    if not dry_run:
+        path.write_text(updated, encoding="utf-8")
+    return True
+
+
+def repo_topic_slug(repo: dict, topics_by_id: dict[str, dict]) -> str | None:
+    topic_id = repo.get("topic_id")
+    if not topic_id:
+        return None
+    topic = topics_by_id.get(topic_id)
+    return topic.get("slug") if topic else None
+
+
+def normalize_repo(repo: dict, topics_by_id: dict[str, dict], *, dry_run: bool) -> list[str]:
+    path = Path(repo["local_path"])
+    workplans_dir = path / "workplans"
+    if not workplans_dir.is_dir():
+        return []
+
+    domain_slug = repo.get("domain_slug") or ""
+    topic_slug = repo_topic_slug(repo, topics_by_id)
+    updated_files: list[str] = []
+
+    for workplan in sorted(workplans_dir.glob("*.md")):
+        if workplan.name.startswith("ADHOC"):
+            continue
+        if normalize_workplan_file(workplan, domain_slug, topic_slug, dry_run=dry_run):
+            updated_files.append(str(workplan.relative_to(path)))
+
+    return updated_files
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--repo", action="append", dest="repos", help="Repo slug to normalize")
+    parser.add_argument("--dirty", action="store_true", help="Normalize repos with local git changes")
+    parser.add_argument("--dry-run", action="store_true", help="Report changes without writing")
+    args = parser.parse_args()
+
+    only_slugs: set[str] | None
+    if args.repos:
+        only_slugs = set(args.repos)
+    elif args.dirty:
+        only_slugs = set(dirty_repo_slugs())
+    else:
+        parser.error("Specify --repo SLUG and/or --dirty")
+
+    repos = fetch("/repos/")
+    topics = fetch("/topics/?status=active")
+    topics_by_id = {topic["id"]: topic for topic in topics}
+    selected = choose_repos(repos, only_slugs)
+
+    total_files = 0
+    for repo in selected:
+        updated = normalize_repo(repo, topics_by_id, dry_run=args.dry_run)
+        if updated:
+            total_files += len(updated)
+            mode = "would update" if args.dry_run else "updated"
+            print(f"{repo['slug']}: {mode} {len(updated)} workplan(s)")
+            for name in updated:
+                print(f"  - {name}")
+
+    print(f"Done. {total_files} workplan file(s) {'would change' if args.dry_run else 'changed'}.")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
--- a/scripts/update_agent_instruction_files.py
+++ b/scripts/update_agent_instruction_files.py
@@ -1,14 +1,20 @@
 from __future__ import annotations

+import argparse
 import json
 import re
+import subprocess
+import sys
 import urllib.request
+from collections import Counter
 from pathlib import Path


 ROOT = Path(__file__).resolve().parent.parent
 TEMPLATE_DIR = ROOT / "scripts" / "project_rules"
 API_BASE = "http://127.0.0.1:8000"
+HOME_ROOT = Path("/home/worsch")
+WP_FILE_RE = re.compile(r"^([A-Z][A-Z0-9-]*-WP)-\d+")


 def fetch(path: str):
@@ -51,11 +57,34 @@ def repo_topic_id(repo: dict, topics: list[dict]) -> str:
    return match["id"] if match else "(none)"


-def wp_prefix(repo_slug: str) -> str:
+def default_wp_prefix(repo_slug: str) -> str:
    first = repo_slug.split("-", 1)[0].upper()
    return f"{first}-WP"


+def infer_wp_prefix(repo_path: Path, repo_slug: str) -> str:
+    """Prefer established on-disk workplan prefixes over first-token derivation."""
+    counts: Counter[str] = Counter()
+    workplans_dir = repo_path / "workplans"
+    if workplans_dir.is_dir():
+        for workplan in workplans_dir.glob("*.md"):
+            if workplan.name.startswith("ADHOC"):
+                continue
+            match = WP_FILE_RE.match(workplan.name)
+            if match:
+                counts[match.group(1)] += 1
+    if not counts:
+        return default_wp_prefix(repo_slug)
+    top_prefix, top_count = counts.most_common(1)[0]
+    if len(counts) > 1:
+        print(
+            f"warning: {repo_slug} has multiple workplan prefixes {dict(counts)}; "
+            f"using {top_prefix} ({top_count} files)",
+            file=sys.stderr,
+        )
+    return top_prefix
+
+
 def brief_domain(path: Path) -> str | None:
    brief = path / ".custodian-brief.md"
    if not brief.exists():
@@ -64,7 +93,23 @@ def brief_domain(path: Path) -> str | None:
    return match.group(1) if match else None


-def choose_repos(repos: list[dict]) -> list[dict]:
+def dirty_repo_slugs(home: Path = HOME_ROOT) -> list[str]:
+    slugs: list[str] = []
+    for path in sorted(home.iterdir()):
+        if not (path / ".git").is_dir():
+            continue
+        result = subprocess.run(
+            ["git", "-C", str(path), "status", "--porcelain"],
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+        if result.stdout.strip():
+            slugs.append(path.name)
+    return slugs
+
+
+def choose_repos(repos: list[dict], only_slugs: set[str] | None = None) -> list[dict]:
    by_path: dict[str, list[dict]] = {}
    for repo in repos:
        local_path = repo.get("local_path") or ""
@@ -83,19 +128,84 @@ def choose_repos(repos: list[dict]) -> list[dict]:
                candidates = domain_matches
        active = [r for r in candidates if r.get("status") == "active"]
        chosen.append(active[0] if active else candidates[0])
+
+    if only_slugs is not None:
+        chosen = [repo for repo in chosen if repo.get("slug") in only_slugs]
    return chosen


-def main() -> None:
+def update_repo(
+    repo: dict,
+    topics: list[dict],
+    *,
+    agents_template: str,
+    claude_template: str,
+    scope_template: str,
+    credential_routing_template: str,
+    rule_templates: dict[str, str],
+) -> str:
+    path = Path(repo["local_path"])
+    repo_slug = repo["slug"]
+    project_name = repo.get("name") or path.name
+    description = repo.get("description") or f"{project_name} - (fill in purpose)"
+    prefix = infer_wp_prefix(path, repo_slug)
+    values = {
+        "PROJECT_NAME": project_name,
+        "PROJECT_DESCRIPTION": description,
+        "DOMAIN": repo.get("domain_slug") or "",
+        "TOPIC_ID": repo_topic_id(repo, topics),
+        "REPO_SLUG": repo_slug,
+        "WP_PREFIX": prefix,
+        "CREDENTIAL_ROUTING": render(
+            credential_routing_template,
+            {
+                "PROJECT_NAME": project_name,
+                "PROJECT_DESCRIPTION": description,
+                "DOMAIN": repo.get("domain_slug") or "",
+                "TOPIC_ID": repo_topic_id(repo, topics),
+                "REPO_SLUG": repo_slug,
+                "WP_PREFIX": prefix,
+            },
+        ),
+    }
+
+    agents_path = path / "AGENTS.md"
+    extensions = read_agents_extensions(agents_path)
+    agents_path.write_text(build_agents_md(agents_template, values, extensions), encoding="utf-8")
+    (path / "CLAUDE.md").write_text(render(claude_template, values), encoding="utf-8")
+    scope_path = path / "SCOPE.md"
+    if not scope_path.exists():
+        scope_path.write_text(render(scope_template, values), encoding="utf-8")
+
+    rules_dir = path / ".claude" / "rules"
+    rules_dir.mkdir(parents=True, exist_ok=True)
+    for name, template in rule_templates.items():
+        (rules_dir / f"{name}.md").write_text(render(template, values), encoding="utf-8")
+
+    return f"{repo_slug}\t{path}\t{prefix}"
+
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description="Regenerate agent instruction files from templates.")
+    parser.add_argument("--repo", action="append", dest="repos", help="Limit to repo slug(s)")
+    parser.add_argument("--dirty", action="store_true", help="Limit to repos with local git changes")
+    args = parser.parse_args()
+
+    only_slugs: set[str] | None = None
+    if args.repos:
+        only_slugs = set(args.repos)
+    elif args.dirty:
+        only_slugs = set(dirty_repo_slugs())
+
    repos = fetch("/repos/")
    topics = fetch("/topics/?status=active")

    agents_template = (TEMPLATE_DIR / "agents-codex.template").read_text(encoding="utf-8")
    claude_template = (TEMPLATE_DIR / "claude-md.template").read_text(encoding="utf-8")
    scope_template = (TEMPLATE_DIR / "scope.template").read_text(encoding="utf-8")
-    credential_routing_template = (
-        TEMPLATE_DIR / "credential-routing.template"
-    ).read_text(encoding="utf-8")
+    credential_routing_template = (TEMPLATE_DIR / "credential-routing.template").read_text(
+        encoding="utf-8"
+    )
    rule_names = [
        "repo-identity",
        "session-protocol",
@@ -117,54 +227,27 @@ def main() -> None:
                )
            )
        else:
-            rule_templates[name] = (
-                TEMPLATE_DIR / f"{name}.template"
-            ).read_text(encoding="utf-8")
+            rule_templates[name] = (TEMPLATE_DIR / f"{name}.template").read_text(encoding="utf-8")

    updated: list[str] = []
-    for repo in choose_repos(repos):
-        path = Path(repo["local_path"])
-        repo_slug = repo["slug"]
-        project_name = repo.get("name") or path.name
-        description = repo.get("description") or f"{project_name} - (fill in purpose)"
-        values = {
-            "PROJECT_NAME": project_name,
-            "PROJECT_DESCRIPTION": description,
-            "DOMAIN": repo.get("domain_slug") or "",
-            "TOPIC_ID": repo_topic_id(repo, topics),
-            "REPO_SLUG": repo_slug,
-            "WP_PREFIX": wp_prefix(repo_slug),
-            "CREDENTIAL_ROUTING": render(credential_routing_template, {
-                "PROJECT_NAME": project_name,
-                "PROJECT_DESCRIPTION": description,
-                "DOMAIN": repo.get("domain_slug") or "",
-                "TOPIC_ID": repo_topic_id(repo, topics),
-                "REPO_SLUG": repo_slug,
-                "WP_PREFIX": wp_prefix(repo_slug),
-            }),
-        }
-
-        agents_path = path / "AGENTS.md"
-        extensions = read_agents_extensions(agents_path)
-        agents_path.write_text(
-            build_agents_md(agents_template, values, extensions), encoding="utf-8"
+    for repo in choose_repos(repos, only_slugs):
+        updated.append(
+            update_repo(
+                repo,
+                topics,
+                agents_template=agents_template,
+                claude_template=claude_template,
+                scope_template=scope_template,
+                credential_routing_template=credential_routing_template,
+                rule_templates=rule_templates,
+            )
        )
-        (path / "CLAUDE.md").write_text(render(claude_template, values), encoding="utf-8")
-        scope_path = path / "SCOPE.md"
-        if not scope_path.exists():
-            scope_path.write_text(render(scope_template, values), encoding="utf-8")
-
-        rules_dir = path / ".claude" / "rules"
-        rules_dir.mkdir(parents=True, exist_ok=True)
-        for name, template in rule_templates.items():
-            (rules_dir / f"{name}.md").write_text(render(template, values), encoding="utf-8")
-
-        updated.append(f"{repo_slug}\t{path}")

    print(f"Updated {len(updated)} local repo(s):")
    for line in updated:
        print(line)
+    return 0


 if __name__ == "__main__":
-    main()
+    raise SystemExit(main())
--- a/workplans/STATE-WP-0067-attached-repo-agent-normalization.md
+++ b/workplans/STATE-WP-0067-attached-repo-agent-normalization.md
@@ -0,0 +1,141 @@
+---
+id: STATE-WP-0067
+type: workplan
+title: "Attached Repo Agent Instruction And Workplan Frontmatter Normalization"
+domain: custodian
+repo: state-hub
+status: active
+owner: codex
+topic_slug: custodian
+created: "2026-06-22"
+updated: "2026-06-22"
+---
+
+# STATE-WP-0067 — Attached Repo Agent Instruction And Workplan Frontmatter Normalization
+
+## Goal
+
+Close drift introduced by the State Hub agent-instruction template sync across
+attached repos. Agent files were regenerated with a first-token workplan prefix
+(`artifact-store` → `ARTIFACT-WP`) and `domain: infotech`, while existing
+workplan files retain repo-specific prefixes (`ARTIFACT-STORE-WP`, `IRP-WP`, …)
+and legacy frontmatter (`domain: stack` where `stack` is the topic slug).
+
+Per ADR-001, **workplan files are the source of truth**. Agent instructions must
+match on-disk workplan prefixes and frontmatter conventions; workplans are
+renamed only when a repo has no established prefix yet.
+
+## Context
+
+- `scripts/update_agent_instruction_files.py` derives `{WP_PREFIX}` from the
+  first hyphen segment of the repo slug. That is wrong for most registered repos
+  (35+ use intentional abbreviations).
+- Template sync left ~49 repos with local changes (discover via
+  `cd ~ && gitea ll`, or scan `git status --porcelain` under `~/`).
+- Task status canon (`STATE-WP-0052`) is already reflected in regenerated
+  agent files; workplan task blocks may still use legacy literals.
+- `domain` in workplan frontmatter should be the hub **domain slug**
+  (`infotech`), not the topic slug (`stack`). Topic linkage belongs in
+  `topic_slug`.
+
+## Policy
+
+| Layer | Rule |
+|-------|------|
+| Workplan prefix | Infer from existing `workplans/*-WP-NNNN-*.md` filenames; fall back to first-token only when no workplans exist |
+| `domain` frontmatter | Set to repo `domain_slug` from State Hub registration |
+| `topic_slug` frontmatter | Set from registered `topic_id` when present |
+| Task status in workplan blocks | `in_progress→progress`, `blocked→wait`, `cancelled/canceled→cancel` |
+| Agent files | Regenerated from templates using inferred prefix — never overwrite `<!-- REPO-AGENTS-EXTENSIONS -->` tail |
+| Grandfathered prefixes | Short prefixes (`IRP-WP`, `CYA-WP`, …) are canonical for their repo — not migrated to first-token |
+
+## T01 — Inventory repos with local changes
+
+```task
+id: STATE-WP-0067-T01
+status: progress
+priority: high
+```
+
+Enumerate repos with uncommitted changes under `/home/worsch/*/`.
+
+Done when the dirty-repo list is recorded in the T04 run log.
+
+## T02 — Infer workplan prefix from on-disk files
+
+```task
+id: STATE-WP-0067-T02
+status: progress
+priority: high
+```
+
+Update `scripts/update_agent_instruction_files.py` to infer `{WP_PREFIX}` from
+existing workplan filenames before falling back to first-token derivation.
+
+Done when `artifact-store` agent files reference `ARTIFACT-STORE-WP`, not
+`ARTIFACT-WP`.
+
+## T03 — Workplan frontmatter normalization script
+
+```task
+id: STATE-WP-0067-T03
+status: progress
+priority: high
+```
+
+Add `scripts/normalize_attached_repo_workplans.py` to:
+
+- set `domain:` to registered `domain_slug`;
+- set `topic_slug:` from registered topic when missing or wrong;
+- migrate legacy task status literals inside ` ```task ` blocks.
+
+Support `--repo SLUG` and `--dirty` (scan repos under `~/` for non-empty `git status --porcelain` output).
+
+## T04 — Apply normalization to dirty repos
+
+```task
+id: STATE-WP-0067-T04
+status: todo
+priority: high
+```
+
+For each dirty repo:
+
+1. `normalize_attached_repo_workplans.py --repo <slug>`
+2. `update_agent_instruction_files.py --repo <slug>` (requires the `--repo` filter added with the T02 change)
+3. `make fix-consistency REPO=<slug>` from `~/state-hub`
+
+Done when all dirty repos have clean or warnings-only consistency checks.
+
+## T05 — Commit and push
+
+```task
+id: STATE-WP-0067-T05
+status: todo
+priority: high
+```
+
+Commit agent-instruction and workplan changes per repo with a shared message.
+Push to `origin` where a remote exists.
+
+Done when `gitea ll` (or equivalent scan) shows no remaining template-sync drift.
+
+## T06 — Close workplan
+
+```task
+id: STATE-WP-0067-T06
+status: todo
+priority: medium
+```
+
+Mark tasks done, set workplan `status: finished`, run
+`make fix-consistency REPO=state-hub`.
+
+## Acceptance Criteria
+
+- Agent instructions and workplan files agree on prefix and domain/topic fields
+  for every dirty repo.
+- `artifact-store` keeps `ARTIFACT-STORE-WP-*` filenames and IDs.
+- No `domain: stack` remains where `domain_slug` is `infotech` and `stack` is the
+  topic slug.
+- Dirty repos are committed; hub read model refreshed via fix-consistency.