From fcb41e8c258cd3afa9cce853491c5761f66576b9 Mon Sep 17 00:00:00 2001 From: tegwick Date: Mon, 22 Jun 2026 23:15:15 +0200 Subject: [PATCH] Add STATE-WP-0067 attached-repo agent and workplan normalization Infer workplan prefixes from on-disk filenames instead of first-token derivation, add a frontmatter normalization script, and wire Make targets for dirty-repo sweeps. --- Makefile | 19 ++ scripts/normalize_attached_repo_workplans.py | 222 ++++++++++++++++++ scripts/update_agent_instruction_files.py | 177 ++++++++++---- ...-0067-attached-repo-agent-normalization.md | 141 +++++++++++ 4 files changed, 512 insertions(+), 47 deletions(-) create mode 100644 scripts/normalize_attached_repo_workplans.py create mode 100644 workplans/STATE-WP-0067-attached-repo-agent-normalization.md diff --git a/Makefile b/Makefile index caf48d9..5877c0c 100644 --- a/Makefile +++ b/Makefile @@ -253,6 +253,25 @@ fix-consistency: $(if $(REPO_PATH),--repo-path "$(REPO_PATH)",); \ e=$$?; [ $$e -eq 2 ] && exit 0 || exit $$e +## Normalize workplan frontmatter and task status literals in attached repos. +## Usage: make normalize-attached-workplans REPO=artifact-store +## make normalize-attached-workplans DIRTY=1 +normalize-attached-workplans: + @test -n "$(REPO)$(DIRTY)" || (echo "ERROR: set REPO= or DIRTY=1"; exit 1) + $(UV) run python scripts/normalize_attached_repo_workplans.py \ + $(if $(REPO),--repo "$(REPO)",) \ + $(if $(DIRTY),--dirty,) \ + $(if $(DRY_RUN),--dry-run,) + +## Regenerate AGENTS.md / CLAUDE.md / .claude/rules from templates. +## Usage: make update-agent-instructions REPO=artifact-store +## make update-agent-instructions DIRTY=1 +update-agent-instructions: + @test -n "$(REPO)$(DIRTY)" || (echo "ERROR: set REPO= or DIRTY=1"; exit 1) + $(UV) run python scripts/update_agent_instruction_files.py \ + $(if $(REPO),--repo "$(REPO)",) \ + $(if $(DIRTY),--dirty,) + ## Reconcile measured token sources against State Hub. 
## Usage: make token-reconcile [SINCE=2026-05-19] [APPLY=1] [ZERO_FALLBACKS=1] token-reconcile: diff --git a/scripts/normalize_attached_repo_workplans.py b/scripts/normalize_attached_repo_workplans.py new file mode 100644 index 0000000..df38fe9 --- /dev/null +++ b/scripts/normalize_attached_repo_workplans.py @@ -0,0 +1,222 @@ +#!/usr/bin/env python3 +"""Normalize workplan frontmatter and task status literals in attached repos.""" +from __future__ import annotations + +import argparse +import json +import re +import subprocess +import sys +import urllib.request +from collections import Counter +from pathlib import Path + +API_BASE = "http://127.0.0.1:8000" +HOME_ROOT = Path("/home/worsch") +WP_FILE_RE = re.compile(r"^([A-Z][A-Z0-9-]*-WP)-\d+") +TASK_BLOCK_RE = re.compile(r"```task\n(.*?)```", re.DOTALL) +TASK_STATUS_MAP = { + "blocked": "wait", + "in_progress": "progress", + "cancelled": "cancel", + "canceled": "cancel", +} + + +def fetch(path: str): + with urllib.request.urlopen(f"{API_BASE}{path}") as response: + return json.load(response) + + +def dirty_repo_slugs(home: Path = HOME_ROOT) -> list[str]: + slugs: list[str] = [] + for path in sorted(home.iterdir()): + if not (path / ".git").is_dir(): + continue + result = subprocess.run( + ["git", "-C", str(path), "status", "--porcelain"], + capture_output=True, + text=True, + check=False, + ) + if result.stdout.strip(): + slugs.append(path.name) + return slugs + + +def choose_repos(repos: list[dict], only_slugs: set[str] | None) -> list[dict]: + by_slug = {repo["slug"]: repo for repo in repos if repo.get("slug")} + if only_slugs is not None: + return [by_slug[slug] for slug in sorted(only_slugs) if slug in by_slug] + return sorted(by_slug.values(), key=lambda repo: repo["slug"]) + + +def split_frontmatter(text: str) -> tuple[str | None, str]: + if not text.startswith("---\n"): + return None, text + end = text.find("\n---", 4) + if end == -1: + return None, text + return text[4:end], text[end + 4 :] + + +def 
join_frontmatter(frontmatter: str, body: str) -> str: + return f"---\n{frontmatter.rstrip()}\n---{body}" + + +def normalize_frontmatter(frontmatter: str, domain_slug: str, topic_slug: str | None) -> tuple[str, bool]: + changed = False + fm = frontmatter + + if domain_slug: + new_fm, count = re.subn( + r"^domain:.*$", + f"domain: {domain_slug}", + fm, + count=1, + flags=re.MULTILINE, + ) + if count and new_fm != fm: + fm = new_fm + changed = True + elif "domain:" not in fm: + fm = fm.rstrip() + f"\ndomain: {domain_slug}\n" + changed = True + + if topic_slug: + if re.search(r"^topic_slug:\s", fm, re.MULTILINE): + new_fm, count = re.subn( + r"^topic_slug:.*$", + f"topic_slug: {topic_slug}", + fm, + count=1, + flags=re.MULTILINE, + ) + if count and new_fm != fm: + fm = new_fm + changed = True + else: + if re.search(r"^domain:\s", fm, re.MULTILINE): + fm = re.sub( + r"^(domain:.*)$", + rf"\1\ntopic_slug: {topic_slug}", + fm, + count=1, + flags=re.MULTILINE, + ) + else: + fm = fm.rstrip() + f"\ntopic_slug: {topic_slug}\n" + changed = True + + return fm, changed + + +def normalize_task_blocks(body: str) -> tuple[str, bool]: + changed = False + + def repl(match: re.Match[str]) -> str: + nonlocal changed + block = match.group(1) + updated = block + for legacy, canon in TASK_STATUS_MAP.items(): + new_block, count = re.subn( + rf"^status:[ \t]*{re.escape(legacy)}[ \t]*$", + f"status: {canon}", + updated, + count=1, + flags=re.MULTILINE, + ) + if count: + updated = new_block + changed = True + return f"```task\n{updated}```" + + return TASK_BLOCK_RE.sub(repl, body), changed + + +def normalize_workplan_file( + path: Path, + domain_slug: str, + topic_slug: str | None, + *, + dry_run: bool, +) -> bool: + original = path.read_text(encoding="utf-8") + frontmatter, body = split_frontmatter(original) + if frontmatter is None: + return False + + fm, fm_changed = normalize_frontmatter(frontmatter, domain_slug, topic_slug) + body, body_changed = normalize_task_blocks(body) + if not (fm_changed or body_changed): + return False
+ updated = join_frontmatter(fm, body) + if not dry_run: + path.write_text(updated, encoding="utf-8") + return True + + +def repo_topic_slug(repo: dict, topics_by_id: dict[str, dict]) -> str | None: + topic_id = repo.get("topic_id") + if not topic_id: + return None + topic = topics_by_id.get(topic_id) + return topic.get("slug") if topic else None + + +def normalize_repo(repo: dict, topics_by_id: dict[str, dict], *, dry_run: bool) -> list[str]: + path = Path(repo["local_path"]) + workplans_dir = path / "workplans" + if not workplans_dir.is_dir(): + return [] + + domain_slug = repo.get("domain_slug") or "" + topic_slug = repo_topic_slug(repo, topics_by_id) + updated_files: list[str] = [] + + for workplan in sorted(workplans_dir.glob("*.md")): + if workplan.name.startswith("ADHOC"): + continue + if normalize_workplan_file(workplan, domain_slug, topic_slug, dry_run=dry_run): + updated_files.append(str(workplan.relative_to(path))) + + return updated_files + + +def main() -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--repo", action="append", dest="repos", help="Repo slug to normalize") + parser.add_argument("--dirty", action="store_true", help="Normalize repos with local git changes") + parser.add_argument("--dry-run", action="store_true", help="Report changes without writing") + args = parser.parse_args() + + only_slugs: set[str] | None + if args.repos: + only_slugs = set(args.repos) + elif args.dirty: + only_slugs = set(dirty_repo_slugs()) + else: + parser.error("Specify --repo SLUG and/or --dirty") + + repos = fetch("/repos/") + topics = fetch("/topics/?status=active") + topics_by_id = {topic["id"]: topic for topic in topics} + selected = choose_repos(repos, only_slugs) + + total_files = 0 + for repo in selected: + updated = normalize_repo(repo, topics_by_id, dry_run=args.dry_run) + if updated: + total_files += len(updated) + mode = "would update" if args.dry_run else "updated" + print(f"{repo['slug']}: {mode} {len(updated)} 
workplan(s)") + for name in updated: + print(f" - {name}") + + print(f"Done. {total_files} workplan file(s) {'would change' if args.dry_run else 'changed'}.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/scripts/update_agent_instruction_files.py b/scripts/update_agent_instruction_files.py index da12998..31c5814 100644 --- a/scripts/update_agent_instruction_files.py +++ b/scripts/update_agent_instruction_files.py @@ -1,14 +1,20 @@ from __future__ import annotations +import argparse import json import re +import subprocess +import sys import urllib.request +from collections import Counter from pathlib import Path ROOT = Path(__file__).resolve().parent.parent TEMPLATE_DIR = ROOT / "scripts" / "project_rules" API_BASE = "http://127.0.0.1:8000" +HOME_ROOT = Path("/home/worsch") +WP_FILE_RE = re.compile(r"^([A-Z][A-Z0-9-]*-WP)-\d+") def fetch(path: str): @@ -51,11 +57,34 @@ def repo_topic_id(repo: dict, topics: list[dict]) -> str: return match["id"] if match else "(none)" -def wp_prefix(repo_slug: str) -> str: +def default_wp_prefix(repo_slug: str) -> str: first = repo_slug.split("-", 1)[0].upper() return f"{first}-WP" +def infer_wp_prefix(repo_path: Path, repo_slug: str) -> str: + """Prefer established on-disk workplan prefixes over first-token derivation.""" + counts: Counter[str] = Counter() + workplans_dir = repo_path / "workplans" + if workplans_dir.is_dir(): + for workplan in workplans_dir.glob("*.md"): + if workplan.name.startswith("ADHOC"): + continue + match = WP_FILE_RE.match(workplan.name) + if match: + counts[match.group(1)] += 1 + if not counts: + return default_wp_prefix(repo_slug) + top_prefix, top_count = counts.most_common(1)[0] + if len(counts) > 1: + print( + f"warning: {repo_slug} has multiple workplan prefixes {dict(counts)}; " + f"using {top_prefix} ({top_count} files)", + file=sys.stderr, + ) + return top_prefix + + def brief_domain(path: Path) -> str | None: brief = path / 
".custodian-brief.md" if not brief.exists(): @@ -64,7 +93,23 @@ def brief_domain(path: Path) -> str | None: return match.group(1) if match else None -def choose_repos(repos: list[dict]) -> list[dict]: +def dirty_repo_slugs(home: Path = HOME_ROOT) -> list[str]: + slugs: list[str] = [] + for path in sorted(home.iterdir()): + if not (path / ".git").is_dir(): + continue + result = subprocess.run( + ["git", "-C", str(path), "status", "--porcelain"], + capture_output=True, + text=True, + check=False, + ) + if result.stdout.strip(): + slugs.append(path.name) + return slugs + + +def choose_repos(repos: list[dict], only_slugs: set[str] | None = None) -> list[dict]: by_path: dict[str, list[dict]] = {} for repo in repos: local_path = repo.get("local_path") or "" @@ -83,19 +128,84 @@ def choose_repos(repos: list[dict]) -> list[dict]: candidates = domain_matches active = [r for r in candidates if r.get("status") == "active"] chosen.append(active[0] if active else candidates[0]) + + if only_slugs is not None: + chosen = [repo for repo in chosen if repo.get("slug") in only_slugs] return chosen -def main() -> None: +def update_repo( + repo: dict, + topics: list[dict], + *, + agents_template: str, + claude_template: str, + scope_template: str, + credential_routing_template: str, + rule_templates: dict[str, str], +) -> str: + path = Path(repo["local_path"]) + repo_slug = repo["slug"] + project_name = repo.get("name") or path.name + description = repo.get("description") or f"{project_name} - (fill in purpose)" + prefix = infer_wp_prefix(path, repo_slug) + values = { + "PROJECT_NAME": project_name, + "PROJECT_DESCRIPTION": description, + "DOMAIN": repo.get("domain_slug") or "", + "TOPIC_ID": repo_topic_id(repo, topics), + "REPO_SLUG": repo_slug, + "WP_PREFIX": prefix, + "CREDENTIAL_ROUTING": render( + credential_routing_template, + { + "PROJECT_NAME": project_name, + "PROJECT_DESCRIPTION": description, + "DOMAIN": repo.get("domain_slug") or "", + "TOPIC_ID": repo_topic_id(repo, 
topics), + "REPO_SLUG": repo_slug, + "WP_PREFIX": prefix, + }, + ), + } + + agents_path = path / "AGENTS.md" + extensions = read_agents_extensions(agents_path) + agents_path.write_text(build_agents_md(agents_template, values, extensions), encoding="utf-8") + (path / "CLAUDE.md").write_text(render(claude_template, values), encoding="utf-8") + scope_path = path / "SCOPE.md" + if not scope_path.exists(): + scope_path.write_text(render(scope_template, values), encoding="utf-8") + + rules_dir = path / ".claude" / "rules" + rules_dir.mkdir(parents=True, exist_ok=True) + for name, template in rule_templates.items(): + (rules_dir / f"{name}.md").write_text(render(template, values), encoding="utf-8") + + return f"{repo_slug}\t{path}\t{prefix}" + + +def main() -> int: + parser = argparse.ArgumentParser(description="Regenerate agent instruction files from templates.") + parser.add_argument("--repo", action="append", dest="repos", help="Limit to repo slug(s)") + parser.add_argument("--dirty", action="store_true", help="Limit to repos with local git changes") + args = parser.parse_args() + + only_slugs: set[str] | None = None + if args.repos: + only_slugs = set(args.repos) + elif args.dirty: + only_slugs = set(dirty_repo_slugs()) + repos = fetch("/repos/") topics = fetch("/topics/?status=active") agents_template = (TEMPLATE_DIR / "agents-codex.template").read_text(encoding="utf-8") claude_template = (TEMPLATE_DIR / "claude-md.template").read_text(encoding="utf-8") scope_template = (TEMPLATE_DIR / "scope.template").read_text(encoding="utf-8") - credential_routing_template = ( - TEMPLATE_DIR / "credential-routing.template" - ).read_text(encoding="utf-8") + credential_routing_template = (TEMPLATE_DIR / "credential-routing.template").read_text( + encoding="utf-8" + ) rule_names = [ "repo-identity", "session-protocol", @@ -117,54 +227,27 @@ def main() -> None: ) ) else: - rule_templates[name] = ( - TEMPLATE_DIR / f"{name}.template" - ).read_text(encoding="utf-8") + 
rule_templates[name] = (TEMPLATE_DIR / f"{name}.template").read_text(encoding="utf-8") updated: list[str] = [] - for repo in choose_repos(repos): - path = Path(repo["local_path"]) - repo_slug = repo["slug"] - project_name = repo.get("name") or path.name - description = repo.get("description") or f"{project_name} - (fill in purpose)" - values = { - "PROJECT_NAME": project_name, - "PROJECT_DESCRIPTION": description, - "DOMAIN": repo.get("domain_slug") or "", - "TOPIC_ID": repo_topic_id(repo, topics), - "REPO_SLUG": repo_slug, - "WP_PREFIX": wp_prefix(repo_slug), - "CREDENTIAL_ROUTING": render(credential_routing_template, { - "PROJECT_NAME": project_name, - "PROJECT_DESCRIPTION": description, - "DOMAIN": repo.get("domain_slug") or "", - "TOPIC_ID": repo_topic_id(repo, topics), - "REPO_SLUG": repo_slug, - "WP_PREFIX": wp_prefix(repo_slug), - }), - } - - agents_path = path / "AGENTS.md" - extensions = read_agents_extensions(agents_path) - agents_path.write_text( - build_agents_md(agents_template, values, extensions), encoding="utf-8" + for repo in choose_repos(repos, only_slugs): + updated.append( + update_repo( + repo, + topics, + agents_template=agents_template, + claude_template=claude_template, + scope_template=scope_template, + credential_routing_template=credential_routing_template, + rule_templates=rule_templates, + ) ) - (path / "CLAUDE.md").write_text(render(claude_template, values), encoding="utf-8") - scope_path = path / "SCOPE.md" - if not scope_path.exists(): - scope_path.write_text(render(scope_template, values), encoding="utf-8") - - rules_dir = path / ".claude" / "rules" - rules_dir.mkdir(parents=True, exist_ok=True) - for name, template in rule_templates.items(): - (rules_dir / f"{name}.md").write_text(render(template, values), encoding="utf-8") - - updated.append(f"{repo_slug}\t{path}") print(f"Updated {len(updated)} local repo(s):") for line in updated: print(line) + return 0 if __name__ == "__main__": - main() + raise SystemExit(main()) \ No newline 
at end of file diff --git a/workplans/STATE-WP-0067-attached-repo-agent-normalization.md b/workplans/STATE-WP-0067-attached-repo-agent-normalization.md new file mode 100644 index 0000000..72d82c7 --- /dev/null +++ b/workplans/STATE-WP-0067-attached-repo-agent-normalization.md @@ -0,0 +1,141 @@ +--- +id: STATE-WP-0067 +type: workplan +title: "Attached Repo Agent Instruction And Workplan Frontmatter Normalization" +domain: custodian +repo: state-hub +status: active +owner: codex +topic_slug: custodian +created: "2026-06-22" +updated: "2026-06-22" +--- + +# STATE-WP-0067 — Attached Repo Agent Instruction And Workplan Frontmatter Normalization + +## Goal + +Close drift introduced by the State Hub agent-instruction template sync across +attached repos. Agent files were regenerated with a first-token workplan prefix +(`artifact-store` → `ARTIFACT-WP`) and `domain: infotech`, while existing +workplan files retain repo-specific prefixes (`ARTIFACT-STORE-WP`, `IRP-WP`, …) +and legacy frontmatter (`domain: stack` where `stack` is the topic slug). + +Per ADR-001, **workplan files are the source of truth**. Agent instructions must +match on-disk workplan prefixes and frontmatter conventions; workplans are +renamed only when a repo has no established prefix yet. + +## Context + +- `scripts/update_agent_instruction_files.py` derives `{WP_PREFIX}` from the + first hyphen segment of the repo slug. That is wrong for most registered repos + (35+ use intentional abbreviations). +- Template sync left ~49 repos with local changes (discover via + `cd ~ && gitea ll`, or scan `git status --porcelain` under `~/`). +- Task status canon (`STATE-WP-0052`) is already reflected in regenerated + agent files; workplan task blocks may still use legacy literals. +- `domain` in workplan frontmatter should be the hub **domain slug** + (`infotech`), not the topic slug (`stack`). Topic linkage belongs in + `topic_slug`. 
+ +## Policy + +| Layer | Rule | +|-------|------| +| Workplan prefix | Infer from existing `workplans/*-WP-NNNN-*.md` filenames; fall back to first-token only when no workplans exist | +| `domain` frontmatter | Set to repo `domain_slug` from State Hub registration | +| `topic_slug` frontmatter | Set from registered `topic_id` when present | +| Task status in workplan blocks | `in_progress→progress`, `blocked→wait`, `cancelled/canceled→cancel` | +| Agent files | Regenerated from templates using inferred prefix — never overwrite the preserved extensions tail of `AGENTS.md` | +| Grandfathered prefixes | Short prefixes (`IRP-WP`, `CYA-WP`, …) are canonical for their repo — not migrated to first-token | + +## T01 — Inventory repos with local changes + +```task +id: STATE-WP-0067-T01 +status: progress +priority: high +``` + +Enumerate repos with uncommitted changes under `/home/worsch/*/`. + +Done when the dirty-repo list is recorded in the T04 run log. + +## T02 — Infer workplan prefix from on-disk files + +```task +id: STATE-WP-0067-T02 +status: progress +priority: high +``` + +Update `scripts/update_agent_instruction_files.py` to infer `{WP_PREFIX}` from +existing workplan filenames before falling back to first-token derivation. + +Done when `artifact-store` agent files reference `ARTIFACT-STORE-WP`, not +`ARTIFACT-WP`. + +## T03 — Workplan frontmatter normalization script + +```task +id: STATE-WP-0067-T03 +status: progress +priority: high +``` + +Add `scripts/normalize_attached_repo_workplans.py` to: + +- set `domain:` to registered `domain_slug`; +- set `topic_slug:` from registered topic when missing or wrong; +- migrate legacy task status literals inside ` ```task ` blocks. + +Support `--repo SLUG` and `--dirty` (scan `~/` for porcelain). + +## T04 — Apply normalization to dirty repos + +```task +id: STATE-WP-0067-T04 +status: todo +priority: high +``` + +For each dirty repo: + +1. `normalize_attached_repo_workplans.py --repo SLUG` +2. 
`update_agent_instruction_files.py --repo SLUG` (after T02 filter added) +3. `make fix-consistency REPO=SLUG` from `~/state-hub` + +Done when all dirty repos have clean or warnings-only consistency checks. + +## T05 — Commit and push + +```task +id: STATE-WP-0067-T05 +status: todo +priority: high +``` + +Commit agent-instruction and workplan changes per repo with a shared message. +Push to `origin` where a remote exists. + +Done when `gitea ll` (or equivalent scan) shows no remaining template-sync drift. + +## T06 — Close workplan + +```task +id: STATE-WP-0067-T06 +status: todo +priority: medium +``` + +Mark tasks done, set workplan `status: finished`, run +`make fix-consistency REPO=state-hub`. + +## Acceptance Criteria + +- Agent instructions and workplan files agree on prefix and domain/topic fields + for every dirty repo. +- `artifact-store` keeps `ARTIFACT-STORE-WP-*` filenames and IDs. +- No `domain: stack` remains where `domain_slug` is `infotech` and `stack` is the + topic slug. +- Dirty repos are committed; hub read model refreshed via fix-consistency. \ No newline at end of file