Add STATE-WP-0067 attached-repo agent and workplan normalization

Infer workplan prefixes from on-disk filenames instead of first-token
derivation, add a frontmatter normalization script, and wire Make targets
for dirty-repo sweeps.
This commit is contained in:
2026-06-22 23:15:15 +02:00
parent e4ab64fa54
commit fcb41e8c25
4 changed files with 512 additions and 47 deletions

View File

@@ -253,6 +253,25 @@ fix-consistency:
$(if $(REPO_PATH),--repo-path "$(REPO_PATH)",); \ $(if $(REPO_PATH),--repo-path "$(REPO_PATH)",); \
e=$$?; [ $$e -eq 2 ] && exit 0 || exit $$e e=$$?; [ $$e -eq 2 ] && exit 0 || exit $$e
## Normalize workplan frontmatter and task status literals in attached repos.
## Usage: make normalize-attached-workplans REPO=artifact-store
##        make normalize-attached-workplans DIRTY=1
##        make normalize-attached-workplans DIRTY=1 DRY_RUN=1
.PHONY: normalize-attached-workplans
normalize-attached-workplans:
	@# Validate the selection BEFORE running the script so a bare invocation
	@# fails fast with the usage hint instead of after the script has run.
	@test -n "$(REPO)$(DIRTY)" || (echo "ERROR: set REPO=<slug> or DIRTY=1" >&2; exit 1)
	$(UV) run python scripts/normalize_attached_repo_workplans.py \
		$(if $(REPO),--repo "$(REPO)",) \
		$(if $(DIRTY),--dirty,) \
		$(if $(DRY_RUN),--dry-run,)
## Regenerate AGENTS.md / CLAUDE.md / .claude/rules from templates.
## Usage: make update-agent-instructions REPO=artifact-store
##        make update-agent-instructions DIRTY=1
.PHONY: update-agent-instructions
update-agent-instructions:
	@# The guard must come first: invoked without --repo/--dirty the script
	@# applies no filter, so running it before the check would rewrite the
	@# instruction files of every repo and only then report the error.
	@test -n "$(REPO)$(DIRTY)" || (echo "ERROR: set REPO=<slug> or DIRTY=1" >&2; exit 1)
	$(UV) run python scripts/update_agent_instruction_files.py \
		$(if $(REPO),--repo "$(REPO)",) \
		$(if $(DIRTY),--dirty,)
## Reconcile measured token sources against State Hub. ## Reconcile measured token sources against State Hub.
## Usage: make token-reconcile [SINCE=2026-05-19] [APPLY=1] [ZERO_FALLBACKS=1] ## Usage: make token-reconcile [SINCE=2026-05-19] [APPLY=1] [ZERO_FALLBACKS=1]
token-reconcile: token-reconcile:

View File

@@ -0,0 +1,222 @@
#!/usr/bin/env python3
"""Normalize workplan frontmatter and task status literals in attached repos."""
from __future__ import annotations
import argparse
import json
import re
import subprocess
import sys
import urllib.request
from collections import Counter
from pathlib import Path
# Base URL prepended to every path passed to fetch().
API_BASE = "http://127.0.0.1:8000"
# Directory whose immediate children are scanned for git checkouts by
# dirty_repo_slugs().
# NOTE(review): hard-coded to one user's home directory — confirm this is
# intended rather than the invoking user's home.
HOME_ROOT = Path("/home/worsch")
# Captures the "<PREFIX>-WP" part of a workplan filename such as
# "STATE-WP-0067-...".
# NOTE(review): not referenced by the functions visible in this script — confirm
# whether it is still needed here.
WP_FILE_RE = re.compile(r"^([A-Z][A-Z0-9-]*-WP)-\d+")
# Captures the text between a "```task" fence line and the next "```".
TASK_BLOCK_RE = re.compile(r"```task\n(.*?)```", re.DOTALL)
# Legacy task status literal -> canonical status literal.
TASK_STATUS_MAP = {
    "blocked": "wait",
    "in_progress": "progress",
    "cancelled": "cancel",
    "canceled": "cancel",
}
def fetch(path: str, timeout: float = 30.0):
    """GET ``API_BASE + path`` and return the decoded JSON payload.

    Args:
        path: URL path (including any query string) appended to ``API_BASE``.
        timeout: Seconds to wait on the connection before giving up. Without
            a timeout a stalled API would block the whole run indefinitely.

    Raises:
        urllib.error.URLError: If the request fails or times out.
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    with urllib.request.urlopen(f"{API_BASE}{path}", timeout=timeout) as response:
        return json.load(response)
def dirty_repo_slugs(home: Path = HOME_ROOT) -> list[str]:
    """Return the names of git checkouts directly under ``home`` that have local changes.

    A child directory counts as a checkout when it contains a ``.git``
    directory, and as dirty when ``git status --porcelain`` prints anything.
    A failing ``git`` invocation is not raised (``check=False``); such a
    checkout is only reported if git still wrote something to stdout.
    """

    def has_local_changes(checkout: Path) -> bool:
        # Porcelain output is empty exactly when the working tree is clean.
        porcelain = subprocess.run(
            ["git", "-C", str(checkout), "status", "--porcelain"],
            capture_output=True,
            text=True,
            check=False,
        ).stdout
        return bool(porcelain.strip())

    return [
        entry.name
        for entry in sorted(home.iterdir())
        if (entry / ".git").is_dir() and has_local_changes(entry)
    ]
def choose_repos(repos: list[dict], only_slugs: set[str] | None) -> list[dict]:
    """Select repo records by slug, ordered by slug.

    Records without a truthy ``slug`` are ignored; when a slug occurs more
    than once the last record wins. With ``only_slugs`` set, only registered
    slugs from that set are returned (unknown slugs are dropped silently);
    with ``None`` every registered repo is returned.
    """
    registry: dict[str, dict] = {}
    for entry in repos:
        slug = entry.get("slug")
        if slug:
            registry[slug] = entry

    if only_slugs is None:
        wanted = set(registry)
    else:
        wanted = {slug for slug in only_slugs if slug in registry}
    return [registry[slug] for slug in sorted(wanted)]
def split_frontmatter(text: str) -> tuple[str | None, str]:
    """Split a document into its frontmatter text and the remainder.

    Returns ``(None, text)`` when ``text`` does not start with ``"---\\n"`` or
    has no later ``"\\n---"`` delimiter. Otherwise returns the text between the
    delimiters (without the newline that precedes the closing ``---``) and
    everything after the closing ``---``.
    """
    opener, closer = "---\n", "\n---"
    if text[: len(opener)] != opener:
        return None, text
    header, delimiter, remainder = text[len(opener) :].partition(closer)
    if not delimiter:
        return None, text
    return header, remainder
def join_frontmatter(frontmatter: str, body: str) -> str:
    """Reassemble a document from frontmatter text and the remainder.

    The frontmatter produced by splitting a document has no trailing newline,
    and in-place key replacements keep it that way. A newline is therefore
    added when missing so the closing ``---`` always starts on its own line
    instead of being glued to the last key (``domain: x---``).

    Args:
        frontmatter: Text between the ``---`` delimiters, with or without a
            trailing newline.
        body: Everything that followed the closing ``---`` in the original.

    Returns:
        The full document text.
    """
    if frontmatter and not frontmatter.endswith("\n"):
        frontmatter += "\n"
    return f"---\n{frontmatter}---{body}"
def _set_frontmatter_key(fm: str, key: str, value: str, after: str | None = None) -> str:
    """Return ``fm`` with the first ``key:`` line set to ``key: value``, adding it if absent."""
    line = f"{key}: {value}"
    # [ \t]* rather than \s*: \s also matches a newline, which would let an
    # empty value ("key:\n") swallow and overwrite the following line.
    existing = re.compile(rf"^{re.escape(key)}:[ \t]*.*$", re.MULTILINE)
    if existing.search(fm):
        # Callable replacement: the value is inserted literally, never parsed
        # as a regex replacement template.
        return existing.sub(lambda _m: line, fm, count=1)
    if after is not None:
        anchor = re.compile(rf"^{re.escape(after)}:[ \t]*.*$", re.MULTILINE)
        if anchor.search(fm):
            return anchor.sub(lambda m: f"{m.group(0)}\n{line}", fm, count=1)
    return fm.rstrip() + f"\n{line}\n"


def normalize_frontmatter(frontmatter: str, domain_slug: str, topic_slug: str | None) -> tuple[str, bool]:
    """Set the ``domain`` and ``topic_slug`` keys of a frontmatter block.

    Args:
        frontmatter: Frontmatter text without the ``---`` delimiters.
        domain_slug: Value for ``domain``; a falsy value leaves it untouched.
        topic_slug: Value for ``topic_slug``; a falsy value leaves it untouched.
            A missing ``topic_slug`` line is inserted directly after the
            ``domain`` line when there is one, otherwise appended.

    Returns:
        ``(new_frontmatter, changed)``. ``changed`` is true only when the text
        actually differs, so frontmatter that is already normalized is not
        reported (or rewritten) again on every run.
    """
    fm = frontmatter
    if domain_slug:
        fm = _set_frontmatter_key(fm, "domain", domain_slug)
    if topic_slug:
        fm = _set_frontmatter_key(fm, "topic_slug", topic_slug, after="domain")
    return fm, fm != frontmatter
def normalize_task_blocks(body: str) -> tuple[str, bool]:
    """Rewrite legacy ``status:`` literals inside fenced ``task`` blocks.

    Each block matched by ``TASK_BLOCK_RE`` has at most one ``status: <legacy>``
    line per ``TASK_STATUS_MAP`` entry replaced by its canonical literal.

    Returns:
        ``(new_body, changed)`` where ``changed`` is true when at least one
        status line was rewritten.
    """
    changed = False

    def repl(match: re.Match[str]) -> str:
        nonlocal changed
        updated = match.group(1)
        for legacy, canon in TASK_STATUS_MAP.items():
            # [ \t]* instead of \s*: \s matches "\n", so a status line that is
            # the last line of the block would lose its newline and the closing
            # fence would end up glued to it ("status: wait```").
            updated, count = re.subn(
                rf"^status:[ \t]*{re.escape(legacy)}[ \t]*$",
                f"status: {canon}",
                updated,
                count=1,
                flags=re.MULTILINE,
            )
            if count:
                changed = True
        return f"```task\n{updated}```"

    return TASK_BLOCK_RE.sub(repl, body), changed
def normalize_workplan_file(
    path: Path,
    domain_slug: str,
    topic_slug: str | None,
    *,
    dry_run: bool,
) -> bool:
    """Normalize one workplan file and report whether it needed changes.

    Files without frontmatter are left alone. The file is written back only
    when something changed and ``dry_run`` is false.

    Returns:
        True when the frontmatter or a task block was (or would be) changed.
    """
    source_text = path.read_text(encoding="utf-8")
    header, remainder = split_frontmatter(source_text)
    if header is None:
        return False

    new_header, header_dirty = normalize_frontmatter(header, domain_slug, topic_slug)
    new_remainder, body_dirty = normalize_task_blocks(remainder)
    if header_dirty or body_dirty:
        if not dry_run:
            path.write_text(join_frontmatter(new_header, new_remainder), encoding="utf-8")
        return True
    return False
def repo_topic_slug(repo: dict, topics_by_id: dict[str, dict]) -> str | None:
    """Look up the slug of the topic referenced by ``repo["topic_id"]``.

    Returns ``None`` when the repo has no topic id, the id is not present in
    ``topics_by_id``, or the topic record carries no ``slug``.
    """
    key = repo.get("topic_id")
    entry = topics_by_id.get(key) if key else None
    if not entry:
        return None
    return entry.get("slug")
def normalize_repo(repo: dict, topics_by_id: dict[str, dict], *, dry_run: bool) -> list[str]:
    """Normalize every non-ADHOC workplan in one repo's ``workplans/`` directory.

    Args:
        repo: Repo record; reads ``local_path``, ``domain_slug`` and ``topic_id``.
        topics_by_id: Topic records keyed by id, used to resolve the topic slug.
        dry_run: When true, files are inspected but never written.

    Returns:
        Paths (relative to the repo root) of workplans that were, or would be,
        changed. Empty when the repo has no local path or no ``workplans/``.
    """
    local_path = repo.get("local_path")
    if not local_path:
        # A missing/None path would raise, and an empty one resolves to the
        # current directory — which would rewrite whatever workplans live
        # there with this repo's domain. Skip such records instead.
        return []
    path = Path(local_path)
    workplans_dir = path / "workplans"
    if not workplans_dir.is_dir():
        return []
    domain_slug = repo.get("domain_slug") or ""
    topic_slug = repo_topic_slug(repo, topics_by_id)
    updated_files: list[str] = []
    for workplan in sorted(workplans_dir.glob("*.md")):
        # Files whose name starts with "ADHOC" are excluded from normalization.
        if workplan.name.startswith("ADHOC"):
            continue
        if normalize_workplan_file(workplan, domain_slug, topic_slug, dry_run=dry_run):
            updated_files.append(str(workplan.relative_to(path)))
    return updated_files
def main() -> int:
    """Command-line entry point: normalize workplans in the selected repos.

    Repos are selected with ``--repo SLUG`` (repeatable) or ``--dirty``; when
    both are given the explicit ``--repo`` list takes precedence. Prints one
    summary per repo that has (or would have) changes and a final total.

    Returns:
        Process exit status (always 0; argument errors exit via argparse).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--repo", action="append", dest="repos", help="Repo slug to normalize")
    parser.add_argument("--dirty", action="store_true", help="Normalize repos with local git changes")
    parser.add_argument("--dry-run", action="store_true", help="Report changes without writing")
    args = parser.parse_args()

    only_slugs: set[str] | None
    if args.repos:
        only_slugs = set(args.repos)
    elif args.dirty:
        only_slugs = set(dirty_repo_slugs())
    else:
        # parser.error() prints usage and exits; it does not return.
        parser.error("Specify --repo SLUG and/or --dirty")

    repos = fetch("/repos/")
    topics = fetch("/topics/?status=active")
    topics_by_id = {topic["id"]: topic for topic in topics}
    selected = choose_repos(repos, only_slugs)

    if args.repos:
        # An explicitly requested slug that is not registered (e.g. a typo)
        # would otherwise be dropped without any hint. Not done for --dirty,
        # where unregistered directories are expected.
        found = {repo["slug"] for repo in selected}
        for slug in sorted(set(args.repos) - found):
            print(f"warning: repo slug {slug!r} is not registered; skipped", file=sys.stderr)

    total_files = 0
    for repo in selected:
        updated = normalize_repo(repo, topics_by_id, dry_run=args.dry_run)
        if updated:
            total_files += len(updated)
            mode = "would update" if args.dry_run else "updated"
            print(f"{repo['slug']}: {mode} {len(updated)} workplan(s)")
            for name in updated:
                print(f" - {name}")
    print(f"Done. {total_files} workplan file(s) {'would change' if args.dry_run else 'changed'}.")
    return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -1,14 +1,20 @@
from __future__ import annotations from __future__ import annotations
import argparse
import json import json
import re import re
import subprocess
import sys
import urllib.request import urllib.request
from collections import Counter
from pathlib import Path from pathlib import Path
ROOT = Path(__file__).resolve().parent.parent ROOT = Path(__file__).resolve().parent.parent
TEMPLATE_DIR = ROOT / "scripts" / "project_rules" TEMPLATE_DIR = ROOT / "scripts" / "project_rules"
API_BASE = "http://127.0.0.1:8000" API_BASE = "http://127.0.0.1:8000"
HOME_ROOT = Path("/home/worsch")
WP_FILE_RE = re.compile(r"^([A-Z][A-Z0-9-]*-WP)-\d+")
def fetch(path: str): def fetch(path: str):
@@ -51,11 +57,34 @@ def repo_topic_id(repo: dict, topics: list[dict]) -> str:
return match["id"] if match else "(none)" return match["id"] if match else "(none)"
def wp_prefix(repo_slug: str) -> str: def default_wp_prefix(repo_slug: str) -> str:
first = repo_slug.split("-", 1)[0].upper() first = repo_slug.split("-", 1)[0].upper()
return f"{first}-WP" return f"{first}-WP"
def infer_wp_prefix(repo_path: Path, repo_slug: str) -> str:
    """Prefer established on-disk workplan prefixes over first-token derivation.

    Counts the prefix captured by ``WP_FILE_RE`` for every non-ADHOC ``*.md``
    file in ``<repo_path>/workplans`` and returns the most frequent one. When
    no file matches, falls back to ``default_wp_prefix(repo_slug)``. A warning
    is printed to stderr when more than one prefix is in use.

    Args:
        repo_path: Root directory of the repo checkout.
        repo_slug: Repo slug, used for the fallback and in the warning.

    Returns:
        The workplan prefix to use for this repo.
    """
    counts: Counter[str] = Counter()
    workplans_dir = repo_path / "workplans"
    if workplans_dir.is_dir():
        # Iterate in sorted order: glob() yields entries in arbitrary order and
        # most_common() breaks ties by first occurrence, so without sorting a
        # tie between prefixes could resolve differently from run to run.
        for workplan in sorted(workplans_dir.glob("*.md")):
            if workplan.name.startswith("ADHOC"):
                continue
            match = WP_FILE_RE.match(workplan.name)
            if match:
                counts[match.group(1)] += 1
    if not counts:
        return default_wp_prefix(repo_slug)
    top_prefix, top_count = counts.most_common(1)[0]
    if len(counts) > 1:
        print(
            f"warning: {repo_slug} has multiple workplan prefixes {dict(counts)}; "
            f"using {top_prefix} ({top_count} files)",
            file=sys.stderr,
        )
    return top_prefix
def brief_domain(path: Path) -> str | None: def brief_domain(path: Path) -> str | None:
brief = path / ".custodian-brief.md" brief = path / ".custodian-brief.md"
if not brief.exists(): if not brief.exists():
@@ -64,7 +93,23 @@ def brief_domain(path: Path) -> str | None:
return match.group(1) if match else None return match.group(1) if match else None
def choose_repos(repos: list[dict]) -> list[dict]: def dirty_repo_slugs(home: Path = HOME_ROOT) -> list[str]:
slugs: list[str] = []
for path in sorted(home.iterdir()):
if not (path / ".git").is_dir():
continue
result = subprocess.run(
["git", "-C", str(path), "status", "--porcelain"],
capture_output=True,
text=True,
check=False,
)
if result.stdout.strip():
slugs.append(path.name)
return slugs
def choose_repos(repos: list[dict], only_slugs: set[str] | None = None) -> list[dict]:
by_path: dict[str, list[dict]] = {} by_path: dict[str, list[dict]] = {}
for repo in repos: for repo in repos:
local_path = repo.get("local_path") or "" local_path = repo.get("local_path") or ""
@@ -83,19 +128,84 @@ def choose_repos(repos: list[dict]) -> list[dict]:
candidates = domain_matches candidates = domain_matches
active = [r for r in candidates if r.get("status") == "active"] active = [r for r in candidates if r.get("status") == "active"]
chosen.append(active[0] if active else candidates[0]) chosen.append(active[0] if active else candidates[0])
if only_slugs is not None:
chosen = [repo for repo in chosen if repo.get("slug") in only_slugs]
return chosen return chosen
def main() -> None: def update_repo(
repo: dict,
topics: list[dict],
*,
agents_template: str,
claude_template: str,
scope_template: str,
credential_routing_template: str,
rule_templates: dict[str, str],
) -> str:
path = Path(repo["local_path"])
repo_slug = repo["slug"]
project_name = repo.get("name") or path.name
description = repo.get("description") or f"{project_name} - (fill in purpose)"
prefix = infer_wp_prefix(path, repo_slug)
values = {
"PROJECT_NAME": project_name,
"PROJECT_DESCRIPTION": description,
"DOMAIN": repo.get("domain_slug") or "",
"TOPIC_ID": repo_topic_id(repo, topics),
"REPO_SLUG": repo_slug,
"WP_PREFIX": prefix,
"CREDENTIAL_ROUTING": render(
credential_routing_template,
{
"PROJECT_NAME": project_name,
"PROJECT_DESCRIPTION": description,
"DOMAIN": repo.get("domain_slug") or "",
"TOPIC_ID": repo_topic_id(repo, topics),
"REPO_SLUG": repo_slug,
"WP_PREFIX": prefix,
},
),
}
agents_path = path / "AGENTS.md"
extensions = read_agents_extensions(agents_path)
agents_path.write_text(build_agents_md(agents_template, values, extensions), encoding="utf-8")
(path / "CLAUDE.md").write_text(render(claude_template, values), encoding="utf-8")
scope_path = path / "SCOPE.md"
if not scope_path.exists():
scope_path.write_text(render(scope_template, values), encoding="utf-8")
rules_dir = path / ".claude" / "rules"
rules_dir.mkdir(parents=True, exist_ok=True)
for name, template in rule_templates.items():
(rules_dir / f"{name}.md").write_text(render(template, values), encoding="utf-8")
return f"{repo_slug}\t{path}\t{prefix}"
def main() -> int:
parser = argparse.ArgumentParser(description="Regenerate agent instruction files from templates.")
parser.add_argument("--repo", action="append", dest="repos", help="Limit to repo slug(s)")
parser.add_argument("--dirty", action="store_true", help="Limit to repos with local git changes")
args = parser.parse_args()
only_slugs: set[str] | None = None
if args.repos:
only_slugs = set(args.repos)
elif args.dirty:
only_slugs = set(dirty_repo_slugs())
repos = fetch("/repos/") repos = fetch("/repos/")
topics = fetch("/topics/?status=active") topics = fetch("/topics/?status=active")
agents_template = (TEMPLATE_DIR / "agents-codex.template").read_text(encoding="utf-8") agents_template = (TEMPLATE_DIR / "agents-codex.template").read_text(encoding="utf-8")
claude_template = (TEMPLATE_DIR / "claude-md.template").read_text(encoding="utf-8") claude_template = (TEMPLATE_DIR / "claude-md.template").read_text(encoding="utf-8")
scope_template = (TEMPLATE_DIR / "scope.template").read_text(encoding="utf-8") scope_template = (TEMPLATE_DIR / "scope.template").read_text(encoding="utf-8")
credential_routing_template = ( credential_routing_template = (TEMPLATE_DIR / "credential-routing.template").read_text(
TEMPLATE_DIR / "credential-routing.template" encoding="utf-8"
).read_text(encoding="utf-8") )
rule_names = [ rule_names = [
"repo-identity", "repo-identity",
"session-protocol", "session-protocol",
@@ -117,54 +227,27 @@ def main() -> None:
) )
) )
else: else:
rule_templates[name] = ( rule_templates[name] = (TEMPLATE_DIR / f"{name}.template").read_text(encoding="utf-8")
TEMPLATE_DIR / f"{name}.template"
).read_text(encoding="utf-8")
updated: list[str] = [] updated: list[str] = []
for repo in choose_repos(repos): for repo in choose_repos(repos, only_slugs):
path = Path(repo["local_path"]) updated.append(
repo_slug = repo["slug"] update_repo(
project_name = repo.get("name") or path.name repo,
description = repo.get("description") or f"{project_name} - (fill in purpose)" topics,
values = { agents_template=agents_template,
"PROJECT_NAME": project_name, claude_template=claude_template,
"PROJECT_DESCRIPTION": description, scope_template=scope_template,
"DOMAIN": repo.get("domain_slug") or "", credential_routing_template=credential_routing_template,
"TOPIC_ID": repo_topic_id(repo, topics), rule_templates=rule_templates,
"REPO_SLUG": repo_slug, )
"WP_PREFIX": wp_prefix(repo_slug),
"CREDENTIAL_ROUTING": render(credential_routing_template, {
"PROJECT_NAME": project_name,
"PROJECT_DESCRIPTION": description,
"DOMAIN": repo.get("domain_slug") or "",
"TOPIC_ID": repo_topic_id(repo, topics),
"REPO_SLUG": repo_slug,
"WP_PREFIX": wp_prefix(repo_slug),
}),
}
agents_path = path / "AGENTS.md"
extensions = read_agents_extensions(agents_path)
agents_path.write_text(
build_agents_md(agents_template, values, extensions), encoding="utf-8"
) )
(path / "CLAUDE.md").write_text(render(claude_template, values), encoding="utf-8")
scope_path = path / "SCOPE.md"
if not scope_path.exists():
scope_path.write_text(render(scope_template, values), encoding="utf-8")
rules_dir = path / ".claude" / "rules"
rules_dir.mkdir(parents=True, exist_ok=True)
for name, template in rule_templates.items():
(rules_dir / f"{name}.md").write_text(render(template, values), encoding="utf-8")
updated.append(f"{repo_slug}\t{path}")
print(f"Updated {len(updated)} local repo(s):") print(f"Updated {len(updated)} local repo(s):")
for line in updated: for line in updated:
print(line) print(line)
return 0
if __name__ == "__main__": if __name__ == "__main__":
main() raise SystemExit(main())

View File

@@ -0,0 +1,141 @@
---
id: STATE-WP-0067
type: workplan
title: "Attached Repo Agent Instruction And Workplan Frontmatter Normalization"
domain: custodian
repo: state-hub
status: active
owner: codex
topic_slug: custodian
created: "2026-06-22"
updated: "2026-06-22"
---
# STATE-WP-0067 — Attached Repo Agent Instruction And Workplan Frontmatter Normalization
## Goal
Close drift introduced by the State Hub agent-instruction template sync across
attached repos. Agent files were regenerated with a first-token workplan prefix
(`artifact-store` → `ARTIFACT-WP`) and `domain: infotech`, while existing
workplan files retain repo-specific prefixes (`ARTIFACT-STORE-WP`, `IRP-WP`, …)
and legacy frontmatter (`domain: stack` where `stack` is the topic slug).
Per ADR-001, **workplan files are the source of truth**. Agent instructions must
match on-disk workplan prefixes and frontmatter conventions; workplans are
renamed only when a repo has no established prefix yet.
## Context
- `scripts/update_agent_instruction_files.py` derives `{WP_PREFIX}` from the
first hyphen segment of the repo slug. That is wrong for most registered repos
(35+ use intentional abbreviations).
- Template sync left ~49 repos with local changes (discover via
`cd ~ && gitea ll`, or scan `git status --porcelain` under `~/`).
- Task status canon (`STATE-WP-0052`) is already reflected in regenerated
agent files; workplan task blocks may still use legacy literals.
- `domain` in workplan frontmatter should be the hub **domain slug**
(`infotech`), not the topic slug (`stack`). Topic linkage belongs in
`topic_slug`.
## Policy
| Layer | Rule |
|-------|------|
| Workplan prefix | Infer from existing `workplans/*-WP-NNNN-*.md` filenames; fall back to first-token only when no workplans exist |
| `domain` frontmatter | Set to repo `domain_slug` from State Hub registration |
| `topic_slug` frontmatter | Set from registered `topic_id` when present |
| Task status in workplan blocks | `in_progress→progress`, `blocked→wait`, `cancelled/canceled→cancel` |
| Agent files | Regenerated from templates using inferred prefix — never overwrite `<!-- REPO-AGENTS-EXTENSIONS -->` tail |
| Grandfathered prefixes | Short prefixes (`IRP-WP`, `CYA-WP`, …) are canonical for their repo — not migrated to first-token |
## T01 — Inventory repos with local changes
```task
id: STATE-WP-0067-T01
status: progress
priority: high
```
Enumerate repos with uncommitted changes under `/home/worsch/*/`.
Done when the dirty-repo list is recorded in the T04 run log.
## T02 — Infer workplan prefix from on-disk files
```task
id: STATE-WP-0067-T02
status: progress
priority: high
```
Update `scripts/update_agent_instruction_files.py` to infer `{WP_PREFIX}` from
existing workplan filenames before falling back to first-token derivation.
Done when `artifact-store` agent files reference `ARTIFACT-STORE-WP`, not
`ARTIFACT-WP`.
## T03 — Workplan frontmatter normalization script
```task
id: STATE-WP-0067-T03
status: progress
priority: high
```
Add `scripts/normalize_attached_repo_workplans.py` to:
- set `domain:` to registered `domain_slug`;
- set `topic_slug:` from registered topic when missing or wrong;
- migrate legacy task status literals inside ` ```task ` blocks.
Support `--repo SLUG` and `--dirty` (scan `~/` for porcelain).
## T04 — Apply normalization to dirty repos
```task
id: STATE-WP-0067-T04
status: todo
priority: high
```
For each dirty repo:
1. `normalize_attached_repo_workplans.py --repo <slug>`
2. `update_agent_instruction_files.py --repo <slug>` (after T02 filter added)
3. `make fix-consistency REPO=<slug>` from `~/state-hub`
Done when all dirty repos have clean or warnings-only consistency checks.
## T05 — Commit and push
```task
id: STATE-WP-0067-T05
status: todo
priority: high
```
Commit agent-instruction and workplan changes per repo with a shared message.
Push to `origin` where a remote exists.
Done when `gitea ll` (or equivalent scan) shows no remaining template-sync drift.
## T06 — Close workplan
```task
id: STATE-WP-0067-T06
status: todo
priority: medium
```
Mark tasks done, set workplan `status: finished`, run
`make fix-consistency REPO=state-hub`.
## Acceptance Criteria
- Agent instructions and workplan files agree on prefix and domain/topic fields
for every dirty repo.
- `artifact-store` keeps `ARTIFACT-STORE-WP-*` filenames and IDs.
- No `domain: stack` remains where `domain_slug` is `infotech` and `stack` is the
topic slug.
- Dirty repos are committed; hub read model refreshed via fix-consistency.