Add STATE-WP-0067 attached-repo agent and workplan normalization

Infer workplan prefixes from on-disk filenames instead of first-token
derivation, add a frontmatter normalization script, and wire Make targets
for dirty-repo sweeps.
This commit is contained in:
2026-06-22 23:15:15 +02:00
parent e4ab64fa54
commit fcb41e8c25
4 changed files with 512 additions and 47 deletions

View File

@@ -253,6 +253,25 @@ fix-consistency:
$(if $(REPO_PATH),--repo-path "$(REPO_PATH)",); \
e=$$?; [ $$e -eq 2 ] && exit 0 || exit $$e
## Normalize workplan frontmatter and task status literals in attached repos.
## Usage: make normalize-attached-workplans REPO=artifact-store
## make normalize-attached-workplans DIRTY=1
## make normalize-attached-workplans DIRTY=1 DRY_RUN=1
## The REPO/DIRTY guard runs first so a missing selector fails before the script starts.
normalize-attached-workplans:
	@test -n "$(REPO)$(DIRTY)" || (echo "ERROR: set REPO=<slug> or DIRTY=1"; exit 1)
	$(UV) run python scripts/normalize_attached_repo_workplans.py \
		$(if $(REPO),--repo "$(REPO)",) \
		$(if $(DIRTY),--dirty,) \
		$(if $(DRY_RUN),--dry-run,)
## Regenerate AGENTS.md / CLAUDE.md / .claude/rules from templates.
## Usage: make update-agent-instructions REPO=artifact-store
## make update-agent-instructions DIRTY=1
## The REPO/DIRTY guard must run first: without a selector the script would
## regenerate files in every registered repo before the guard could fail.
update-agent-instructions:
	@test -n "$(REPO)$(DIRTY)" || (echo "ERROR: set REPO=<slug> or DIRTY=1"; exit 1)
	$(UV) run python scripts/update_agent_instruction_files.py \
		$(if $(REPO),--repo "$(REPO)",) \
		$(if $(DIRTY),--dirty,)
## Reconcile measured token sources against State Hub.
## Usage: make token-reconcile [SINCE=2026-05-19] [APPLY=1] [ZERO_FALLBACKS=1]
token-reconcile:

View File

@@ -0,0 +1,222 @@
#!/usr/bin/env python3
"""Normalize workplan frontmatter and task status literals in attached repos."""
from __future__ import annotations
import argparse
import json
import re
import subprocess
import sys
import urllib.request
from collections import Counter
from pathlib import Path
# Base URL that fetch() prepends to every request path.
API_BASE = "http://127.0.0.1:8000"
# Default directory whose immediate children dirty_repo_slugs() inspects.
HOME_ROOT = Path("/home/worsch")
# Captures the prefix of a workplan filename up to and including "-WP"
# (e.g. "ARTIFACT-STORE-WP" from "ARTIFACT-STORE-WP-0001-...").
# NOTE(review): not referenced by any function visible in this script - confirm it is needed.
WP_FILE_RE = re.compile(r"^([A-Z][A-Z0-9-]*-WP)-\d+")
# Matches a ```task fenced block; group 1 is the text between the opening
# "```task" line and the next "```" (DOTALL lets it span multiple lines).
TASK_BLOCK_RE = re.compile(r"```task\n(.*?)```", re.DOTALL)
# Legacy task status literal -> replacement literal applied by normalize_task_blocks().
TASK_STATUS_MAP = {
"blocked": "wait",
"in_progress": "progress",
"cancelled": "cancel",
"canceled": "cancel",
}
def fetch(path: str):
    """Request ``API_BASE`` + *path* and return the decoded JSON payload."""
    url = f"{API_BASE}{path}"
    with urllib.request.urlopen(url) as resp:
        payload = json.load(resp)
    return payload
def dirty_repo_slugs(home: Path = HOME_ROOT) -> list[str]:
    """Return the names of git checkouts directly under *home* that have local changes.

    A child of *home* is considered a checkout only when it contains a ``.git``
    directory. It is reported when ``git status --porcelain`` prints anything;
    git's exit status is not inspected. Names are returned in sorted path order.
    """

    def has_local_changes(checkout: Path) -> bool:
        status = subprocess.run(
            ["git", "-C", str(checkout), "status", "--porcelain"],
            capture_output=True,
            text=True,
            check=False,
        )
        return bool(status.stdout.strip())

    return [
        entry.name
        for entry in sorted(home.iterdir())
        if (entry / ".git").is_dir() and has_local_changes(entry)
    ]
def choose_repos(repos: list[dict], only_slugs: set[str] | None) -> list[dict]:
    """Return repo records ordered by slug, optionally restricted to *only_slugs*.

    Records without a truthy ``slug`` are ignored; when several records share a
    slug the last one wins. Slugs in *only_slugs* that match no record are
    dropped silently. ``None`` means "no restriction".
    """
    index: dict[str, dict] = {}
    for record in repos:
        slug = record.get("slug")
        if slug:
            index[slug] = record
    if only_slugs is None:
        wanted = index.keys()
    else:
        wanted = index.keys() & only_slugs
    return [index[slug] for slug in sorted(wanted)]
# A closing fence is a line consisting of exactly "---" (trailing blanks allowed).
_FRONTMATTER_CLOSE_RE = re.compile(r"^---[ \t]*$", re.MULTILINE)


def split_frontmatter(text: str) -> tuple[str | None, str]:
    """Split *text* into ``(frontmatter, body)``.

    The document must open with a ``---`` line and contain a later line that is
    exactly ``---``; otherwise ``(None, text)`` is returned unchanged.

    Returns:
        ``frontmatter`` is the text between the fences without the newline that
        precedes the closing fence (``""`` when the fences are adjacent).
        ``body`` is everything after the closing ``---``, normally starting
        with the newline that ends the fence line.
    """
    if not text.startswith("---\n"):
        return None, text
    # Match the fence as a whole line: a plain "\n---" prefix search would stop
    # at front-matter lines that merely begin with "---" and would miss an
    # empty front matter ("---\n---\n").
    closing = _FRONTMATTER_CLOSE_RE.search(text, 4)
    if closing is None:
        return None, text
    fence = closing.start()
    # fence - 1 is the newline ending the last front-matter line; clamp for the
    # empty case where the closing fence immediately follows the opening one.
    frontmatter = text[4 : max(fence - 1, 4)]
    return frontmatter, text[fence + 3 :]
def join_frontmatter(frontmatter: str, body: str) -> str:
    """Reassemble a document from *frontmatter* and *body*.

    *frontmatter* may or may not end with a newline; a missing one is added so
    the closing ``---`` always starts on its own line instead of being fused
    onto the last front-matter entry. *body* is appended verbatim after the
    closing fence.
    """
    if frontmatter and not frontmatter.endswith("\n"):
        frontmatter += "\n"
    return f"---\n{frontmatter}---{body}"
def _upsert_frontmatter_key(fm: str, key: str, value: str, after: str | None = None) -> str:
    """Return *fm* with top-level ``key: value`` set, adding the line if absent.

    An existing ``key:`` line is rewritten in place. Otherwise the new line is
    inserted directly below the ``after:`` line when that key exists, or
    appended (newline-terminated) at the end.
    """
    line = f"{key}: {value}"
    # ".*" never crosses a newline, so an empty "key:" cannot swallow the next line.
    existing = re.compile(rf"^{re.escape(key)}:.*$", re.MULTILINE)
    if existing.search(fm):
        # Callable replacement: the value is inserted literally, not as a regex template.
        return existing.sub(lambda _m: line, fm, count=1)
    if after is not None:
        anchor = re.compile(rf"^{re.escape(after)}:.*$", re.MULTILINE)
        if anchor.search(fm):
            return anchor.sub(lambda m: f"{m.group(0)}\n{line}", fm, count=1)
    head = fm.rstrip()
    return f"{head}\n{line}\n" if head else f"{line}\n"


def normalize_frontmatter(frontmatter: str, domain_slug: str, topic_slug: str | None) -> tuple[str, bool]:
    """Set ``domain`` and ``topic_slug`` in a front-matter string.

    Args:
        frontmatter: Text between the ``---`` fences.
        domain_slug: Value for ``domain:``; a falsy value leaves the key alone.
        topic_slug: Value for ``topic_slug:``; a falsy value leaves the key alone.

    Returns:
        ``(new_frontmatter, changed)``. ``changed`` is true only when the text
        actually differs, so files whose values are already correct are not
        reported or rewritten.
    """
    fm = frontmatter
    if domain_slug:
        fm = _upsert_frontmatter_key(fm, "domain", domain_slug)
    if topic_slug:
        # A new topic_slug line goes directly under domain when domain exists.
        fm = _upsert_frontmatter_key(fm, "topic_slug", topic_slug, after="domain")
    return fm, fm != frontmatter
def normalize_task_blocks(body: str) -> tuple[str, bool]:
    """Rewrite legacy ``status:`` literals inside task fenced blocks.

    Each block matched by ``TASK_BLOCK_RE`` is scanned for a ``status:`` line
    whose value is a key of ``TASK_STATUS_MAP``; the value is replaced with
    the mapped literal (at most one replacement per legacy literal per block).

    Returns:
        ``(new_body, changed)`` where ``changed`` is true when at least one
        status line was rewritten.
    """
    changed = False

    def repl(match: re.Match[str]) -> str:
        nonlocal changed
        updated = match.group(1)
        for legacy, canon in TASK_STATUS_MAP.items():
            # Only blanks/tabs around the value: "\s*" before "$" would also
            # consume the newline after a final status line and glue the
            # closing fence onto it.
            updated, count = re.subn(
                rf"^status:[ \t]*{re.escape(legacy)}[ \t]*$",
                f"status: {canon}",
                updated,
                count=1,
                flags=re.MULTILINE,
            )
            if count:
                changed = True
        return f"```task\n{updated}```"

    return TASK_BLOCK_RE.sub(repl, body), changed
def normalize_workplan_file(
    path: Path,
    domain_slug: str,
    topic_slug: str | None,
    *,
    dry_run: bool,
) -> bool:
    """Normalize one workplan file and report whether it needed changes.

    The file is read as UTF-8, its front matter is passed through
    ``normalize_frontmatter`` and its body through ``normalize_task_blocks``.
    Files without front matter are left alone. The file is rewritten only when
    something changed and *dry_run* is false.

    Returns:
        True when the file changed (or would change under *dry_run*).
    """
    source_text = path.read_text(encoding="utf-8")
    header, remainder = split_frontmatter(source_text)
    if header is None:
        return False

    new_header, header_touched = normalize_frontmatter(header, domain_slug, topic_slug)
    new_remainder, blocks_touched = normalize_task_blocks(remainder)
    if header_touched or blocks_touched:
        if not dry_run:
            path.write_text(join_frontmatter(new_header, new_remainder), encoding="utf-8")
        return True
    return False
def repo_topic_slug(repo: dict, topics_by_id: dict[str, dict]) -> str | None:
    """Look up the slug of the topic referenced by ``repo["topic_id"]``.

    Returns None when the repo has no topic id, the id is not present in
    *topics_by_id*, or the topic record has no ``slug``.
    """
    topic_id = repo.get("topic_id")
    if topic_id:
        topic = topics_by_id.get(topic_id)
        if topic:
            return topic.get("slug")
    return None
def normalize_repo(repo: dict, topics_by_id: dict[str, dict], *, dry_run: bool) -> list[str]:
    """Normalize every non-ADHOC workplan under ``<local_path>/workplans``.

    Args:
        repo: Repo record; ``local_path``, ``domain_slug`` and ``topic_id`` are read.
        topics_by_id: Topic records keyed by id, used to resolve the topic slug.
        dry_run: When true, files are inspected but not written.

    Returns:
        Paths (relative to the repo root) of the files that changed or would
        change. Empty when the record has no ``local_path`` or the checkout has
        no ``workplans`` directory.
    """
    # Registrations without a local checkout are skipped rather than aborting
    # the whole sweep with a KeyError/TypeError.
    local_path = repo.get("local_path")
    if not local_path:
        return []
    path = Path(local_path)
    workplans_dir = path / "workplans"
    if not workplans_dir.is_dir():
        return []
    domain_slug = repo.get("domain_slug") or ""
    topic_slug = repo_topic_slug(repo, topics_by_id)
    updated_files: list[str] = []
    for workplan in sorted(workplans_dir.glob("*.md")):
        # Files whose names start with "ADHOC" are deliberately left untouched.
        if workplan.name.startswith("ADHOC"):
            continue
        if normalize_workplan_file(workplan, domain_slug, topic_slug, dry_run=dry_run):
            updated_files.append(str(workplan.relative_to(path)))
    return updated_files
def main() -> int:
    """Parse CLI options, normalize the selected repos and print a summary.

    Repos are selected with ``--repo`` (repeatable) or, when no ``--repo`` is
    given, with ``--dirty``; supplying neither is a usage error. Returns 0.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--repo", action="append", dest="repos", help="Repo slug to normalize")
    cli.add_argument("--dirty", action="store_true", help="Normalize repos with local git changes")
    cli.add_argument("--dry-run", action="store_true", help="Report changes without writing")
    opts = cli.parse_args()

    only_slugs: set[str] | None
    if opts.repos:
        only_slugs = set(opts.repos)
    elif opts.dirty:
        only_slugs = set(dirty_repo_slugs())
    else:
        # ArgumentParser.error() prints usage and exits, so nothing below runs.
        cli.error("Specify --repo SLUG and/or --dirty")

    repos = fetch("/repos/")
    topics = fetch("/topics/?status=active")
    topics_by_id = {topic["id"]: topic for topic in topics}

    verb = "would update" if opts.dry_run else "updated"
    outcome = "would change" if opts.dry_run else "changed"
    total_files = 0
    for repo in choose_repos(repos, only_slugs):
        touched = normalize_repo(repo, topics_by_id, dry_run=opts.dry_run)
        if not touched:
            continue
        total_files += len(touched)
        print(f"{repo['slug']}: {verb} {len(touched)} workplan(s)")
        for name in touched:
            print(f" - {name}")
    print(f"Done. {total_files} workplan file(s) {outcome}.")
    return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -1,14 +1,20 @@
from __future__ import annotations
import argparse
import json
import re
import subprocess
import sys
import urllib.request
from collections import Counter
from pathlib import Path
# Directory two levels above this file's resolved location.
ROOT = Path(__file__).resolve().parent.parent
# Directory the *.template files are read from.
TEMPLATE_DIR = ROOT / "scripts" / "project_rules"
# Base URL of the HTTP API; presumably consumed by fetch() - its body is not visible here.
API_BASE = "http://127.0.0.1:8000"
# Default directory whose immediate children dirty_repo_slugs() inspects.
HOME_ROOT = Path("/home/worsch")
# Captures the prefix of a workplan filename up to and including "-WP"
# (e.g. "ARTIFACT-STORE-WP" from "ARTIFACT-STORE-WP-0001-..."); used by infer_wp_prefix().
WP_FILE_RE = re.compile(r"^([A-Z][A-Z0-9-]*-WP)-\d+")
def fetch(path: str):
@@ -51,11 +57,34 @@ def repo_topic_id(repo: dict, topics: list[dict]) -> str:
return match["id"] if match else "(none)"
def wp_prefix(repo_slug: str) -> str:
def default_wp_prefix(repo_slug: str) -> str:
    """Derive a fallback prefix: the slug's first hyphen-separated token, upper-cased, plus ``-WP``."""
    head, _sep, _rest = repo_slug.partition("-")
    return head.upper() + "-WP"
def infer_wp_prefix(repo_path: Path, repo_slug: str) -> str:
    """Prefer established on-disk workplan prefixes over first-token derivation.

    Counts the ``*-WP`` prefixes of ``workplans/*.md`` filenames (files whose
    names start with ``ADHOC`` are ignored) and returns the most frequent one.
    Ties are broken alphabetically so the result does not depend on directory
    enumeration order. When several prefixes exist a warning is written to
    stderr. Falls back to ``default_wp_prefix(repo_slug)`` when no filename
    matches.
    """
    counts: Counter[str] = Counter()
    workplans_dir = repo_path / "workplans"
    if workplans_dir.is_dir():
        for workplan in workplans_dir.glob("*.md"):
            if workplan.name.startswith("ADHOC"):
                continue
            match = WP_FILE_RE.match(workplan.name)
            if match:
                counts[match.group(1)] += 1
    if not counts:
        return default_wp_prefix(repo_slug)
    # glob() yields entries in filesystem order, so picking "the first maximum"
    # could flip between machines; order by (-count, prefix) instead.
    top_prefix, top_count = min(counts.items(), key=lambda item: (-item[1], item[0]))
    if len(counts) > 1:
        print(
            f"warning: {repo_slug} has multiple workplan prefixes {dict(sorted(counts.items()))}; "
            f"using {top_prefix} ({top_count} files)",
            file=sys.stderr,
        )
    return top_prefix
def brief_domain(path: Path) -> str | None:
brief = path / ".custodian-brief.md"
if not brief.exists():
@@ -64,7 +93,23 @@ def brief_domain(path: Path) -> str | None:
return match.group(1) if match else None
def choose_repos(repos: list[dict]) -> list[dict]:
def dirty_repo_slugs(home: Path = HOME_ROOT) -> list[str]:
    """List directory names under *home* that are git checkouts with local changes.

    Only immediate children containing a ``.git`` directory are examined. A
    checkout is listed when ``git status --porcelain`` produces any output;
    the command's exit status is ignored. Results follow sorted path order.
    """
    status_args = ("status", "--porcelain")
    dirty: list[str] = []
    for candidate in sorted(home.iterdir()):
        if (candidate / ".git").is_dir():
            proc = subprocess.run(
                ["git", "-C", str(candidate), *status_args],
                capture_output=True,
                text=True,
                check=False,
            )
            if proc.stdout.strip():
                dirty += [candidate.name]
    return dirty
def choose_repos(repos: list[dict], only_slugs: set[str] | None = None) -> list[dict]:
by_path: dict[str, list[dict]] = {}
for repo in repos:
local_path = repo.get("local_path") or ""
@@ -83,19 +128,84 @@ def choose_repos(repos: list[dict]) -> list[dict]:
candidates = domain_matches
active = [r for r in candidates if r.get("status") == "active"]
chosen.append(active[0] if active else candidates[0])
if only_slugs is not None:
chosen = [repo for repo in chosen if repo.get("slug") in only_slugs]
return chosen
def main() -> None:
def update_repo(
    repo: dict,
    topics: list[dict],
    *,
    agents_template: str,
    claude_template: str,
    scope_template: str,
    credential_routing_template: str,
    rule_templates: dict[str, str],
) -> str:
    """Regenerate the agent instruction files of one repo from templates.

    Writes ``AGENTS.md``, ``CLAUDE.md`` and one ``.claude/rules/<name>.md`` per
    entry of *rule_templates*; ``SCOPE.md`` is written only when it does not
    exist yet. The workplan prefix comes from ``infer_wp_prefix``.

    Returns:
        A tab-separated ``slug<TAB>path<TAB>prefix`` line for the run summary.
    """
    path = Path(repo["local_path"])
    repo_slug = repo["slug"]
    project_name = repo.get("name") or path.name
    description = repo.get("description") or f"{project_name} - (fill in purpose)"
    prefix = infer_wp_prefix(path, repo_slug)

    def placeholder_values() -> dict:
        # Placeholders shared by every template, including the credential-routing one.
        return {
            "PROJECT_NAME": project_name,
            "PROJECT_DESCRIPTION": description,
            "DOMAIN": repo.get("domain_slug") or "",
            "TOPIC_ID": repo_topic_id(repo, topics),
            "REPO_SLUG": repo_slug,
            "WP_PREFIX": prefix,
        }

    values = placeholder_values()
    # The credential-routing section is rendered first and then embedded as a placeholder.
    values["CREDENTIAL_ROUTING"] = render(credential_routing_template, placeholder_values())

    agents_path = path / "AGENTS.md"
    # Extensions are read from the existing AGENTS.md before it is overwritten.
    extensions = read_agents_extensions(agents_path)
    agents_text = build_agents_md(agents_template, values, extensions)
    agents_path.write_text(agents_text, encoding="utf-8")

    claude_path = path / "CLAUDE.md"
    claude_path.write_text(render(claude_template, values), encoding="utf-8")

    scope_path = path / "SCOPE.md"
    if not scope_path.exists():
        scope_path.write_text(render(scope_template, values), encoding="utf-8")

    rules_dir = path / ".claude" / "rules"
    rules_dir.mkdir(parents=True, exist_ok=True)
    for name, template in rule_templates.items():
        rule_path = rules_dir / f"{name}.md"
        rule_path.write_text(render(template, values), encoding="utf-8")

    return f"{repo_slug}\t{path}\t{prefix}"
def main() -> int:
parser = argparse.ArgumentParser(description="Regenerate agent instruction files from templates.")
parser.add_argument("--repo", action="append", dest="repos", help="Limit to repo slug(s)")
parser.add_argument("--dirty", action="store_true", help="Limit to repos with local git changes")
args = parser.parse_args()
only_slugs: set[str] | None = None
if args.repos:
only_slugs = set(args.repos)
elif args.dirty:
only_slugs = set(dirty_repo_slugs())
repos = fetch("/repos/")
topics = fetch("/topics/?status=active")
agents_template = (TEMPLATE_DIR / "agents-codex.template").read_text(encoding="utf-8")
claude_template = (TEMPLATE_DIR / "claude-md.template").read_text(encoding="utf-8")
scope_template = (TEMPLATE_DIR / "scope.template").read_text(encoding="utf-8")
credential_routing_template = (
TEMPLATE_DIR / "credential-routing.template"
).read_text(encoding="utf-8")
credential_routing_template = (TEMPLATE_DIR / "credential-routing.template").read_text(
encoding="utf-8"
)
rule_names = [
"repo-identity",
"session-protocol",
@@ -117,54 +227,27 @@ def main() -> None:
)
)
else:
rule_templates[name] = (
TEMPLATE_DIR / f"{name}.template"
).read_text(encoding="utf-8")
rule_templates[name] = (TEMPLATE_DIR / f"{name}.template").read_text(encoding="utf-8")
updated: list[str] = []
for repo in choose_repos(repos):
path = Path(repo["local_path"])
repo_slug = repo["slug"]
project_name = repo.get("name") or path.name
description = repo.get("description") or f"{project_name} - (fill in purpose)"
values = {
"PROJECT_NAME": project_name,
"PROJECT_DESCRIPTION": description,
"DOMAIN": repo.get("domain_slug") or "",
"TOPIC_ID": repo_topic_id(repo, topics),
"REPO_SLUG": repo_slug,
"WP_PREFIX": wp_prefix(repo_slug),
"CREDENTIAL_ROUTING": render(credential_routing_template, {
"PROJECT_NAME": project_name,
"PROJECT_DESCRIPTION": description,
"DOMAIN": repo.get("domain_slug") or "",
"TOPIC_ID": repo_topic_id(repo, topics),
"REPO_SLUG": repo_slug,
"WP_PREFIX": wp_prefix(repo_slug),
}),
}
agents_path = path / "AGENTS.md"
extensions = read_agents_extensions(agents_path)
agents_path.write_text(
build_agents_md(agents_template, values, extensions), encoding="utf-8"
for repo in choose_repos(repos, only_slugs):
updated.append(
update_repo(
repo,
topics,
agents_template=agents_template,
claude_template=claude_template,
scope_template=scope_template,
credential_routing_template=credential_routing_template,
rule_templates=rule_templates,
)
)
(path / "CLAUDE.md").write_text(render(claude_template, values), encoding="utf-8")
scope_path = path / "SCOPE.md"
if not scope_path.exists():
scope_path.write_text(render(scope_template, values), encoding="utf-8")
rules_dir = path / ".claude" / "rules"
rules_dir.mkdir(parents=True, exist_ok=True)
for name, template in rule_templates.items():
(rules_dir / f"{name}.md").write_text(render(template, values), encoding="utf-8")
updated.append(f"{repo_slug}\t{path}")
print(f"Updated {len(updated)} local repo(s):")
for line in updated:
print(line)
return 0
if __name__ == "__main__":
main()
raise SystemExit(main())

View File

@@ -0,0 +1,141 @@
---
id: STATE-WP-0067
type: workplan
title: "Attached Repo Agent Instruction And Workplan Frontmatter Normalization"
domain: custodian
repo: state-hub
status: active
owner: codex
topic_slug: custodian
created: "2026-06-22"
updated: "2026-06-22"
---
# STATE-WP-0067 — Attached Repo Agent Instruction And Workplan Frontmatter Normalization
## Goal
Close drift introduced by the State Hub agent-instruction template sync across
attached repos. Agent files were regenerated with a first-token workplan prefix
(`artifact-store` → `ARTIFACT-WP`) and `domain: infotech`, while existing
workplan files retain repo-specific prefixes (`ARTIFACT-STORE-WP`, `IRP-WP`, …)
and legacy frontmatter (`domain: stack` where `stack` is the topic slug).
Per ADR-001, **workplan files are the source of truth**. Agent instructions must
match on-disk workplan prefixes and frontmatter conventions; workplans are
renamed only when a repo has no established prefix yet.
## Context
- `scripts/update_agent_instruction_files.py` derives `{WP_PREFIX}` from the
first hyphen segment of the repo slug. That is wrong for most registered repos
(35+ use intentional abbreviations).
- Template sync left ~49 repos with local changes (discover via
`cd ~ && gitea ll`, or scan `git status --porcelain` under `~/`).
- Task status canon (`STATE-WP-0052`) is already reflected in regenerated
agent files; workplan task blocks may still use legacy literals.
- `domain` in workplan frontmatter should be the hub **domain slug**
(`infotech`), not the topic slug (`stack`). Topic linkage belongs in
`topic_slug`.
## Policy
| Layer | Rule |
|-------|------|
| Workplan prefix | Infer from existing `workplans/*-WP-NNNN-*.md` filenames; fall back to first-token only when no workplans exist |
| `domain` frontmatter | Set to repo `domain_slug` from State Hub registration |
| `topic_slug` frontmatter | Set from registered `topic_id` when present |
| Task status in workplan blocks | `in_progress→progress`, `blocked→wait`, `cancelled/canceled→cancel` |
| Agent files | Regenerated from templates using inferred prefix — never overwrite `<!-- REPO-AGENTS-EXTENSIONS -->` tail |
| Grandfathered prefixes | Short prefixes (`IRP-WP`, `CYA-WP`, …) are canonical for their repo — not migrated to first-token |
## T01 — Inventory repos with local changes
```task
id: STATE-WP-0067-T01
status: progress
priority: high
```
Enumerate repos with uncommitted changes under `/home/worsch/*/`.
Done when the dirty-repo list is recorded in the T04 run log.
## T02 — Infer workplan prefix from on-disk files
```task
id: STATE-WP-0067-T02
status: progress
priority: high
```
Update `scripts/update_agent_instruction_files.py` to infer `{WP_PREFIX}` from
existing workplan filenames before falling back to first-token derivation.
Done when `artifact-store` agent files reference `ARTIFACT-STORE-WP`, not
`ARTIFACT-WP`.
## T03 — Workplan frontmatter normalization script
```task
id: STATE-WP-0067-T03
status: progress
priority: high
```
Add `scripts/normalize_attached_repo_workplans.py` to:
- set `domain:` to registered `domain_slug`;
- set `topic_slug:` from registered topic when missing or wrong;
- migrate legacy task status literals inside ` ```task ` blocks.
Support `--repo SLUG` and `--dirty` (scan `~/` for porcelain).
## T04 — Apply normalization to dirty repos
```task
id: STATE-WP-0067-T04
status: todo
priority: high
```
For each dirty repo:
1. `normalize_attached_repo_workplans.py --repo <slug>`
2. `update_agent_instruction_files.py --repo <slug>` (after T02 filter added)
3. `make fix-consistency REPO=<slug>` from `~/state-hub`
Done when all dirty repos have clean or warnings-only consistency checks.
## T05 — Commit and push
```task
id: STATE-WP-0067-T05
status: todo
priority: high
```
Commit agent-instruction and workplan changes per repo with a shared message.
Push to `origin` where a remote exists.
Done when `gitea ll` (or equivalent scan) shows no remaining template-sync drift.
## T06 — Close workplan
```task
id: STATE-WP-0067-T06
status: todo
priority: medium
```
Mark tasks done, set workplan `status: finished`, run
`make fix-consistency REPO=state-hub`.
## Acceptance Criteria
- Agent instructions and workplan files agree on prefix and domain/topic fields
for every dirty repo.
- `artifact-store` keeps `ARTIFACT-STORE-WP-*` filenames and IDs.
- No `domain: stack` remains where `domain_slug` is `infotech` and `stack` is the
topic slug.
- Dirty repos are committed; hub read model refreshed via fix-consistency.