generated from coulomb/repo-seed
Add STATE-WP-0067 attached-repo agent and workplan normalization
Infer workplan prefixes from on-disk filenames instead of first-token derivation, add a frontmatter normalization script, and wire Make targets for dirty-repo sweeps.
This commit is contained in:
222
scripts/normalize_attached_repo_workplans.py
Normal file
222
scripts/normalize_attached_repo_workplans.py
Normal file
@@ -0,0 +1,222 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Normalize workplan frontmatter and task status literals in attached repos."""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.request
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
API_BASE = "http://127.0.0.1:8000"
|
||||
HOME_ROOT = Path("/home/worsch")
|
||||
WP_FILE_RE = re.compile(r"^([A-Z][A-Z0-9-]*-WP)-\d+")
|
||||
TASK_BLOCK_RE = re.compile(r"```task\n(.*?)```", re.DOTALL)
|
||||
TASK_STATUS_MAP = {
|
||||
"blocked": "wait",
|
||||
"in_progress": "progress",
|
||||
"cancelled": "cancel",
|
||||
"canceled": "cancel",
|
||||
}
|
||||
|
||||
|
||||
def fetch(path: str, timeout: float = 30.0):
    """Fetch ``path`` from the local API and return the decoded JSON body.

    Args:
        path: URL path (including any query string) appended to ``API_BASE``.
        timeout: Seconds to wait on the connection before giving up. Without
            a timeout an unresponsive API would block the script forever.

    Returns:
        Whatever ``json.load`` produces for the response body.

    Raises:
        urllib.error.URLError: If the request fails (a timeout may also
            surface as a socket timeout error).
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    with urllib.request.urlopen(f"{API_BASE}{path}", timeout=timeout) as response:
        return json.load(response)
|
||||
|
||||
|
||||
def dirty_repo_slugs(home: Path = HOME_ROOT) -> list[str]:
    """Return the names of git checkouts directly under ``home`` that are dirty.

    A directory counts as a checkout when it contains a ``.git`` directory and
    as dirty when ``git status --porcelain`` prints anything. The git exit
    status is not inspected. Results follow the sorted directory order.
    """

    def has_local_changes(checkout: Path) -> bool:
        # Any porcelain output means modified, staged or untracked files.
        status = subprocess.run(
            ["git", "-C", str(checkout), "status", "--porcelain"],
            capture_output=True,
            text=True,
            check=False,
        )
        return bool(status.stdout.strip())

    return [
        entry.name
        for entry in sorted(home.iterdir())
        if (entry / ".git").is_dir() and has_local_changes(entry)
    ]
|
||||
|
||||
|
||||
def choose_repos(repos: list[dict], only_slugs: set[str] | None) -> list[dict]:
    """Select repo records by slug, ordered alphabetically by slug.

    Records without a truthy ``slug`` are dropped; when a slug occurs more
    than once the last record wins. With ``only_slugs`` set, only those slugs
    that actually exist are returned; with ``None`` every record is returned.
    """
    slug_index: dict = {}
    for record in repos:
        slug = record.get("slug")
        if slug:
            slug_index[slug] = record

    if only_slugs is None:
        wanted = sorted(slug_index)
    else:
        wanted = [slug for slug in sorted(only_slugs) if slug in slug_index]
    return [slug_index[slug] for slug in wanted]
|
||||
|
||||
|
||||
def split_frontmatter(text: str) -> tuple[str | None, str]:
    """Split a document into its frontmatter and the remaining body.

    The document must start with a ``---`` line. The frontmatter ends at the
    next line that consists only of ``---`` (optionally followed by spaces or
    tabs); a line that merely starts with ``---`` is not a delimiter.

    Args:
        text: Full file contents.

    Returns:
        ``(frontmatter, body)``. ``frontmatter`` excludes both delimiter lines
        and the newline before the closing delimiter. ``body`` is everything
        after the closing ``---``, starting with the rest of that line.
        ``(None, text)`` is returned when there is no complete frontmatter.
    """
    if not text.startswith("---\n"):
        return None, text

    # Start at the opening line's own newline so an empty frontmatter
    # ("---\n---\n") is recognised as well.
    search_from = 3
    while True:
        end = text.find("\n---", search_from)
        if end == -1:
            return None, text
        after = end + 4
        line_end = text.find("\n", after)
        rest_of_line = text[after:] if line_end == -1 else text[after:line_end]
        if not rest_of_line.strip(" \t"):
            return text[4:end], text[after:]
        # e.g. "---foo" inside a block scalar: keep looking.
        search_from = end + 1
|
||||
|
||||
|
||||
def join_frontmatter(frontmatter: str, body: str) -> str:
    """Reassemble a document from its frontmatter and body.

    Args:
        frontmatter: Frontmatter text without the ``---`` delimiter lines; a
            trailing newline is optional.
        body: Everything that followed the closing ``---`` in the source.

    Returns:
        The document with the closing ``---`` on a line of its own.
    """
    # The frontmatter may arrive without its final newline; without this the
    # last key would be fused with the closing delimiter ("key: value---").
    if frontmatter and not frontmatter.endswith("\n"):
        frontmatter += "\n"
    return f"---\n{frontmatter}---{body}"
|
||||
|
||||
|
||||
def normalize_frontmatter(frontmatter: str, domain_slug: str, topic_slug: str | None) -> tuple[str, bool]:
    """Force the ``domain`` and ``topic_slug`` keys of a frontmatter block.

    Args:
        frontmatter: Frontmatter text without the ``---`` delimiters.
        domain_slug: Value for ``domain``; an empty string leaves it alone.
        topic_slug: Value for ``topic_slug``; ``None``/empty leaves it alone.

    Returns:
        ``(new_frontmatter, changed)``. ``changed`` is True only when the
        text actually differs, so already-normalized files are not rewritten.
    """
    fm = frontmatter
    if domain_slug:
        fm = _set_frontmatter_key(fm, "domain", domain_slug)
    if topic_slug:
        # A new topic_slug line goes right below ``domain`` when that exists.
        fm = _set_frontmatter_key(fm, "topic_slug", topic_slug, after_key="domain")
    return fm, fm != frontmatter


def _set_frontmatter_key(fm: str, key: str, value: str, after_key: str | None = None) -> str:
    """Replace the top-level ``key:`` line in ``fm`` or insert a new one."""
    line = f"{key}: {value}"
    # [ \t]* rather than \s*: \s would cross the newline of an empty value
    # and swallow the following line.
    existing = re.compile(rf"^{re.escape(key)}:[ \t]*.*$", re.MULTILINE)
    if existing.search(fm):
        # Function replacement: the value is inserted literally, never
        # interpreted as a regex replacement template.
        return existing.sub(lambda _match: line, fm, count=1)

    if after_key is not None:
        anchor = re.compile(rf"^{re.escape(after_key)}:.*$", re.MULTILINE)
        if anchor.search(fm):
            return anchor.sub(lambda match: f"{match.group(0)}\n{line}", fm, count=1)

    head = fm.rstrip()
    return f"{head}\n{line}\n" if head else f"{line}\n"
|
||||
|
||||
|
||||
def normalize_task_blocks(body: str) -> tuple[str, bool]:
    """Rewrite legacy ``status:`` values inside fenced ``task`` blocks.

    Every block matched by ``TASK_BLOCK_RE`` is scanned for a ``status:`` line
    whose value is a key of ``TASK_STATUS_MAP``; the value is replaced by the
    mapped canonical literal. Text outside task blocks is left untouched.

    Args:
        body: Document text after the frontmatter.

    Returns:
        ``(new_body, changed)`` where ``changed`` reports whether any status
        line was rewritten.
    """
    changed = False

    def rewrite_block(match: re.Match[str]) -> str:
        nonlocal changed
        block = match.group(1)
        for legacy, canon in TASK_STATUS_MAP.items():
            # [ \t]* instead of \s*: \s also matches newlines, so a status on
            # the last line lost its newline and was fused with the closing
            # fence ("status: wait```").
            block, count = re.subn(
                rf"^status:[ \t]*{re.escape(legacy)}[ \t]*$",
                f"status: {canon}",
                block,
                count=1,
                flags=re.MULTILINE,
            )
            if count:
                changed = True
        return f"```task\n{block}```"

    return TASK_BLOCK_RE.sub(rewrite_block, body), changed
|
||||
|
||||
|
||||
def normalize_workplan_file(
    path: Path,
    domain_slug: str,
    topic_slug: str | None,
    *,
    dry_run: bool,
) -> bool:
    """Normalize one workplan file and report whether it needed changes.

    Files without frontmatter are skipped. The frontmatter goes through
    ``normalize_frontmatter`` and the body through ``normalize_task_blocks``;
    the file is written back only when one of them reports a change and
    ``dry_run`` is false.

    Returns:
        True when the file changed (or would change in a dry run).
    """
    source_text = path.read_text(encoding="utf-8")
    header, remainder = split_frontmatter(source_text)
    if header is None:
        return False

    new_header, header_touched = normalize_frontmatter(header, domain_slug, topic_slug)
    new_remainder, body_touched = normalize_task_blocks(remainder)
    if not header_touched and not body_touched:
        return False

    rendered = join_frontmatter(new_header, new_remainder)
    if dry_run:
        return True
    path.write_text(rendered, encoding="utf-8")
    return True
|
||||
|
||||
|
||||
def repo_topic_slug(repo: dict, topics_by_id: dict[str, dict]) -> str | None:
    """Look up the slug of the topic referenced by ``repo["topic_id"]``.

    Returns ``None`` when the repo has no topic id, when the id is not in
    ``topics_by_id``, or when the topic record has no ``slug`` key.
    """
    topic_id = repo.get("topic_id")
    topic = topics_by_id.get(topic_id) if topic_id else None
    if not topic:
        return None
    return topic.get("slug")
|
||||
|
||||
|
||||
def normalize_repo(repo: dict, topics_by_id: dict[str, dict], *, dry_run: bool) -> list[str]:
    """Normalize every non-ADHOC workplan of one repo.

    Args:
        repo: Repo record; ``local_path``, ``domain_slug`` and ``topic_id``
            are read from it.
        topics_by_id: Topic records keyed by id, used to resolve the topic slug.
        dry_run: When true, files are inspected but never written.

    Returns:
        Paths (relative to the repo root) of the workplans that changed or
        would change. Empty when the repo has no usable ``local_path`` or no
        ``workplans`` directory.
    """
    local_path = repo.get("local_path")
    if not local_path:
        # An empty path would resolve "workplans" against the current working
        # directory and rewrite files of an unrelated checkout.
        print(
            f"warning: {repo.get('slug') or '(unknown repo)'} has no local_path; skipping",
            file=sys.stderr,
        )
        return []

    path = Path(local_path)
    workplans_dir = path / "workplans"
    if not workplans_dir.is_dir():
        return []

    domain_slug = repo.get("domain_slug") or ""
    topic_slug = repo_topic_slug(repo, topics_by_id)
    updated_files: list[str] = []

    for workplan in sorted(workplans_dir.glob("*.md")):
        # Files whose name starts with ADHOC are left untouched.
        if workplan.name.startswith("ADHOC"):
            continue
        if normalize_workplan_file(workplan, domain_slug, topic_slug, dry_run=dry_run):
            updated_files.append(str(workplan.relative_to(path)))

    return updated_files
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: normalize workplans of the selected repos.

    Repos are selected with ``--repo SLUG`` (repeatable) or ``--dirty``; when
    both are given the explicit ``--repo`` list takes precedence. Repo and
    topic records come from the local API. With ``--dry-run`` nothing is
    written and the report says what would change.

    Returns:
        Process exit status (always 0; argument errors exit through argparse).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--repo", action="append", dest="repos", help="Repo slug to normalize")
    parser.add_argument("--dirty", action="store_true", help="Normalize repos with local git changes")
    parser.add_argument("--dry-run", action="store_true", help="Report changes without writing")
    args = parser.parse_args()

    only_slugs: set[str] | None
    if args.repos:
        only_slugs = set(args.repos)
    elif args.dirty:
        # NOTE(review): assumes directory names under the home root equal
        # the repo slugs known to the API - confirm.
        only_slugs = set(dirty_repo_slugs())
    else:
        parser.error("Specify --repo SLUG and/or --dirty")

    repos = fetch("/repos/")
    topics = fetch("/topics/?status=active")
    topics_by_id = {topic["id"]: topic for topic in topics}
    selected = choose_repos(repos, only_slugs)

    if args.repos:
        # A mistyped slug would otherwise just yield "0 changed" with no hint.
        found = {repo["slug"] for repo in selected}
        for slug in sorted(set(args.repos) - found):
            print(f"warning: no repo with slug {slug!r} returned by the API; skipping", file=sys.stderr)

    total_files = 0
    for repo in selected:
        updated = normalize_repo(repo, topics_by_id, dry_run=args.dry_run)
        if updated:
            total_files += len(updated)
            mode = "would update" if args.dry_run else "updated"
            print(f"{repo['slug']}: {mode} {len(updated)} workplan(s)")
            for name in updated:
                print(f"  - {name}")

    print(f"Done. {total_files} workplan file(s) {'would change' if args.dry_run else 'changed'}.")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
@@ -1,14 +1,20 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.request
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parent.parent
|
||||
TEMPLATE_DIR = ROOT / "scripts" / "project_rules"
|
||||
API_BASE = "http://127.0.0.1:8000"
|
||||
HOME_ROOT = Path("/home/worsch")
|
||||
WP_FILE_RE = re.compile(r"^([A-Z][A-Z0-9-]*-WP)-\d+")
|
||||
|
||||
|
||||
def fetch(path: str):
|
||||
@@ -51,11 +57,34 @@ def repo_topic_id(repo: dict, topics: list[dict]) -> str:
|
||||
return match["id"] if match else "(none)"
|
||||
|
||||
|
||||
def wp_prefix(repo_slug: str) -> str:
|
||||
def default_wp_prefix(repo_slug: str) -> str:
    """Derive a workplan prefix from the first dash-separated token of the slug."""
    head, _, _ = repo_slug.partition("-")
    return head.upper() + "-WP"
|
||||
|
||||
|
||||
def infer_wp_prefix(repo_path: Path, repo_slug: str) -> str:
    """Prefer established on-disk workplan prefixes over first-token derivation.

    Counts the prefixes (as captured by ``WP_FILE_RE``) of the ``*.md`` files
    in ``<repo_path>/workplans``, ignoring files whose name starts with
    ``ADHOC``, and returns the most frequent one.

    Args:
        repo_path: Root directory of the repo checkout.
        repo_slug: Repo slug, used for the fallback prefix and in warnings.

    Returns:
        The most common on-disk prefix, or ``default_wp_prefix(repo_slug)``
        when no workplan file matches. A warning is printed to stderr when
        more than one prefix is in use.
    """
    counts: Counter[str] = Counter()
    workplans_dir = repo_path / "workplans"
    if workplans_dir.is_dir():
        for workplan in workplans_dir.glob("*.md"):
            if workplan.name.startswith("ADHOC"):
                continue
            match = WP_FILE_RE.match(workplan.name)
            if match:
                counts[match.group(1)] += 1
    if not counts:
        return default_wp_prefix(repo_slug)

    # Highest count wins; ties go to the alphabetically first prefix so the
    # result does not depend on the directory listing order.
    top_prefix, top_count = min(counts.items(), key=lambda item: (-item[1], item[0]))
    if len(counts) > 1:
        print(
            f"warning: {repo_slug} has multiple workplan prefixes {dict(sorted(counts.items()))}; "
            f"using {top_prefix} ({top_count} files)",
            file=sys.stderr,
        )
    return top_prefix
|
||||
|
||||
|
||||
def brief_domain(path: Path) -> str | None:
|
||||
brief = path / ".custodian-brief.md"
|
||||
if not brief.exists():
|
||||
@@ -64,7 +93,23 @@ def brief_domain(path: Path) -> str | None:
|
||||
return match.group(1) if match else None
|
||||
|
||||
|
||||
def choose_repos(repos: list[dict]) -> list[dict]:
|
||||
def dirty_repo_slugs(home: Path = HOME_ROOT) -> list[str]:
    """List the git checkouts directly under ``home`` that have local changes.

    Only directories containing a ``.git`` directory are examined. A checkout
    is reported when ``git status --porcelain`` prints anything; the command's
    exit status is not checked. Names are returned in sorted path order.
    """
    dirty: list[str] = []
    checkouts = (entry for entry in sorted(home.iterdir()) if (entry / ".git").is_dir())
    for checkout in checkouts:
        command = ["git", "-C", str(checkout), "status", "--porcelain"]
        status = subprocess.run(command, capture_output=True, text=True, check=False)
        if not status.stdout.strip():
            continue
        dirty.append(checkout.name)
    return dirty
|
||||
|
||||
|
||||
def choose_repos(repos: list[dict], only_slugs: set[str] | None = None) -> list[dict]:
|
||||
by_path: dict[str, list[dict]] = {}
|
||||
for repo in repos:
|
||||
local_path = repo.get("local_path") or ""
|
||||
@@ -83,19 +128,84 @@ def choose_repos(repos: list[dict]) -> list[dict]:
|
||||
candidates = domain_matches
|
||||
active = [r for r in candidates if r.get("status") == "active"]
|
||||
chosen.append(active[0] if active else candidates[0])
|
||||
|
||||
if only_slugs is not None:
|
||||
chosen = [repo for repo in chosen if repo.get("slug") in only_slugs]
|
||||
return chosen
|
||||
|
||||
|
||||
def main() -> None:
|
||||
def update_repo(
    repo: dict,
    topics: list[dict],
    *,
    agents_template: str,
    claude_template: str,
    scope_template: str,
    credential_routing_template: str,
    rule_templates: dict[str, str],
) -> str:
    """Regenerate the agent instruction files of one repo from the templates.

    Writes ``AGENTS.md`` (keeping the extensions read from the existing
    file), ``CLAUDE.md`` and one ``.claude/rules/<name>.md`` per rule
    template. ``SCOPE.md`` is created only when it does not exist yet.

    Returns:
        A tab-separated ``slug``, ``path``, ``prefix`` line for the report.
    """
    path = Path(repo["local_path"])
    repo_slug = repo["slug"]
    project_name = repo.get("name") or path.name
    description = repo.get("description") or f"{project_name} - (fill in purpose)"
    prefix = infer_wp_prefix(path, repo_slug)

    def placeholder_values() -> dict:
        # Placeholders shared by every template, including credential routing.
        return {
            "PROJECT_NAME": project_name,
            "PROJECT_DESCRIPTION": description,
            "DOMAIN": repo.get("domain_slug") or "",
            "TOPIC_ID": repo_topic_id(repo, topics),
            "REPO_SLUG": repo_slug,
            "WP_PREFIX": prefix,
        }

    values = placeholder_values()
    # The credential-routing snippet is rendered first, then embedded as a
    # placeholder of its own in the other templates.
    values["CREDENTIAL_ROUTING"] = render(credential_routing_template, placeholder_values())

    agents_path = path / "AGENTS.md"
    extensions = read_agents_extensions(agents_path)
    agents_text = build_agents_md(agents_template, values, extensions)
    agents_path.write_text(agents_text, encoding="utf-8")

    claude_path = path / "CLAUDE.md"
    claude_path.write_text(render(claude_template, values), encoding="utf-8")

    scope_path = path / "SCOPE.md"
    if not scope_path.exists():
        scope_path.write_text(render(scope_template, values), encoding="utf-8")

    rules_dir = path / ".claude" / "rules"
    rules_dir.mkdir(parents=True, exist_ok=True)
    for name, template in rule_templates.items():
        rule_path = rules_dir / f"{name}.md"
        rule_path.write_text(render(template, values), encoding="utf-8")

    return f"{repo_slug}\t{path}\t{prefix}"
|
||||
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(description="Regenerate agent instruction files from templates.")
|
||||
parser.add_argument("--repo", action="append", dest="repos", help="Limit to repo slug(s)")
|
||||
parser.add_argument("--dirty", action="store_true", help="Limit to repos with local git changes")
|
||||
args = parser.parse_args()
|
||||
|
||||
only_slugs: set[str] | None = None
|
||||
if args.repos:
|
||||
only_slugs = set(args.repos)
|
||||
elif args.dirty:
|
||||
only_slugs = set(dirty_repo_slugs())
|
||||
|
||||
repos = fetch("/repos/")
|
||||
topics = fetch("/topics/?status=active")
|
||||
|
||||
agents_template = (TEMPLATE_DIR / "agents-codex.template").read_text(encoding="utf-8")
|
||||
claude_template = (TEMPLATE_DIR / "claude-md.template").read_text(encoding="utf-8")
|
||||
scope_template = (TEMPLATE_DIR / "scope.template").read_text(encoding="utf-8")
|
||||
credential_routing_template = (
|
||||
TEMPLATE_DIR / "credential-routing.template"
|
||||
).read_text(encoding="utf-8")
|
||||
credential_routing_template = (TEMPLATE_DIR / "credential-routing.template").read_text(
|
||||
encoding="utf-8"
|
||||
)
|
||||
rule_names = [
|
||||
"repo-identity",
|
||||
"session-protocol",
|
||||
@@ -117,54 +227,27 @@ def main() -> None:
|
||||
)
|
||||
)
|
||||
else:
|
||||
rule_templates[name] = (
|
||||
TEMPLATE_DIR / f"{name}.template"
|
||||
).read_text(encoding="utf-8")
|
||||
rule_templates[name] = (TEMPLATE_DIR / f"{name}.template").read_text(encoding="utf-8")
|
||||
|
||||
updated: list[str] = []
|
||||
for repo in choose_repos(repos):
|
||||
path = Path(repo["local_path"])
|
||||
repo_slug = repo["slug"]
|
||||
project_name = repo.get("name") or path.name
|
||||
description = repo.get("description") or f"{project_name} - (fill in purpose)"
|
||||
values = {
|
||||
"PROJECT_NAME": project_name,
|
||||
"PROJECT_DESCRIPTION": description,
|
||||
"DOMAIN": repo.get("domain_slug") or "",
|
||||
"TOPIC_ID": repo_topic_id(repo, topics),
|
||||
"REPO_SLUG": repo_slug,
|
||||
"WP_PREFIX": wp_prefix(repo_slug),
|
||||
"CREDENTIAL_ROUTING": render(credential_routing_template, {
|
||||
"PROJECT_NAME": project_name,
|
||||
"PROJECT_DESCRIPTION": description,
|
||||
"DOMAIN": repo.get("domain_slug") or "",
|
||||
"TOPIC_ID": repo_topic_id(repo, topics),
|
||||
"REPO_SLUG": repo_slug,
|
||||
"WP_PREFIX": wp_prefix(repo_slug),
|
||||
}),
|
||||
}
|
||||
|
||||
agents_path = path / "AGENTS.md"
|
||||
extensions = read_agents_extensions(agents_path)
|
||||
agents_path.write_text(
|
||||
build_agents_md(agents_template, values, extensions), encoding="utf-8"
|
||||
for repo in choose_repos(repos, only_slugs):
|
||||
updated.append(
|
||||
update_repo(
|
||||
repo,
|
||||
topics,
|
||||
agents_template=agents_template,
|
||||
claude_template=claude_template,
|
||||
scope_template=scope_template,
|
||||
credential_routing_template=credential_routing_template,
|
||||
rule_templates=rule_templates,
|
||||
)
|
||||
)
|
||||
(path / "CLAUDE.md").write_text(render(claude_template, values), encoding="utf-8")
|
||||
scope_path = path / "SCOPE.md"
|
||||
if not scope_path.exists():
|
||||
scope_path.write_text(render(scope_template, values), encoding="utf-8")
|
||||
|
||||
rules_dir = path / ".claude" / "rules"
|
||||
rules_dir.mkdir(parents=True, exist_ok=True)
|
||||
for name, template in rule_templates.items():
|
||||
(rules_dir / f"{name}.md").write_text(render(template, values), encoding="utf-8")
|
||||
|
||||
updated.append(f"{repo_slug}\t{path}")
|
||||
|
||||
print(f"Updated {len(updated)} local repo(s):")
|
||||
for line in updated:
|
||||
print(line)
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
raise SystemExit(main())
|
||||
Reference in New Issue
Block a user