diff --git a/scripts/normalize_attached_repo_workplans.py b/scripts/normalize_attached_repo_workplans.py
index df38fe9..ca868bb 100644
--- a/scripts/normalize_attached_repo_workplans.py
+++ b/scripts/normalize_attached_repo_workplans.py
@@ -51,17 +51,62 @@ def choose_repos(repos: list[dict], only_slugs: set[str] | None) -> list[dict]:
     return sorted(by_slug.values(), key=lambda repo: repo["slug"])
 
 
+# A closing "---" glued onto the end of a frontmatter line. Only the three
+# line endings this repair targets are matched: a closing double quote, an
+# ISO date, or a canonical 8-4-4-4-12 hex UUID.
+GLUED_DELIMITER_RE = re.compile(
+    r'("|\d{4}-\d{2}-\d{2}|[0-9a-f]{8}(?:-[0-9a-f]{4}){3}-[0-9a-f]{12})---[ \t]*$',
+    re.MULTILINE,
+)
+
+
+def repair_frontmatter_delimiter(text: str) -> str:
+    """Fix a glued closing --- delimiter introduced by an earlier buggy join.
+
+    Text that does not open with a ``---`` line is returned unchanged.
+    Otherwise the first line ending in ``"---``, ``YYYY-MM-DD---`` or
+    ``<uuid>---`` gets a newline inserted before the ``---`` so that the
+    delimiter sits on a line of its own. A match found after a well-formed
+    closing delimiter is body text and is left untouched.
+    """
+    if not text.startswith("---\n"):
+        return text
+    glued = GLUED_DELIMITER_RE.search(text, 4)
+    if glued is None:
+        return text
+    # Same search split_frontmatter uses to locate the closing delimiter.
+    closing = text.find("\n---", 4)
+    if closing != -1 and closing < glued.start():
+        return text
+    split_at = glued.end(1)
+    return f"{text[:split_at]}\n{text[split_at:]}"
+
+
 def split_frontmatter(text: str) -> tuple[str | None, str]:
+    """Split text into ``(frontmatter, body)``, repairing a glued delimiter first.
+
+    Returns ``(None, text)`` when the text has no opening ``---`` line or no
+    closing delimiter. The frontmatter comes back without its final newline,
+    and a non-empty body always starts with a newline.
+    """
+    text = repair_frontmatter_delimiter(text)
     if not text.startswith("---\n"):
         return None, text
     end = text.find("\n---", 4)
     if end == -1:
         return None, text
-    return text[4:end], text[end + 4 :]
+    body = text[end + 4 :]
+    if body and not body.startswith("\n"):
+        body = "\n" + body
+    return text[4:end], body
 
 
 def join_frontmatter(frontmatter: str, body: str) -> str:
-    return f"---\n{frontmatter}---{body}"
+    """Reassemble a document, keeping the closing --- on a line of its own."""
+    fm = frontmatter.rstrip("\n") + "\n"
+    if body and not body.startswith("\n"):
+        body = "\n" + body
+    return f"---\n{fm}---{body}"
 
 
 def normalize_frontmatter(frontmatter: str, domain_slug: str, topic_slug: str | None) -> tuple[str, bool]:
@@ -142,13 +187,16 @@ def normalize_workplan_file(
     dry_run: bool,
 ) -> bool:
     original = path.read_text(encoding="utf-8")
-    frontmatter, body = split_frontmatter(original)
+    # Repair up front so a delimiter-only fix still counts as a change.
+    repaired = repair_frontmatter_delimiter(original)
+    frontmatter, body = split_frontmatter(repaired)
     if frontmatter is None:
         return False
 
     fm, fm_changed = normalize_frontmatter(frontmatter, domain_slug, topic_slug)
     body, body_changed = normalize_task_blocks(body)
-    if not (fm_changed or body_changed):
+    delimiter_changed = repaired != original
+    if not (fm_changed or body_changed or delimiter_changed):
         return False
 
     updated = join_frontmatter(fm, body)
diff --git a/scripts/update_agent_instruction_files.py b/scripts/update_agent_instruction_files.py
index 31c5814..e2522f2 100644
--- a/scripts/update_agent_instruction_files.py
+++ b/scripts/update_agent_instruction_files.py
@@ -14,7 +14,12 @@ ROOT = Path(__file__).resolve().parent.parent
 TEMPLATE_DIR = ROOT / "scripts" / "project_rules"
 API_BASE = "http://127.0.0.1:8000"
 HOME_ROOT = Path("/home/worsch")
-WP_FILE_RE = re.compile(r"^([A-Z][A-Z0-9-]*-WP)-\d+")
+# Filename patterns ignore case (matches are upper-cased before counting);
+# the "id:" patterns read the prefix exactly as written in the file.
+WP_FILE_RE = re.compile(r"^([A-Za-z][A-Za-z0-9-]*-WP)-\d+", re.IGNORECASE)
+WP_BARE_RE = re.compile(r"^(WP)-\d+", re.IGNORECASE)
+ID_PREFIX_RE = re.compile(r"^id:\s*([A-Z][A-Z0-9-]*-WP)-\d+", re.MULTILINE)
+ID_BARE_RE = re.compile(r"^id:\s*(WP)-\d+", re.MULTILINE)
 
 
 def fetch(path: str):
@@ -70,9 +75,15 @@ def infer_wp_prefix(repo_path: Path, repo_slug: str) -> str:
     for workplan in workplans_dir.glob("*.md"):
         if workplan.name.startswith("ADHOC"):
             continue
-        match = WP_FILE_RE.match(workplan.name)
-        if match:
-            counts[match.group(1)] += 1
+        # Prefer the id declared inside the file; fall back to the filename.
+        text = workplan.read_text(encoding="utf-8", errors="replace")
+        id_match = ID_PREFIX_RE.search(text) or ID_BARE_RE.search(text)
+        if id_match:
+            counts[id_match.group(1)] += 1
+            continue
+        file_match = WP_FILE_RE.match(workplan.name) or WP_BARE_RE.match(workplan.name)
+        if file_match:
+            counts[file_match.group(1).upper()] += 1
     if not counts:
         return default_wp_prefix(repo_slug)
     top_prefix, top_count = counts.most_common(1)[0]