diff --git a/.repo-classification.yaml b/.repo-classification.yaml new file mode 100644 index 0000000..0af4e2f --- /dev/null +++ b/.repo-classification.yaml @@ -0,0 +1,38 @@ +repo_classification: + standard: Repo Classification Standard + version: "1.0" + classified_at: "2026-06-22" + classified_by: agent + + # the-custodian is the governance/continuity substrate: canon, standards, + # ADRs, charters, memory, and cross-domain coordination scaffolding. + category: research + domain: infotech + secondary_domains: + - agents + + capability_tags: + - governance + - knowledge + - coordination + - policy + - documentation + + business_stake: + - technology + - operations + - intelligence + - execution + + business_mechanics: + - intention + - control + - coordination + - adaptation + + notes: > + Primary domain is infotech (the intended users are the ecosystem's + developers and agents); agents is a secondary domain because the repo is + agent-coordination infrastructure. Classified as research because its core + output is canon, standards, and decision records rather than a deployable + product. First-pass agent classification pending human review (CUST-WP-0050 T02). diff --git a/canon/standards/repo-classification-standard_v1.0.md b/canon/standards/repo-classification-standard_v1.0.md index 2558696..2c5f68b 100755 --- a/canon/standards/repo-classification-standard_v1.0.md +++ b/canon/standards/repo-classification-standard_v1.0.md @@ -682,6 +682,10 @@ Add only the mechanics that materially apply. ## 12. Validation Checklist +The controlled vocabularies are maintained in machine-readable form at +`canon/standards/repo-classification.allowed.yaml`. Validate a file with +`tools/validate_repo_classification.py <file>`. + A repo classification is valid when: - [ ] `category` exists and has exactly one allowed value. 
diff --git a/canon/standards/repo-classification.allowed.yaml b/canon/standards/repo-classification.allowed.yaml new file mode 100644 index 0000000..5341b9e --- /dev/null +++ b/canon/standards/repo-classification.allowed.yaml @@ -0,0 +1,111 @@ +# Machine-readable allowed-values for the Repo Classification Standard. +# +# Single source of truth for the standard's controlled vocabularies, derived +# from canon/standards/repo-classification-standard_v1.0.md. Consumed by: +# - the per-repo .repo-classification.yaml linter (tools/validate_repo_classification.py) +# - the State Hub registration validator (CUST-WP-0050 T04) +# +# When the standard's vocabularies change, update this file and bump `version` +# to match the standard version. CUST-WP-0050 T01. + +standard: "Repo Classification Standard" +version: "1.0" +canon_id: "canon-repo-classification" + +# category — exactly 1 required (§5) +categories: + - experimental + - research + - project + - product + - business + +# domain / secondary_domains — primary exactly 1; secondaries 0..n (§6) +domains: + - infotech + - financials + - communication + - consumer + - health + - industrials + - energy + - utilities + - materials + - realestate + - crypto + - agents + - space + - government + +# business_stake — 0..n; 2..6 recommended (§8) +business_stake: + - execution + - intelligence + - finance + - legal + - sales + - experience + - technology + - operations + - product + - people + - procurement + - sustainability + - automation + +# business_mechanics — 0..n, optional (§9) +business_mechanics: + - intention + - control + - coordination + - operation + - adaptation + +# capability_tags are intentionally OPEN-ENDED (§7): lowercase kebab-case, not +# restricted to this set. The families below are the standard's recommended +# canonical tags — used to warn on likely synonyms/typos, never to reject. 
+capability_families: + identity_and_access: + - identity + - authentication + - authorization + - access-control + - user-management + - tenancy + knowledge_and_evidence: + - knowledge + - citations + - evidence + - source-management + - traceability + - documentation + platform_and_operations: + - platform + - deployment + - operations + - observability + - feature-control + - configuration + - orchestration + market_and_coordination: + - marketplace + - pricing + - reputation + - challenges + - bounties + - collaboration + - coordination + governance_and_control: + - governance + - policy + - compliance + - risk + - audit + - control + +# Validation guidance (advisory bounds the linter applies as warnings) +guidance: + secondary_domains_max: 3 + business_stake_recommended_min: 2 + business_stake_recommended_max: 6 + capability_tag_pattern: "^[a-z0-9]+(-[a-z0-9]+)*$" diff --git a/tools/validate_repo_classification.py b/tools/validate_repo_classification.py new file mode 100644 index 0000000..cbf6118 --- /dev/null +++ b/tools/validate_repo_classification.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 +"""Validate a .repo-classification.yaml against the Repo Classification Standard. + +Single small linter shared by repo authors and (later) the State Hub registration +validator. It checks a repo's classification file against the controlled +vocabularies in canon/standards/repo-classification.allowed.yaml. + +Usage: + validate_repo_classification.py <file> ... + validate_repo_classification.py --self-test + +Exit code 0 = all files valid (warnings allowed); 1 = at least one invalid. + +CUST-WP-0050 T01. Depends on PyYAML (stdlib + pyyaml only). 
from __future__ import annotations

import re
import sys
from pathlib import Path

# PyYAML is imported inside the two functions that actually read files
# (load_allowed, validate_file) so that validate() can be imported and used
# on already-parsed documents without PyYAML being installed.

# Allowed-values file, resolved relative to this script's parent directory:
# <parent of this script's dir>/canon/standards/repo-classification.allowed.yaml
ALLOWED_PATH = (
    Path(__file__).resolve().parent.parent
    / "canon"
    / "standards"
    / "repo-classification.allowed.yaml"
)


def load_allowed(path: Path = ALLOWED_PATH) -> dict:
    """Parse the allowed-values YAML file and return its content.

    Args:
        path: Location of the allowed-values file; defaults to ALLOWED_PATH.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file (a mapping for a
        well-formed allowed-values file).

    Raises:
        ImportError: PyYAML is not installed.
        OSError: the file cannot be opened.
        yaml.YAMLError: the file is not valid YAML.
    """
    import yaml  # deferred, see module-level note

    # Explicit UTF-8: the allowed-values file contains non-ASCII characters
    # and must not be decoded with a locale-dependent default.
    with path.open(encoding="utf-8") as fh:
        return yaml.safe_load(fh)


def _known_capability_tags(allowed: dict) -> set[str]:
    """Flatten every list under ``capability_families`` into one set of tags."""
    tags: set[str] = set()
    for family in (allowed.get("capability_families") or {}).values():
        tags.update(family or [])
    return tags


def _is_allowed(value: object, vocabulary: set[str]) -> bool:
    """Return True when *value* is a string that belongs to *vocabulary*.

    The ``isinstance`` guard is what keeps the linter from crashing: YAML can
    yield a list or mapping where a scalar is expected, and a set lookup on
    such a value raises ``TypeError`` (unhashable type).
    """
    return isinstance(value, str) and value in vocabulary


def validate(doc: dict, allowed: dict) -> tuple[list[str], list[str]]:
    """Check a parsed classification document against the allowed values.

    Args:
        doc: Parsed content of a ``.repo-classification.yaml`` file. Anything
            that is not a mapping with a ``repo_classification`` mapping
            inside is reported as a single error.
        allowed: Parsed allowed-values document. The keys ``categories``,
            ``domains``, ``business_stake`` and ``business_mechanics`` are
            required; ``capability_families`` and ``guidance`` are optional.

    Returns:
        ``(errors, warnings)``: two lists of human-readable messages. The
        document is valid when ``errors`` is empty.

    Raises:
        KeyError: *allowed* lacks one of the required vocabulary keys.
    """
    errors: list[str] = []
    warnings: list[str] = []

    block = doc.get("repo_classification") if isinstance(doc, dict) else None
    if not isinstance(block, dict):
        return (["missing top-level `repo_classification:` mapping"], [])

    categories = set(allowed["categories"])
    domains = set(allowed["domains"])
    stakes = set(allowed["business_stake"])
    mechanics = set(allowed["business_mechanics"])
    # `or {}` also covers a `guidance:` key that is present but null.
    guidance = allowed.get("guidance") or {}
    pattern = re.compile(
        guidance.get("capability_tag_pattern", r"^[a-z0-9]+(-[a-z0-9]+)*$")
    )

    # category: required, exactly one allowed value
    category = block.get("category")
    if category is None:
        errors.append("`category` is required")
    elif not _is_allowed(category, categories):
        errors.append(f"`category` '{category}' not in {sorted(categories)}")

    # domain: required, exactly one allowed value
    domain = block.get("domain")
    if domain is None:
        errors.append("`domain` is required")
    elif not _is_allowed(domain, domains):
        errors.append(f"`domain` '{domain}' not in allowed domains")

    # secondary_domains: 0..n allowed domains, excluding primary, no dups
    secondary = block.get("secondary_domains") or []
    if not isinstance(secondary, list):
        errors.append("`secondary_domains` must be a list")
        secondary = []
    for d in secondary:
        if not _is_allowed(d, domains):
            errors.append(f"secondary domain '{d}' not in allowed domains")
        if d == domain:
            errors.append(f"secondary domain '{d}' repeats the primary domain")
    # Equality-based duplicate check: unlike set(), it also works when an
    # entry is unhashable (e.g. a nested list or mapping).
    if any(item in secondary[:i] for i, item in enumerate(secondary)):
        errors.append("`secondary_domains` contains duplicates")
    smax = guidance.get("secondary_domains_max", 3)
    if len(secondary) > smax:
        warnings.append(
            f"{len(secondary)} secondary_domains exceeds recommended max {smax}"
        )

    # capability_tags: open-ended, kebab-case; warn on unknown/synonym
    tags = block.get("capability_tags") or []
    if not isinstance(tags, list):
        errors.append("`capability_tags` must be a list")
        tags = []
    known = _known_capability_tags(allowed)
    for t in tags:
        if not isinstance(t, str) or not pattern.match(t):
            errors.append(f"capability_tag '{t}' is not lowercase kebab-case")
        elif t not in known:
            warnings.append(
                f"capability_tag '{t}' is not a recommended family tag "
                "(allowed, check for synonym)"
            )

    # business_stake: 0..n allowed values; a size range is recommended
    stake = block.get("business_stake") or []
    if not isinstance(stake, list):
        errors.append("`business_stake` must be a list")
        stake = []
    for s in stake:
        if not _is_allowed(s, stakes):
            errors.append(f"business_stake '{s}' not in {sorted(stakes)}")
    if stake:
        lo = guidance.get("business_stake_recommended_min", 2)
        hi = guidance.get("business_stake_recommended_max", 6)
        if not (lo <= len(stake) <= hi):
            warnings.append(
                f"{len(stake)} business_stake values; {lo}-{hi} recommended"
            )

    # business_mechanics: 0..n allowed values
    mech = block.get("business_mechanics") or []
    if not isinstance(mech, list):
        errors.append("`business_mechanics` must be a list")
        mech = []
    for m in mech:
        if not _is_allowed(m, mechanics):
            errors.append(f"business_mechanics '{m}' not in {sorted(mechanics)}")

    return errors, warnings


def validate_file(path: Path, allowed: dict) -> bool:
    """Validate one classification file and print the outcome.

    Prints one ``warn`` line per warning, then either one ``FAIL`` line per
    error or a single ``ok`` line.

    Args:
        path: File to read and validate.
        allowed: Parsed allowed-values document, as passed to validate().

    Returns:
        True when the file was read, parsed and produced no errors;
        False otherwise.

    Raises:
        ImportError: PyYAML is not installed.
    """
    import yaml  # deferred, see module-level note

    try:
        doc = yaml.safe_load(path.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, yaml.YAMLError) as exc:
        # UnicodeDecodeError is a ValueError, not an OSError: without it a
        # single undecodable file would abort the whole run with a traceback.
        print(f"FAIL {path}: cannot read/parse ({exc})")
        return False
    errors, warnings = validate(doc, allowed)
    for w in warnings:
        print(f"warn {path}: {w}")
    if errors:
        for e in errors:
            print(f"FAIL {path}: {e}")
        return False
    print(f"ok {path}")
    return True


def self_test(allowed: dict) -> bool:
    """Run validate() on one known-good and one known-bad document.

    Passes when the good document yields no errors and the bad document
    yields at least five. Prints a one-line summary.

    Args:
        allowed: Parsed allowed-values document, as passed to validate().

    Returns:
        True on PASS, False on FAIL.
    """
    good = {
        "repo_classification": {
            "category": "research",
            "domain": "infotech",
            "secondary_domains": ["agents"],
            "capability_tags": ["governance", "knowledge", "coordination"],
            "business_stake": ["technology", "operations", "intelligence"],
            "business_mechanics": ["intention", "control", "coordination"],
        }
    }
    bad = {
        "repo_classification": {
            "category": "platform",  # not a category (it's a tag)
            "domain": "knowledge",  # not a market domain
            "secondary_domains": ["infotech", "infotech"],
            "capability_tags": ["Stuff", "access-control"],
            "business_stake": ["technology", "wizardry"],
            "business_mechanics": ["teleportation"],
        }
    }
    ge, _ = validate(good, allowed)
    be, _ = validate(bad, allowed)
    ok = (ge == []) and (len(be) >= 5)
    print(f"self-test: good_errors={len(ge)} bad_errors={len(be)} -> {'PASS' if ok else 'FAIL'}")
    return ok


def main(argv: list[str]) -> int:
    """Command-line entry point.

    Args:
        argv: Full argument vector; ``argv[0]`` (the program name) is ignored.
            With no further arguments, or exactly ``--self-test``, the
            self-test runs. Otherwise every argument is treated as a path to
            a classification file.

    Returns:
        Process exit code: 0 when the self-test passes or every file is
        valid, 1 otherwise.
    """
    allowed = load_allowed()
    args = argv[1:]
    if not args or args == ["--self-test"]:
        return 0 if self_test(allowed) else 1
    all_ok = True
    for a in args:
        # No short-circuit: every file is checked and reported.
        if not validate_file(Path(a), allowed):
            all_ok = False
    return 0 if all_ok else 1


if __name__ == "__main__":
    raise SystemExit(main(sys.argv))
+status: active owner: custodian topic_slug: custodian planning_priority: high planning_order: 50 created: "2026-06-22" updated: "2026-06-22" +started: "2026-06-22" state_hub_workstream_id: "9f031f48-8de8-48b6-8e69-d2d83ad70a7a" --- @@ -150,7 +151,7 @@ hub remains a read/index model fed by repo-owned files (ADR-001). ```task id: CUST-WP-0050-T01 -status: todo +status: done priority: high state_hub_task_id: "d978b1f3-4eca-4a17-835b-2c25d13cae22" ``` @@ -164,13 +165,20 @@ families) into a single machine-readable artefact (e.g. Done when a single allowed-values file exists, is referenced by the standard, and a small validator can check a `.repo-classification.yaml` against it. +**Delivered (2026-06-22):** `canon/standards/repo-classification.allowed.yaml` +(categories, domains, business_stake, business_mechanics, capability families, +guidance bounds); referenced from the standard §12; validator +`tools/validate_repo_classification.py` (stdlib + PyYAML) with `--self-test` +(PASS) — checks category/domain enums, secondary-domain rules, kebab-case tags, +and stake/mechanics enums. + ### Phase 2 — Classify the portfolio (repo-owned source of truth) ### T02 - Classify custodian-owned repos ```task id: CUST-WP-0050-T02 -status: todo +status: in_progress priority: high state_hub_task_id: "b7edfbb5-483f-4600-9356-8f885c78ce58" ``` @@ -183,6 +191,12 @@ standard's §16 agent prompt as a first pass. Done when each custodian repo has a committed file that validates against T01 and has been reviewed by a human. +**Progress (2026-06-22):** `the-custodian/.repo-classification.yaml` authored +(category: research · domain: infotech · secondary: agents) and validates clean; +flagged `classified_by: agent` pending human review. Remaining 10 custodian repos +(state-hub, hub-core, inter-hub, activity-core, issue-core, kaizen-agentic, +llm-connect, ops-bridge, ops-warden, email-connect) still to classify. + ### T03 - Classify the full Gitea inventory ```task