generated from coulomb/repo-seed
finish(STATE-WP-0064): cut over scheduler and split sweep errors from failures
STATE-WP-0064 cutover (state-hub only):
- Retire local custodian-sync.timer; archive units under infra/systemd/archived/
- Mark workplan finished; update infra/README, cron-migration, runbook, AGENTS.md
- Point activity-core-delegation at the consistency-sweep runbook

Consistency engine — automation error vs assessment failure:
- C-00 is an automation error; C-01..C-23 assessment failures are recorded for follow-up but no longer fail --remote --all scheduled sweeps (exit 0)
- Skip workplans/README.md in the workplan glob (human index, not a workplan)
- Progress events and compare script expose automation_error and assessment_failures separately from exit_code
This commit is contained in:
@@ -8,6 +8,7 @@ from pydantic import BaseModel, Field
|
||||
|
||||
class ConsistencySweepIssueSummary(BaseModel):
|
||||
fail: int = 0
|
||||
automation_error: int = 0
|
||||
warn: int = 0
|
||||
info: int = 0
|
||||
|
||||
@@ -39,6 +40,7 @@ class ConsistencySweepRemoteAllRun(BaseModel):
|
||||
max_seconds: int
|
||||
source: str
|
||||
exit_code: int
|
||||
automation_error: bool = False
|
||||
lock_skipped: bool
|
||||
repos_processed: list[ConsistencySweepRepoResult] = Field(default_factory=list)
|
||||
skipped_clean: list[str] = Field(default_factory=list)
|
||||
|
||||
@@ -83,6 +83,7 @@ def _parse_stdout(stdout: str) -> list[ConsistencySweepRepoResult]:
|
||||
result=str(item.get("result") or "pass"),
|
||||
summary=ConsistencySweepIssueSummary(
|
||||
fail=int(summary.get("fail", 0)),
|
||||
automation_error=int(summary.get("automation_error", 0)),
|
||||
warn=int(summary.get("warn", 0)),
|
||||
info=int(summary.get("info", 0)),
|
||||
),
|
||||
@@ -121,6 +122,7 @@ async def run_remote_all_sweep(
|
||||
stderr_meta = _parse_stderr(result.stderr)
|
||||
repos_processed = [] if lock_skipped else _parse_stdout(result.stdout)
|
||||
|
||||
automation_error = result.returncode != 0 and not lock_skipped
|
||||
progress_event_id = await _log_sweep_progress(
|
||||
session,
|
||||
started_at=started_at,
|
||||
@@ -128,6 +130,7 @@ async def run_remote_all_sweep(
|
||||
max_seconds=max_seconds,
|
||||
source=source,
|
||||
exit_code=result.returncode,
|
||||
automation_error=automation_error,
|
||||
lock_skipped=lock_skipped,
|
||||
repos_processed=repos_processed,
|
||||
**stderr_meta,
|
||||
@@ -138,6 +141,7 @@ async def run_remote_all_sweep(
|
||||
max_seconds=max_seconds,
|
||||
source=source,
|
||||
exit_code=result.returncode,
|
||||
automation_error=automation_error,
|
||||
lock_skipped=lock_skipped,
|
||||
repos_processed=repos_processed,
|
||||
skipped_clean=stderr_meta["skipped_clean"],
|
||||
@@ -155,6 +159,7 @@ async def _log_sweep_progress(
|
||||
max_seconds: int,
|
||||
source: str,
|
||||
exit_code: int,
|
||||
automation_error: bool,
|
||||
lock_skipped: bool,
|
||||
repos_processed: list[ConsistencySweepRepoResult],
|
||||
skipped_clean: list[str],
|
||||
@@ -162,16 +167,23 @@ async def _log_sweep_progress(
|
||||
skipped_budget: list[str],
|
||||
) -> uuid.UUID:
|
||||
processed_count = len(repos_processed)
|
||||
fail_count = sum(1 for repo in repos_processed if repo.result == "fail")
|
||||
error_count = sum(1 for repo in repos_processed if repo.result == "error")
|
||||
assessment_fail_count = sum(1 for repo in repos_processed if repo.result == "fail")
|
||||
warn_count = sum(1 for repo in repos_processed if repo.result == "warn")
|
||||
if lock_skipped:
|
||||
summary = "State Hub consistency sweep skipped: prior remote-all run still active"
|
||||
elif automation_error:
|
||||
summary = (
|
||||
"State Hub consistency sweep automation error: "
|
||||
f"exit_code={exit_code}, {processed_count} repos partially processed"
|
||||
)
|
||||
else:
|
||||
summary = (
|
||||
"State Hub consistency sweep completed: "
|
||||
f"{processed_count} processed, {len(skipped_clean)} clean, "
|
||||
f"{len(skipped_missing)} missing, {len(skipped_budget)} budget-skipped, "
|
||||
f"{fail_count} failed, {warn_count} warned"
|
||||
f"{assessment_fail_count} assessment-fail, {error_count} automation-error, "
|
||||
f"{warn_count} warned"
|
||||
)
|
||||
event = ProgressEvent(
|
||||
event_type="consistency_sweep_remote_all",
|
||||
@@ -182,6 +194,9 @@ async def _log_sweep_progress(
|
||||
"max_seconds": max_seconds,
|
||||
"source": source,
|
||||
"exit_code": exit_code,
|
||||
"automation_error": automation_error,
|
||||
"assessment_failures": assessment_fail_count,
|
||||
"automation_errors": error_count,
|
||||
"lock_skipped": lock_skipped,
|
||||
"repos_processed": [item.model_dump(mode="json") for item in repos_processed],
|
||||
"skipped_clean": skipped_clean,
|
||||
|
||||
Reference in New Issue
Block a user