feat(STATE-WP-0064): add consistency sweep remote-all API endpoint

Expose POST /consistency/sweep/remote-all so activity-core can trigger
the workstation ADR-001 remote-all sweep via the bridge tunnel pattern.
Records consistency_sweep_remote_all progress events and documents the
cutover runbook while the local custodian-sync timer remains interim.
This commit is contained in:
2026-06-21 20:19:22 +02:00
parent 0fdebc6aa8
commit 5a7a6ef5ee
9 changed files with 599 additions and 50 deletions

View File

@@ -17,6 +17,7 @@ from api.routers import token_events
from api.routers import interface_changes
from api.routers import flows
from api.routers import recently_on_scope
from api.routers import consistency_sweep
from api.routers import reconciliation
from api.routers import execution
from api.routers import fabric
@@ -102,6 +103,7 @@ app.add_middleware(
# Attach each feature module's router to the application.
# NOTE(review): only part of the registration list is visible in this hunk;
# confirm whether registration order matters before reordering.
app.include_router(domains.router)
app.include_router(recently_on_scope.hourly_router)
app.include_router(recently_on_scope.router)
app.include_router(consistency_sweep.router)
app.include_router(repos.router)
app.include_router(topics.router)
app.include_router(workstreams.router)

View File

@@ -0,0 +1,33 @@
from __future__ import annotations
import json
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.ext.asyncio import AsyncSession
from api.database import get_session
from api.schemas.consistency_sweep import (
ConsistencySweepRemoteAllGenerate,
ConsistencySweepRemoteAllRun,
)
from api.services.consistency_sweep import run_remote_all_sweep
# Router for the consistency sweep endpoints; every route declared on it is
# served under the /consistency/sweep prefix and tagged "consistency".
router = APIRouter(prefix="/consistency/sweep", tags=["consistency"])
@router.post(
    "/remote-all",
    status_code=status.HTTP_201_CREATED,
    response_model=ConsistencySweepRemoteAllRun,
)
async def sweep_remote_all(
    body: ConsistencySweepRemoteAllGenerate,
    session: AsyncSession = Depends(get_session),
) -> ConsistencySweepRemoteAllRun:
    """Trigger a remote-all consistency sweep and return its run report.

    Args:
        body: Request payload carrying the ``max_seconds`` budget.
        session: Database session injected through ``get_session``.

    Returns:
        The run report produced by ``run_remote_all_sweep``.

    Raises:
        HTTPException: With status 500 when the sweep raises
            ``json.JSONDecodeError``.
    """
    try:
        run = await run_remote_all_sweep(session, max_seconds=body.max_seconds)
    except json.JSONDecodeError as exc:
        message = f"Consistency sweep returned invalid JSON: {exc}"
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=message,
        ) from exc
    return run

View File

@@ -0,0 +1,42 @@
from __future__ import annotations
import uuid
from datetime import datetime
from pydantic import BaseModel, Field
class ConsistencySweepIssueSummary(BaseModel):
    """Issue counters (fail / warn / info) attached to one repo result.

    Every counter defaults to zero.
    """

    fail: int = Field(default=0)
    warn: int = Field(default=0)
    info: int = Field(default=0)
class ConsistencySweepRepoResult(BaseModel):
    """Sweep outcome reported for a single repository."""

    # Identifier and path of the repository this entry describes.
    repo_slug: str
    repo_path: str
    # Free-form result label; kept as a plain string rather than an enum.
    result: str
    # Issue counters for this repository.
    summary: ConsistencySweepIssueSummary
    # Descriptions of fixes applied; a fresh empty list when none are given.
    fixes_applied: list[str] = Field(default_factory=list)
class ConsistencySweepRemoteAllGenerate(BaseModel):
    """Request body accepted when a remote-all sweep is triggered."""

    # Validated to lie within 0..3600 (inclusive); 300 when omitted.
    max_seconds: int = Field(
        300,
        description="Wall-clock budget for the remote-all sweep (0 disables)",
        ge=0,
        le=3600,
    )
class ConsistencySweepRemoteAllRun(BaseModel):
    """Report describing one execution of the remote-all sweep."""

    # Timestamps taken around the sweep execution.
    started_at: datetime
    completed_at: datetime

    # Budget that was requested for the run.
    max_seconds: int

    # Exit status of the sweep, and whether the run was reported as skipped
    # because of a lock.
    exit_code: int
    lock_skipped: bool

    # Per-repo results plus the repo lists for each skip category; each list
    # defaults to a fresh empty list.
    repos_processed: list[ConsistencySweepRepoResult] = Field(default_factory=list)
    skipped_clean: list[str] = Field(default_factory=list)
    skipped_missing: list[str] = Field(default_factory=list)
    skipped_budget: list[str] = Field(default_factory=list)

    # Identifier of the progress event associated with the run, if any.
    progress_event_id: uuid.UUID | None = None

View File

@@ -0,0 +1,178 @@
from __future__ import annotations
import asyncio
import json
import re
import subprocess
import sys
import uuid
from datetime import UTC, datetime
from pathlib import Path
from sqlalchemy.ext.asyncio import AsyncSession
from api.config import settings
from api.models.progress_event import ProgressEvent
from api.schemas.consistency_sweep import (
ConsistencySweepIssueSummary,
ConsistencySweepRemoteAllRun,
ConsistencySweepRepoResult,
)
_LOCK_SKIP_MARKER = "another fix-consistency-remote --all run is already active"
_CLEAN_RE = re.compile(r"^\s*CLEAN \(skipped\):\s*(.+)$", re.MULTILINE)
_MISSING_RE = re.compile(r"^\s*NOT ON THIS HOST \(skipped\):\s*(.+)$", re.MULTILINE)
_BUDGET_RE = re.compile(
r"^\s*BUDGET EXHAUSTED after \d+s \(skipped\):\s*(.+)$",
re.MULTILINE,
)
def _script_path() -> Path:
return Path(__file__).parent.parent.parent / "scripts" / "consistency_check.py"
def _split_slug_list(value: str) -> list[str]:
return [part.strip() for part in value.split(",") if part.strip()]
def _parse_stderr(stderr: str) -> dict[str, list[str]]:
    """Extract the skipped-repo lists from the sweep script's stderr text.

    Args:
        stderr: Captured stderr of the sweep script.

    Returns:
        A dict with the keys ``skipped_clean``, ``skipped_missing`` and
        ``skipped_budget``. Each value is the comma-separated list taken from
        the first matching line, or an empty list if no line matches.
    """
    patterns = (
        ("skipped_clean", _CLEAN_RE),
        ("skipped_missing", _MISSING_RE),
        ("skipped_budget", _BUDGET_RE),
    )
    parsed: dict[str, list[str]] = {}
    for key, pattern in patterns:
        # Only the first occurrence per category is considered.
        found = pattern.search(stderr)
        parsed[key] = _split_slug_list(found.group(1)) if found else []
    return parsed
def _parse_stdout(stdout: str) -> list[ConsistencySweepRepoResult]:
text = stdout.strip()
if not text:
return []
payload = json.loads(text)
items = payload if isinstance(payload, list) else [payload]
results: list[ConsistencySweepRepoResult] = []
for item in items:
summary = item.get("summary") or {}
results.append(
ConsistencySweepRepoResult(
repo_slug=str(item.get("repo_slug") or ""),
repo_path=str(item.get("repo_path") or ""),
result=str(item.get("result") or "pass"),
summary=ConsistencySweepIssueSummary(
fail=int(summary.get("fail", 0)),
warn=int(summary.get("warn", 0)),
info=int(summary.get("info", 0)),
),
fixes_applied=list(item.get("fixes_applied") or []),
)
)
return results
async def run_remote_all_sweep(
    session: AsyncSession,
    *,
    max_seconds: int,
) -> ConsistencySweepRemoteAllRun:
    """Run the consistency script in remote-all mode and record the outcome.

    The script is launched with the current Python interpreter using the
    ``--remote --all --json`` flags, the configured API base and the given
    budget. Its stderr is scanned for the lock-skip marker and the skipped
    repo lists; its stdout is parsed as JSON unless the run was lock-skipped.
    A progress event is stored before the report is returned.

    Args:
        session: Database session used to persist the progress event.
        max_seconds: Budget forwarded to the script via ``--max-seconds``.

    Returns:
        The run report, including the id of the stored progress event.

    Raises:
        json.JSONDecodeError: Propagated from stdout parsing when the script
            prints non-blank output that is not valid JSON.
    """
    started_at = datetime.now(tz=UTC)

    argv = [sys.executable, str(_script_path()), "--remote", "--all", "--json"]
    argv += ["--api-base", settings.api_base]
    argv += ["--max-seconds", str(max_seconds)]

    # subprocess.run blocks until the script exits, so it runs in a worker
    # thread rather than directly inside this coroutine.
    proc = await asyncio.to_thread(
        subprocess.run,
        argv,
        capture_output=True,
        text=True,
    )
    completed_at = datetime.now(tz=UTC)

    lock_skipped = _LOCK_SKIP_MARKER in proc.stderr
    skipped = _parse_stderr(proc.stderr)
    # stdout is not parsed for a lock-skipped run.
    repos_processed = _parse_stdout(proc.stdout) if not lock_skipped else []

    # The same field values feed both the progress event and the response.
    outcome = {
        "started_at": started_at,
        "completed_at": completed_at,
        "max_seconds": max_seconds,
        "exit_code": proc.returncode,
        "lock_skipped": lock_skipped,
        "repos_processed": repos_processed,
        **skipped,
    }
    event_id = await _log_sweep_progress(session, **outcome)
    return ConsistencySweepRemoteAllRun(**outcome, progress_event_id=event_id)
async def _log_sweep_progress(
    session: AsyncSession,
    *,
    started_at: datetime,
    completed_at: datetime,
    max_seconds: int,
    exit_code: int,
    lock_skipped: bool,
    repos_processed: list[ConsistencySweepRepoResult],
    skipped_clean: list[str],
    skipped_missing: list[str],
    skipped_budget: list[str],
) -> uuid.UUID:
    """Store a ``consistency_sweep_remote_all`` progress event for one run.

    The event summary is a one-line description of the run (either the
    lock-skip notice or the per-category counts), and the event detail holds
    the full run data in JSON-friendly form.

    Returns:
        The id of the committed progress event.
    """
    outcomes = [repo.result for repo in repos_processed]
    processed_count = len(outcomes)
    fail_count = outcomes.count("fail")
    warn_count = outcomes.count("warn")

    if not lock_skipped:
        summary = (
            "State Hub consistency sweep completed: "
            f"{processed_count} processed, {len(skipped_clean)} clean, "
            f"{len(skipped_missing)} missing, {len(skipped_budget)} budget-skipped, "
            f"{fail_count} failed, {warn_count} warned"
        )
    else:
        summary = "State Hub consistency sweep skipped: prior remote-all run still active"

    detail = {
        "started_at": _iso(started_at),
        "completed_at": _iso(completed_at),
        "max_seconds": max_seconds,
        "exit_code": exit_code,
        "lock_skipped": lock_skipped,
        "repos_processed": [repo.model_dump(mode="json") for repo in repos_processed],
        "skipped_clean": skipped_clean,
        "skipped_missing": skipped_missing,
        "skipped_budget": skipped_budget,
    }
    event = ProgressEvent(
        event_type="consistency_sweep_remote_all",
        summary=summary,
        detail=detail,
        author="state-hub",
    )
    session.add(event)
    await session.commit()
    # Reload the row after the commit before reading its id
    # (presumably the id is assigned by the database; confirm on the model).
    await session.refresh(event)
    return event.id
def _iso(value: datetime) -> str:
return value.astimezone(UTC).isoformat().replace("+00:00", "Z")