Harden reconciliation conflict handling

This commit is contained in:
2026-05-23 18:18:44 +02:00
parent 7831673820
commit 706b360736
8 changed files with 445 additions and 22 deletions

View File

@@ -14,6 +14,7 @@ from api.schemas.reconciliation import StateChangeRequest, StateChangeResponse
from api.services.lifecycle import status_value
from api.services.reconciliation import (
ReconciliationClass,
StateChangeClassification,
classify_task_status_change,
classify_workstream_status_change,
)
@@ -21,7 +22,10 @@ from api.services.workplan_files import (
find_workplan_for_workstream,
patch_task_status,
patch_workplan_status,
resolve_repo_path,
task_block_status,
task_block_linked,
workplan_status,
)
from api.workplan_status import normalize_workstream_status
@@ -32,6 +36,14 @@ def _bool_or_default(value: bool | None, default: bool) -> bool:
return default if value is None else value
def _conflict(reason: str, follow_up: str) -> StateChangeClassification:
return StateChangeClassification(
ReconciliationClass.DEFERRED,
reason,
follow_up,
)
async def _workstream_tasks_terminal(session: AsyncSession, workstream_id: uuid.UUID) -> bool:
result = await session.execute(select(Task.status).where(Task.workstream_id == workstream_id))
statuses = [status_value(row[0]) for row in result.all()]
@@ -47,6 +59,7 @@ def _deferred_message(
reason: str,
follow_up: str,
workplan_path: str | None,
conflict: bool = False,
) -> AgentMessage:
subject = f"Reconcile {body.target_type} state change: {current_status} -> {target_status}"
lines = [
@@ -56,9 +69,11 @@ def _deferred_message(
f"target_id: {body.target_id}",
f"actor: {body.actor}",
f"intent: {body.intent or ''}",
f"expected_current_status: {body.expected_current_status or ''}",
f"current_status: {current_status}",
f"target_status: {target_status}",
f"reconciliation_class: {classification.value}",
f"conflict: {str(conflict).lower()}",
f"reason: {reason}",
f"follow_up: {follow_up}",
f"workplan_path: {workplan_path or ''}",
@@ -82,7 +97,8 @@ async def classify_state_change(
raise HTTPException(status_code=404, detail="Workstream not found")
repo = await session.get(ManagedRepo, ws.repo_id) if ws.repo_id else None
workplan_ref = find_workplan_for_workstream(repo, ws.id)
repo_path = resolve_repo_path(repo)
workplan_ref = find_workplan_for_workstream(repo, ws.id) if repo_path else None
actual_file_backed = workplan_ref is not None
actual_archived_file = bool(workplan_ref and workplan_ref.archived)
file_backed = (
@@ -111,13 +127,56 @@ async def classify_state_change(
)
write_result = "not_attempted"
reconciliation_record_id = None
conflict = False
if body.apply:
if classification.reconciliation_class == ReconciliationClass.WRITE_THROUGH and workplan_ref:
patch_workplan_status(workplan_ref.path, target_status)
ws.status = target_status
await session.commit()
write_result = "applied"
else:
expected_status = (
normalize_workstream_status(body.expected_current_status)
if body.expected_current_status is not None
else None
)
if expected_status is not None and expected_status != current_status:
classification = _conflict(
f"cached workstream status changed from expected {expected_status!r} to {current_status!r}",
"refresh the dashboard and retry the state change if it is still intended",
)
conflict = True
elif repo is not None and repo_path is None:
classification = _conflict(
"repo host path is unavailable for this State Hub host",
"register a host path for this machine or retry from a host with the repo checkout",
)
conflict = True
elif classification.reconciliation_class == ReconciliationClass.WRITE_THROUGH and workplan_ref:
file_status = normalize_workstream_status(workplan_status(workplan_ref.path))
if file_status and file_status != current_status:
classification = _conflict(
f"workplan file status {file_status!r} differs from cached DB status {current_status!r}",
"run consistency repair or refresh State Hub from files before retrying",
)
conflict = True
else:
try:
patch_workplan_status(workplan_ref.path, target_status)
patched_status = normalize_workstream_status(workplan_status(workplan_ref.path))
except OSError as exc:
classification = _conflict(
f"workplan file write failed: {exc}",
"fix repo file access and retry the reconciliation",
)
conflict = True
else:
if patched_status != target_status:
classification = _conflict(
f"workplan file status could not be patched to {target_status!r}",
"inspect the workplan frontmatter format before retrying",
)
conflict = True
else:
ws.status = target_status
await session.commit()
write_result = "applied"
if write_result != "applied":
msg = _deferred_message(
body=body,
current_status=current_status,
@@ -126,6 +185,7 @@ async def classify_state_change(
reason=classification.reason,
follow_up=classification.follow_up,
workplan_path=workplan_ref.relative_path if workplan_ref else None,
conflict=conflict,
)
session.add(msg)
await session.commit()
@@ -148,6 +208,7 @@ async def classify_state_change(
write_through_result=write_result,
workplan_path=workplan_ref.relative_path if workplan_ref else None,
reconciliation_record_id=reconciliation_record_id,
conflict=conflict,
)
task = await session.get(Task, body.target_id)
@@ -156,7 +217,8 @@ async def classify_state_change(
ws = await session.get(Workstream, task.workstream_id)
repo = await session.get(ManagedRepo, ws.repo_id) if ws and ws.repo_id else None
workplan_ref = find_workplan_for_workstream(repo, ws.id) if ws else None
repo_path = resolve_repo_path(repo)
workplan_ref = find_workplan_for_workstream(repo, ws.id) if ws and repo_path else None
actual_file_backed = workplan_ref is not None
actual_archived_file = bool(workplan_ref and workplan_ref.archived)
file_backed = (
@@ -187,19 +249,62 @@ async def classify_state_change(
)
write_result = "not_attempted"
reconciliation_record_id = None
conflict = False
if body.apply:
if (
expected_status = (
status_value(body.expected_current_status)
if body.expected_current_status is not None
else None
)
if expected_status is not None and expected_status != current_status:
classification = _conflict(
f"cached task status changed from expected {expected_status!r} to {current_status!r}",
"refresh the dashboard and retry the state change if it is still intended",
)
conflict = True
elif repo is not None and repo_path is None:
classification = _conflict(
"repo host path is unavailable for this State Hub host",
"register a host path for this machine or retry from a host with the repo checkout",
)
conflict = True
elif (
classification.reconciliation_class == ReconciliationClass.WRITE_THROUGH
and workplan_ref
and actual_task_linked
):
patch_task_status(workplan_ref.path, task.id, target_status)
task.status = TaskStatus(target_status)
if body.blocking_reason is not None:
task.blocking_reason = body.blocking_reason
await session.commit()
write_result = "applied"
else:
file_status = status_value(task_block_status(workplan_ref.path, task.id))
if file_status and file_status != current_status:
classification = _conflict(
f"workplan task status {file_status!r} differs from cached DB status {current_status!r}",
"run consistency repair or refresh State Hub from files before retrying",
)
conflict = True
else:
try:
patch_task_status(workplan_ref.path, task.id, target_status)
patched_status = status_value(task_block_status(workplan_ref.path, task.id))
except OSError as exc:
classification = _conflict(
f"workplan task write failed: {exc}",
"fix repo file access and retry the reconciliation",
)
conflict = True
else:
if patched_status != target_status:
classification = _conflict(
f"workplan task block could not be patched to {target_status!r}",
"inspect the task block format before retrying",
)
conflict = True
else:
task.status = TaskStatus(target_status)
if body.blocking_reason is not None:
task.blocking_reason = body.blocking_reason
await session.commit()
write_result = "applied"
if write_result != "applied":
msg = _deferred_message(
body=body,
current_status=current_status,
@@ -208,6 +313,7 @@ async def classify_state_change(
reason=classification.reason,
follow_up=classification.follow_up,
workplan_path=workplan_ref.relative_path if workplan_ref else None,
conflict=conflict,
)
session.add(msg)
await session.commit()
@@ -230,4 +336,5 @@ async def classify_state_change(
write_through_result=write_result,
workplan_path=workplan_ref.relative_path if workplan_ref else None,
reconciliation_record_id=reconciliation_record_id,
conflict=conflict,
)

View File

@@ -15,6 +15,7 @@ class StateChangeRequest(BaseModel):
target_status: str
actor: str = "dashboard"
intent: str | None = None
expected_current_status: str | None = None
file_backed: bool | None = None
archived_file: bool | None = None
task_linked: bool | None = None
@@ -40,3 +41,4 @@ class StateChangeResponse(BaseModel):
write_through_result: Literal["not_attempted", "applied", "not_applicable"] = "not_attempted"
workplan_path: str | None = None
reconciliation_record_id: uuid.UUID | None = None
conflict: bool = False

View File

@@ -66,6 +66,19 @@ def task_block_linked(path: Path, task_id: uuid.UUID) -> bool:
return _task_block_for_task(path, task_id) is not None
def workplan_status(path: Path) -> str | None:
status = _frontmatter(path).get("status")
return str(status).strip() if status is not None else None
def task_block_status(path: Path, task_id: uuid.UUID) -> str | None:
meta = _task_block_for_task(path, task_id)
if meta is None:
return None
status = meta.get("status")
return str(status).strip() if status is not None else None
def patch_workplan_status(path: Path, status: str) -> bool:
return _patch_frontmatter_field(path, "status", status)
@@ -140,7 +153,10 @@ def _patch_frontmatter_field(path: Path, key: str, value: str) -> bool:
def _task_block_for_task(path: Path, task_id: uuid.UUID) -> dict[str, Any] | None:
text = path.read_text(encoding="utf-8")
try:
text = path.read_text(encoding="utf-8")
except OSError:
return None
for match in _TASK_BLOCK_RE.finditer(text):
meta = _parse_task_block(match.group(1))
if str(meta.get("state_hub_task_id", "")).strip().strip('"') == str(task_id):