generated from coulomb/repo-seed
Harden WSJF triage report recovery
This commit is contained in:
@@ -328,6 +328,7 @@ async def evaluate_instructions(payload: dict) -> dict:
|
||||
"model": result.model,
|
||||
"output_validated": result.output_validated,
|
||||
"review_required": result.review_required,
|
||||
"validation_error": result.validation_error,
|
||||
})
|
||||
for spec in result.tasks:
|
||||
task_specs.append({
|
||||
|
||||
@@ -126,6 +126,9 @@ def _post_state_hub_progress(
|
||||
"activity_core_run_id": run_id,
|
||||
"instruction_id": instruction_id,
|
||||
"scheduled_for": payload.get("scheduled_for"),
|
||||
"output_validated": report_entry.get("output_validated"),
|
||||
"review_required": report_entry.get("review_required"),
|
||||
"validation_error": report_entry.get("validation_error"),
|
||||
"report": report,
|
||||
},
|
||||
}
|
||||
@@ -179,6 +182,7 @@ def _render_markdown(
|
||||
report = report_entry.get("report") or {}
|
||||
instruction_id = report_entry.get("instruction_id", "instruction")
|
||||
summary = report.get("summary", "")
|
||||
validation_error = report_entry.get("validation_error")
|
||||
lines = [
|
||||
"---",
|
||||
"type: working-memory",
|
||||
@@ -187,6 +191,10 @@ def _render_markdown(
|
||||
f"activity_core_run_id: {payload.get('run_id')}",
|
||||
f"instruction_id: {instruction_id}",
|
||||
f"scheduled_for: {payload.get('scheduled_for')}",
|
||||
f"output_validated: {str(bool(report_entry.get('output_validated'))).lower()}",
|
||||
f"review_required: {str(bool(report_entry.get('review_required'))).lower()}",
|
||||
f"model: {report_entry.get('model') or ''}",
|
||||
f"prompt_hash: {report_entry.get('prompt_hash') or ''}",
|
||||
f"created: {datetime.now(tz=timezone.utc).isoformat()}",
|
||||
"---",
|
||||
"",
|
||||
@@ -195,6 +203,8 @@ def _render_markdown(
|
||||
]
|
||||
if summary:
|
||||
lines.extend([summary, ""])
|
||||
if validation_error:
|
||||
lines.extend(["Validation error:", "", f"`{validation_error}`", ""])
|
||||
lines.extend([
|
||||
"```json",
|
||||
json.dumps(report, indent=2, sort_keys=True),
|
||||
|
||||
@@ -39,6 +39,7 @@ class InstructionResult:
|
||||
output_validated: bool = False
|
||||
review_required: bool = False
|
||||
condition_matched: str | None = None
|
||||
validation_error: str | None = None
|
||||
|
||||
|
||||
def _resolve_path(obj: Any, path: str) -> Any:
|
||||
@@ -164,7 +165,19 @@ def _execute(
|
||||
"error=%s, raw_output_preview=%r",
|
||||
instr.id, prompt_hash, error, preview,
|
||||
)
|
||||
return _empty_result(instr, prompt_hash=prompt_hash)
|
||||
failure_report = _invalid_output_report(instr, error, raw_output)
|
||||
if failure_report is not None:
|
||||
return InstructionResult(
|
||||
tasks=[],
|
||||
report=failure_report,
|
||||
prompt_hash=prompt_hash,
|
||||
model=instr.model,
|
||||
output_validated=False,
|
||||
review_required=True,
|
||||
condition_matched=instr.condition or None,
|
||||
validation_error=error,
|
||||
)
|
||||
return _empty_result(instr, prompt_hash=prompt_hash, validation_error=error)
|
||||
|
||||
return InstructionResult(
|
||||
tasks=task_specs,
|
||||
@@ -193,7 +206,11 @@ def _llm_run_config(instr: Any) -> dict[str, Any]:
|
||||
return config
|
||||
|
||||
|
||||
def _empty_result(instr: Any, prompt_hash: str | None = None) -> InstructionResult:
|
||||
def _empty_result(
|
||||
instr: Any,
|
||||
prompt_hash: str | None = None,
|
||||
validation_error: str | None = None,
|
||||
) -> InstructionResult:
|
||||
return InstructionResult(
|
||||
tasks=[],
|
||||
prompt_hash=prompt_hash,
|
||||
@@ -201,9 +218,54 @@ def _empty_result(instr: Any, prompt_hash: str | None = None) -> InstructionResu
|
||||
output_validated=False,
|
||||
review_required=bool(getattr(instr, "review_required", False)),
|
||||
condition_matched=getattr(instr, "condition", "") or None,
|
||||
validation_error=validation_error,
|
||||
)
|
||||
|
||||
|
||||
def _invalid_output_report(
|
||||
instr: Any,
|
||||
validation_error: str,
|
||||
raw_output: Any,
|
||||
) -> dict[str, Any] | None:
|
||||
"""Build a durable diagnostic report for invalid report-sink output.
|
||||
|
||||
Task-only instructions keep the legacy empty-result behavior. Instructions
|
||||
with report sinks should leave operators a bounded artifact that preserves
|
||||
the partial model output without marking it as schema-valid.
|
||||
"""
|
||||
if not getattr(instr, "report_sinks", None):
|
||||
return None
|
||||
|
||||
partial_output: Any
|
||||
raw_preview: str | None = None
|
||||
if isinstance(raw_output, str):
|
||||
try:
|
||||
partial_output = json.loads(raw_output)
|
||||
except json.JSONDecodeError:
|
||||
partial_output = None
|
||||
raw_preview = raw_output[:4000]
|
||||
else:
|
||||
partial_output = raw_output
|
||||
|
||||
report: dict[str, Any] = {
|
||||
"summary": (
|
||||
f"Instruction {instr.id} produced output that failed validation; "
|
||||
"partial output was preserved for operator review."
|
||||
),
|
||||
"status": "validation_failed",
|
||||
"validation_error": validation_error,
|
||||
}
|
||||
if isinstance(partial_output, dict):
|
||||
if isinstance(partial_output.get("summary"), str):
|
||||
report["partial_summary"] = partial_output["summary"]
|
||||
report["partial_report"] = partial_output
|
||||
elif isinstance(partial_output, list):
|
||||
report["partial_report"] = partial_output
|
||||
elif raw_preview is not None:
|
||||
report["raw_output_preview"] = raw_preview
|
||||
return report
|
||||
|
||||
|
||||
def _validate_output(
|
||||
raw_output: Any,
|
||||
instr: Any,
|
||||
|
||||
Reference in New Issue
Block a user