Fix repo_sbom_status resolver — close ADHOC-2026-06-01-T01

The state-hub resolver was calling GET /sbom/status?repo={slug}, which State
Hub does not expose. Real SBOM routes are /sbom/, /sbom/{slug},
/sbom/snapshots/, /sbom/snapshots/{id}, /sbom/ingest/, /sbom/report/licences/.
The weekly-sbom-staleness ActivityDefinition was passing params {repos: all}
and the resolver was reading params.get("repo_slug", ""), so the URL
collapsed to /sbom/status?repo= and 404'd. _fetch_json swallowed the error,
the rule context.repos.sbom_age_days > 30 evaluated against {} and never
matched, and the weekly SBOM check has been a silent no-op for as long as
the route mismatch has existed.

Resolver now supports two modes selected by params:
- single-repo: {repo_slug: foo} → GET /sbom/{foo}, returns
  {repo_slug, last_sbom_at, sbom_age_days, has_sbom}
- bulk: {repos: all} → GET /repos/, computes per-repo age, returns the
  worst repo's fields hoisted to the top of the result alongside
  stale_count, total_count, worst_* fields, and the full per-repo list

Never-scanned repos get a 99999 sentinel age so threshold rules treat
them as very stale without forcing the rule to special-case None.

Hoisting the worst entry to the top preserves the existing rule
expression context.repos.sbom_age_days > 30 (and target_repo:
context.repos.repo_slug, though that field is a separate interpolation
gap tracked as ADHOC-2026-06-01-T02). The integration tests'
aspirational per-repo iteration model is left intact.

Live validation against State Hub on 2026-06-01:
- single: activity-core → 36 days since 2026-04-26 ingest
- bulk: 48 repos total, 46 stale (>30d), worst is info-tech-canon (never
  scanned), rule expression evaluates True

Tests: 120 passed, 1 skipped.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-02 03:31:56 +02:00
parent 5d3fb33c6b
commit a8d3cc2782
4 changed files with 270 additions and 16 deletions

View File

@@ -3,7 +3,8 @@
Registered as source type 'state-hub'.
Supported queries:
- domain_summary: GET {STATE_HUB_URL}/state/domain/{domain}
- repo_sbom_status: GET {STATE_HUB_URL}/sbom/status?repo={repo_slug}
- repo_sbom_status: single-repo -> GET {STATE_HUB_URL}/sbom/{repo_slug}
bulk (repos:all) -> GET {STATE_HUB_URL}/repos/
- state_summary: GET {STATE_HUB_URL}/state/summary
- next_steps: GET {STATE_HUB_URL}/state/next_steps
- workplan_index: GET {STATE_HUB_URL}/workstreams/workplan-index
@@ -20,6 +21,7 @@ from __future__ import annotations
import json
import os
from datetime import datetime, timezone
from typing import Any
import httpx
@@ -30,6 +32,10 @@ _DEFAULT_STATE_HUB_URL = "http://127.0.0.1:8000"
_TIMEOUT_SECONDS = 10.0
_OPEN_WORKSTREAM_STATUSES = {"active", "ready", "blocked"}
_OPEN_TASK_STATUSES = {"todo", "in_progress", "blocked"}
# Sentinel age for repos that have never had an SBOM ingested. Large enough
# that any threshold-based staleness rule treats them as "very stale" without
# forcing the rule expression to special-case None.
_NEVER_SCANNED_AGE_DAYS = 99999
def _base_url() -> str:
@@ -74,8 +80,7 @@ class StateHubContextResolver(ContextResolver):
domain = params.get("domain", "")
return _fetch_json(f"/state/domain/{domain}")
if query == "repo_sbom_status":
repo_slug = params.get("repo_slug", "")
return _fetch_json("/sbom/status", {"repo": repo_slug})
return _repo_sbom_status(params)
if query == "state_summary":
return _fetch_json("/state/summary")
if query == "next_steps":
@@ -105,6 +110,102 @@ class StateHubContextResolver(ContextResolver):
CONTEXT_RESOLVER_REGISTRY["state-hub"] = StateHubContextResolver
def _repo_sbom_status(params: dict[str, Any]) -> dict[str, Any]:
"""Resolve SBOM staleness against the State Hub.
Two modes, selected by params:
- Single-repo: params = {"repo_slug": "<slug>"} -> GET /sbom/{slug}.
Returns {repo_slug, last_sbom_at, sbom_age_days, has_sbom}.
- Bulk: params = {"repos": "all"} -> GET /repos/. Computes age per repo
and returns a summary the rule layer can match against without
comprehensions (the AST evaluator does not support them):
{
"repos": [{repo_slug, last_sbom_at, sbom_age_days, has_sbom}, ...],
"stale_count": int,
"total_count": int,
"worst_repo_slug": str | None,
"worst_age_days": int | None,
"worst_last_sbom_at": str | None,
}
Returns {} on HTTP error to preserve the resolver's graceful-degradation
contract.
"""
repo_slug = params.get("repo_slug")
bulk = str(params.get("repos", "")).lower() == "all"
if repo_slug and not bulk:
payload = _fetch_json(f"/sbom/{repo_slug}")
if not isinstance(payload, dict) or not payload:
return {}
return _sbom_status_entry(
repo_slug=str(payload.get("repo_slug") or repo_slug),
last_sbom_at=payload.get("last_sbom_at"),
)
if bulk:
repos = _fetch_json("/repos/")
if not isinstance(repos, list):
return {}
entries = [
_sbom_status_entry(
repo_slug=str(r.get("slug") or ""),
last_sbom_at=r.get("last_sbom_at"),
)
for r in repos
if r.get("slug")
]
stale = [e for e in entries if e["sbom_age_days"] > 30]
worst = max(entries, key=lambda e: e["sbom_age_days"], default=None)
# Hoist the worst-repo fields to the top so a sandboxed rule expression
# `context.repos.sbom_age_days > 30` matches when any repo is stale,
# without needing comprehensions. Bulk-only summary fields live
# alongside, and the full per-repo list is exposed under `repos`.
result: dict[str, Any] = {
"repos": entries,
"stale_count": len(stale),
"total_count": len(entries),
"worst_repo_slug": worst["repo_slug"] if worst else None,
"worst_age_days": worst["sbom_age_days"] if worst else None,
"worst_last_sbom_at": worst["last_sbom_at"] if worst else None,
}
if worst:
result.update({
"repo_slug": worst["repo_slug"],
"last_sbom_at": worst["last_sbom_at"],
"sbom_age_days": worst["sbom_age_days"],
"has_sbom": worst["has_sbom"],
})
return result
return {}
def _sbom_status_entry(repo_slug: str, last_sbom_at: Any) -> dict[str, Any]:
    """Build the per-repo SBOM status record.

    Args:
        repo_slug: Identifier stored verbatim under ``repo_slug``.
        last_sbom_at: Raw timestamp value, passed to ``_sbom_age_days``.

    Returns:
        A dict with keys ``repo_slug``, ``last_sbom_at``, ``sbom_age_days``
        and ``has_sbom``, in that order.
    """
    age, scanned, stamp = _sbom_age_days(last_sbom_at)
    return dict(
        repo_slug=repo_slug,
        last_sbom_at=stamp,
        sbom_age_days=age,
        has_sbom=scanned,
    )
def _sbom_age_days(last_sbom_at: Any) -> tuple[int, bool, str | None]:
if not isinstance(last_sbom_at, str) or not last_sbom_at:
return _NEVER_SCANNED_AGE_DAYS, False, None
try:
ts = datetime.fromisoformat(last_sbom_at.replace("Z", "+00:00"))
except ValueError:
return _NEVER_SCANNED_AGE_DAYS, False, last_sbom_at
if ts.tzinfo is None:
ts = ts.replace(tzinfo=timezone.utc)
delta = datetime.now(timezone.utc) - ts
return max(0, delta.days), True, last_sbom_at
def _daily_triage_digest(params: dict[str, Any]) -> str:
"""Return a compact JSON string safe to inject into an instruction prompt.