artifact refs and manifest fingerprinting

This commit is contained in:
2026-05-07 13:11:29 +02:00
parent 12ab9c88cb
commit 0b90004a6e
8 changed files with 177 additions and 8 deletions

View File

@@ -337,6 +337,10 @@ Stores run artifacts by reference and checksum:
- profile snapshots,
- source lockfiles.
The first implementation builds the assessment package artifact manifest from
runner-emitted artifact refs and computes checksums for files inside the run
directory.
### Normalizer
Converts extension output into guide-board evidence records.

View File

@@ -135,6 +135,10 @@ Result fields:
- `facts`: structured facts extracted by the runner.
- `artifact_refs`: references to raw artifacts written by the runner.
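Artifact refs must be paths relative to the run directory. After runner
execution, the core fingerprints each artifact ref that resolves to an existing
file inside the run directory and records it, with a SHA-256 checksum, in the
assessment package `artifact_manifest`. Refs that do not exist or that resolve
outside the run directory are omitted from the manifest.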
If a Python runner raises an exception, the core converts that failure into
`infrastructure_error` evidence so the assessment package remains complete.
@@ -163,7 +167,6 @@ Initial statuses:
## Next SDK Steps
- Add artifact helper APIs for extension-generated raw files.
- Add normalizer and mapping plug-in contracts.
- Add extension-owned schema validation for domain-specific target profile
fields.

View File

@@ -3,6 +3,7 @@
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
@@ -24,6 +25,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]:
}
timeout = _timeout_seconds(context)
artifact_refs: list[str] = []
request = Request(
endpoint["url"],
headers={
@@ -35,8 +37,25 @@ def run(context: dict[str, Any]) -> dict[str, Any]:
with urlopen(request, timeout=timeout) as response:
status_code = response.status
content_type = response.headers.get("Content-Type", "")
headers = dict(response.headers.items())
body = response.read(1024 * 1024)
artifact_refs = _write_response_artifacts(
context,
status_code,
content_type,
headers,
body,
)
except HTTPError as exc:
body = exc.read(1024 * 1024)
content_type = exc.headers.get("Content-Type", "")
artifact_refs = _write_response_artifacts(
context,
exc.code,
content_type,
dict(exc.headers.items()),
body,
)
return {
"result": "infrastructure_error",
"observations": [
@@ -46,8 +65,9 @@ def run(context: dict[str, Any]) -> dict[str, Any]:
"endpoint_found": True,
"url": endpoint["url"],
"http_status": exc.code,
"content_type": content_type,
},
"artifact_refs": [],
"artifact_refs": artifact_refs,
}
except URLError as exc:
return {
@@ -60,7 +80,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]:
"url": endpoint["url"],
"error": str(exc.reason),
},
"artifact_refs": [],
"artifact_refs": artifact_refs,
}
except TimeoutError:
return {
@@ -73,7 +93,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]:
"url": endpoint["url"],
"timeout_seconds": timeout,
},
"artifact_refs": [],
"artifact_refs": artifact_refs,
}
facts: dict[str, Any] = {
@@ -93,7 +113,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]:
"CMIS Browser Binding endpoint is reachable but did not return parseable JSON."
],
"facts": facts,
"artifact_refs": [],
"artifact_refs": artifact_refs,
}
facts["json_detected"] = True
@@ -104,7 +124,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]:
"CMIS Browser Binding endpoint is reachable and returned parseable JSON."
],
"facts": facts,
"artifact_refs": [],
"artifact_refs": artifact_refs,
}
@@ -159,3 +179,35 @@ def _repository_facts(value: Any) -> dict[str, Any]:
"repository_shape": "object",
"top_level_keys": sorted(str(key) for key in value.keys())[:20],
}
def _write_response_artifacts(
    context: dict[str, Any],
    status_code: int,
    content_type: str,
    headers: dict[str, str],
    body: bytes,
) -> list[str]:
    """Persist the raw HTTP response under the run directory.

    Two files are written below ``context["run_dir"]``: the response body as
    raw bytes, and a JSON document describing the response (status code,
    content type, headers and body length in bytes).

    Args:
        context: Runner context; only its ``"run_dir"`` entry is read here.
        status_code: HTTP status code of the response.
        content_type: Value of the response ``Content-Type`` header.
        headers: Response headers; must be JSON-serializable.
        body: Response body bytes as read by the caller.

    Returns:
        The two artifact refs, relative to the run directory, with the
        metadata ref first and the body ref second.
    """
    body_ref = "artifacts/open-cmis-tck/preflight/response-body.bin"
    meta_ref = "artifacts/open-cmis-tck/preflight/response-metadata.json"

    root = Path(context["run_dir"])
    target_dir = root / "artifacts" / "open-cmis-tck" / "preflight"
    target_dir.mkdir(parents=True, exist_ok=True)

    # The body is stored first; the metadata is serialized only afterwards.
    root.joinpath(body_ref).write_bytes(body)

    summary = {
        "status_code": status_code,
        "content_type": content_type,
        "headers": headers,
        "byte_count": len(body),
    }
    rendered = json.dumps(summary, indent=2, sort_keys=True)
    root.joinpath(meta_ref).write_text(rendered + "\n", encoding="utf-8")

    return [meta_ref, body_ref]

View File

@@ -107,6 +107,8 @@ Progress:
- The first CMIS Browser Binding preflight runner checks endpoint reachability
and parseable JSON repository metadata through the guide-board runner bridge.
- The preflight runner preserves raw response metadata and body artifacts for
assessment-package fingerprinting.
- Capability flag normalization remains to be expanded after a live target sample
is captured.

View File

@@ -0,0 +1,65 @@
"""Artifact manifest helpers."""
from __future__ import annotations
import hashlib
import mimetypes
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from guide_board.schema import assert_valid
def build_artifact_manifest(
    run_dir: Path,
    run_id: str,
    evidence: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Build the artifact manifest from the artifact refs found in evidence.

    Every distinct string in an evidence item's ``artifact_refs`` list is
    considered once; the first evidence item that mentions a ref is recorded
    as its producer. A ref is included only when it is a relative path that
    resolves to an existing regular file inside ``run_dir``. Anything else
    (non-string, duplicate, absolute, escaping the run directory, missing, or
    not a regular file) is skipped without error.

    Args:
        run_dir: Run directory against which artifact refs are resolved.
        run_id: Identifier stored on every manifest entry.
        evidence: Evidence records; each must carry ``check_id`` and may carry
            ``artifact_refs``.

    Returns:
        Manifest entries in the order the refs were first encountered. Each
        entry holds the ref as ``path``, a guessed media type, the producer
        check id, a ``sha256:`` checksum and a UTC creation timestamp.

    Raises:
        KeyError: If an evidence item has no ``check_id``.

    Each entry is also passed to ``assert_valid`` with the ``"raw-artifact"``
    schema name; presumably that raises on an invalid entry (confirm in
    ``guide_board.schema``).
    """
    # The run directory is the same for every ref, so resolve it only once.
    run_root = run_dir.resolve()
    artifacts: list[dict[str, Any]] = []
    seen: set[str] = set()
    for item in evidence:
        producer = item["check_id"]
        for artifact_ref in item.get("artifact_refs", []):
            if not isinstance(artifact_ref, str) or artifact_ref in seen:
                continue
            seen.add(artifact_ref)
            # Refs must be relative to the run directory. Joining an absolute
            # ref would discard run_dir and let an absolute path be recorded
            # in the manifest when it happens to point inside the run.
            if Path(artifact_ref).is_absolute():
                continue
            path = (run_dir / artifact_ref).resolve()
            try:
                # Containment check after resolving ".." segments and symlinks.
                path.relative_to(run_root)
            except ValueError:
                continue
            # is_file() is already False for paths that do not exist.
            if not path.is_file():
                continue
            artifact = {
                "id": f"artifact:{_safe_id(artifact_ref)}",
                "run_id": run_id,
                "path": artifact_ref,
                "media_type": _media_type(path),
                "producer": producer,
                "checksum": f"sha256:{_sha256(path)}",
                "created_at": datetime.now(timezone.utc).isoformat(),
                "retention_class": "raw",
            }
            assert_valid(artifact, "raw-artifact")
            artifacts.append(artifact)
    return artifacts
def _sha256(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is read in 1 MiB chunks rather than loaded in one piece.
    """
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            block_bytes = stream.read(chunk_size)
            if block_bytes == b"":
                break
            hasher.update(block_bytes)
    return hasher.hexdigest()
def _media_type(path: Path) -> str:
    """Guess a media type from the file name of ``path``.

    Falls back to ``application/octet-stream`` when ``mimetypes`` has no
    guess for the name.
    """
    fallback = "application/octet-stream"
    mime, _encoding = mimetypes.guess_type(path.name)
    return mime or fallback
def _safe_id(value: str) -> str:
    """Return ``value`` with unsafe characters replaced by underscores.

    Alphanumeric characters, ``-`` and ``_`` are kept; every other character
    becomes a single ``_``, so the result has the same length as the input.
    """
    allowed_punctuation = {"-", "_"}
    pieces = []
    for char in value:
        if char.isalnum() or char in allowed_punctuation:
            pieces.append(char)
        else:
            pieces.append("_")
    return "".join(pieces)

View File

@@ -7,6 +7,7 @@ from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from guide_board.artifacts import build_artifact_manifest
from guide_board.io import write_json
from guide_board.planning import build_run_plan
from guide_board.runners import run_step
@@ -35,7 +36,16 @@ def run_assessment(
for finding in findings:
assert_valid(finding, "finding")
assessment_package = _assessment_package(run_id, plan, evidence, findings, created_at)
artifact_manifest = build_artifact_manifest(run_dir, run_id, evidence)
assessment_package = _assessment_package(
run_id,
plan,
evidence,
findings,
artifact_manifest,
created_at,
)
assert_valid(assessment_package, "assessment-package")
run_metadata = {
@@ -164,6 +174,7 @@ def _assessment_package(
plan: dict[str, Any],
evidence: list[dict[str, Any]],
findings: list[dict[str, Any]],
artifact_manifest: list[dict[str, Any]],
created_at: str,
) -> dict[str, Any]:
summary = dict(Counter(item["result"] for item in evidence))
@@ -179,7 +190,7 @@ def _assessment_package(
"summary": summary,
"findings": findings,
"evidence_refs": [item["id"] for item in evidence],
"artifact_manifest": [],
"artifact_manifest": artifact_manifest,
"waivers": [],
"certification_boundary": "Guide Board produces preparation evidence only and does not issue certifications or audit assurance.",
"created_at": created_at,

View File

@@ -153,13 +153,35 @@ class CoreArchitectureTests(unittest.TestCase):
encoding="utf-8"
)
)
package = json.loads(
(Path(result["run_dir"]) / "reports" / "assessment-package.json").read_text(
encoding="utf-8"
)
)
self.assertEqual(result["status"], "completed")
self.assertEqual(evidence["evidence"][0]["result"], "pass")
self.assertEqual(
sorted(evidence["evidence"][0]["artifact_refs"]),
[
"artifacts/open-cmis-tck/preflight/response-body.bin",
"artifacts/open-cmis-tck/preflight/response-metadata.json",
],
)
self.assertEqual(
evidence["evidence"][0]["facts"]["repository_ids"],
["local-test-repository"],
)
self.assertEqual(len(package["artifact_manifest"]), 2)
self.assertTrue(
(
Path(result["run_dir"])
/ "artifacts"
/ "open-cmis-tck"
/ "preflight"
/ "response-metadata.json"
).exists()
)
finally:
server.shutdown()
thread.join(timeout=5)
@@ -243,6 +265,11 @@ class CoreArchitectureTests(unittest.TestCase):
encoding="utf-8"
)
)["findings"]
package = json.loads(
(Path(result["run_dir"]) / "reports" / "assessment-package.json").read_text(
encoding="utf-8"
)
)
self.assertEqual(result["status"], "blocked")
self.assertEqual(evidence[0]["result"], "pass")
@@ -256,6 +283,7 @@ class CoreArchitectureTests(unittest.TestCase):
findings[0]["classification"],
evidence[1]["facts"]["blocked_reason"],
)
self.assertGreaterEqual(len(package["artifact_manifest"]), 3)
finally:
server.shutdown()
thread.join(timeout=5)

View File

@@ -184,6 +184,8 @@ Acceptance:
of CMIS.
- The baseline executor writes the run directory contract, normalized evidence,
an assessment package, and a Markdown report.
- The assessment package includes a fingerprinted artifact manifest for
runner-emitted raw artifacts.
## D1.7 - Extension SDK Skeleton
@@ -204,6 +206,8 @@ Acceptance:
- Python module runner contracts are documented in `docs/EXTENSION-SDK.md`.
- Manifest-declared command runners execute without shell expansion and return
normalized evidence through the same runner result contract.
- Runner artifact refs are constrained to the run directory and fingerprinted in
the assessment package artifact manifest.
## D1.8 - CMIS Seed Extension Integration