From 0b90004a6e708360352f040d8197988508add6fd Mon Sep 17 00:00:00 2001 From: tegwick Date: Thu, 7 May 2026 13:11:29 +0200 Subject: [PATCH] artifact refs and manifest fingerprinting --- docs/ARCHITECTURE-BLUEPRINT.md | 4 ++ docs/EXTENSION-SDK.md | 5 +- .../src/open_cmis_tck/preflight.py | 62 ++++++++++++++++-- ...PEN-CMIS-TCK-WP-0001-harness-foundation.md | 2 + src/guide_board/artifacts.py | 65 +++++++++++++++++++ src/guide_board/execution.py | 15 ++++- tests/test_core.py | 28 ++++++++ .../GUIDE-BOARD-WP-0001-bootstrapping.md | 4 ++ 8 files changed, 177 insertions(+), 8 deletions(-) create mode 100644 src/guide_board/artifacts.py diff --git a/docs/ARCHITECTURE-BLUEPRINT.md b/docs/ARCHITECTURE-BLUEPRINT.md index b459c44..2e8d2c9 100644 --- a/docs/ARCHITECTURE-BLUEPRINT.md +++ b/docs/ARCHITECTURE-BLUEPRINT.md @@ -337,6 +337,10 @@ Stores run artifacts by reference and checksum: - profile snapshots, - source lockfiles. +The first implementation builds the assessment package artifact manifest from +runner-emitted artifact refs and computes checksums for files inside the run +directory. + ### Normalizer Converts extension output into guide-board evidence records. diff --git a/docs/EXTENSION-SDK.md b/docs/EXTENSION-SDK.md index d65b15a..997e601 100644 --- a/docs/EXTENSION-SDK.md +++ b/docs/EXTENSION-SDK.md @@ -135,6 +135,10 @@ Result fields: - `facts`: structured facts extracted by the runner. - `artifact_refs`: references to raw artifacts written by the runner. +Artifact refs must be paths relative to the run directory. After runner +execution, the core fingerprints existing artifact refs into the assessment +package `artifact_manifest`. + If a Python runner raises an exception, the core converts that failure into `infrastructure_error` evidence so the assessment package remains complete. @@ -163,7 +167,6 @@ Initial statuses: ## Next SDK Steps -- Add artifact helper APIs for extension-generated raw files. - Add normalizer and mapping plug-in contracts. - Add extension-owned schema validation for domain-specific target profile fields. diff --git a/extensions/open-cmis-tck/src/open_cmis_tck/preflight.py b/extensions/open-cmis-tck/src/open_cmis_tck/preflight.py index 39fba96..5029b0c 100644 --- a/extensions/open-cmis-tck/src/open_cmis_tck/preflight.py +++ b/extensions/open-cmis-tck/src/open_cmis_tck/preflight.py @@ -3,6 +3,7 @@ from __future__ import annotations import json +from pathlib import Path from typing import Any from urllib.error import HTTPError, URLError from urllib.request import Request, urlopen @@ -24,6 +25,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]: } timeout = _timeout_seconds(context) + artifact_refs: list[str] = [] request = Request( endpoint["url"], headers={ @@ -35,8 +37,25 @@ def run(context: dict[str, Any]) -> dict[str, Any]: with urlopen(request, timeout=timeout) as response: status_code = response.status content_type = response.headers.get("Content-Type", "") + headers = dict(response.headers.items()) body = response.read(1024 * 1024) + artifact_refs = _write_response_artifacts( + context, + status_code, + content_type, + headers, + body, + ) except HTTPError as exc: + body = exc.read(1024 * 1024) + content_type = exc.headers.get("Content-Type", "") + artifact_refs = _write_response_artifacts( + context, + exc.code, + content_type, + dict(exc.headers.items()), + body, + ) return { "result": "infrastructure_error", "observations": [ @@ -46,8 +65,9 @@ def run(context: dict[str, Any]) -> dict[str, Any]: "endpoint_found": True, "url": endpoint["url"], "http_status": exc.code, + "content_type": content_type, }, - "artifact_refs": [], + "artifact_refs": artifact_refs, } except URLError as exc: return { @@ -60,7 +80,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]: "url": endpoint["url"], "error": str(exc.reason), }, - "artifact_refs": [], + "artifact_refs": artifact_refs, } except TimeoutError: return { @@ -73,7 +93,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]: "url": endpoint["url"], "timeout_seconds": timeout, }, - "artifact_refs": [], + "artifact_refs": artifact_refs, } facts: dict[str, Any] = { @@ -93,7 +113,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]: "CMIS Browser Binding endpoint is reachable but did not return parseable JSON." ], "facts": facts, - "artifact_refs": [], + "artifact_refs": artifact_refs, } facts["json_detected"] = True @@ -104,7 +124,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]: "CMIS Browser Binding endpoint is reachable and returned parseable JSON." ], "facts": facts, - "artifact_refs": [], + "artifact_refs": artifact_refs, } @@ -159,3 +179,35 @@ def _repository_facts(value: Any) -> dict[str, Any]: "repository_shape": "object", "top_level_keys": sorted(str(key) for key in value.keys())[:20], } + + +def _write_response_artifacts( + context: dict[str, Any], + status_code: int, + content_type: str, + headers: dict[str, str], + body: bytes, +) -> list[str]: + run_dir = Path(context["run_dir"]) + artifact_dir = run_dir / "artifacts" / "open-cmis-tck" / "preflight" + artifact_dir.mkdir(parents=True, exist_ok=True) + + response_ref = "artifacts/open-cmis-tck/preflight/response-body.bin" + metadata_ref = "artifacts/open-cmis-tck/preflight/response-metadata.json" + + (run_dir / response_ref).write_bytes(body) + (run_dir / metadata_ref).write_text( + json.dumps( + { + "status_code": status_code, + "content_type": content_type, + "headers": headers, + "byte_count": len(body), + }, + indent=2, + sort_keys=True, + ) + + "\n", + encoding="utf-8", + ) + return [metadata_ref, response_ref] diff --git a/extensions/open-cmis-tck/workplans/OPEN-CMIS-TCK-WP-0001-harness-foundation.md b/extensions/open-cmis-tck/workplans/OPEN-CMIS-TCK-WP-0001-harness-foundation.md index 9052d3e..c71e4ad 100644 --- a/extensions/open-cmis-tck/workplans/OPEN-CMIS-TCK-WP-0001-harness-foundation.md +++ b/extensions/open-cmis-tck/workplans/OPEN-CMIS-TCK-WP-0001-harness-foundation.md @@ -107,6 +107,8 @@ Progress: - The first CMIS Browser Binding preflight runner checks endpoint reachability and parseable JSON repository metadata through the guide-board runner bridge. +- The preflight runner preserves raw response metadata and body artifacts for + assessment-package fingerprinting. - Capability flag normalization remains to be expanded after a live target sample is captured. diff --git a/src/guide_board/artifacts.py b/src/guide_board/artifacts.py new file mode 100644 index 0000000..b91193d --- /dev/null +++ b/src/guide_board/artifacts.py @@ -0,0 +1,65 @@ +"""Artifact manifest helpers.""" + +from __future__ import annotations + +import hashlib +import mimetypes +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +from guide_board.schema import assert_valid + + +def build_artifact_manifest( + run_dir: Path, + run_id: str, + evidence: list[dict[str, Any]], +) -> list[dict[str, Any]]: + artifacts: list[dict[str, Any]] = [] + seen: set[str] = set() + for item in evidence: + producer = item["check_id"] + for artifact_ref in item.get("artifact_refs", []): + if not isinstance(artifact_ref, str) or artifact_ref in seen: + continue + seen.add(artifact_ref) + path = (run_dir / artifact_ref).resolve() + try: + path.relative_to(run_dir.resolve()) + except ValueError: + continue + if not path.exists() or not path.is_file(): + continue + artifact = { + "id": f"artifact:{_safe_id(artifact_ref)}", + "run_id": run_id, + "path": artifact_ref, + "media_type": _media_type(path), + "producer": producer, + "checksum": f"sha256:{_sha256(path)}", + "created_at": datetime.now(timezone.utc).isoformat(), + "retention_class": "raw", + } + assert_valid(artifact, "raw-artifact") + artifacts.append(artifact) + return artifacts + + +def _sha256(path: Path) -> str: + digest = hashlib.sha256() + with path.open("rb") as handle: + for chunk in iter(lambda: handle.read(1024 * 1024), b""): + digest.update(chunk) + return digest.hexdigest() + + +def _media_type(path: Path) -> str: + guessed, _ = mimetypes.guess_type(path.name) + if guessed: + return guessed + return "application/octet-stream" + + +def _safe_id(value: str) -> str: + return "".join(char if char.isalnum() or char in {"-", "_"} else "_" for char in value) diff --git a/src/guide_board/execution.py b/src/guide_board/execution.py index 31949f5..911679f 100644 --- a/src/guide_board/execution.py +++ b/src/guide_board/execution.py @@ -7,6 +7,7 @@ from datetime import datetime, timezone from pathlib import Path from typing import Any +from guide_board.artifacts import build_artifact_manifest from guide_board.io import write_json from guide_board.planning import build_run_plan from guide_board.runners import run_step @@ -35,7 +36,16 @@ def run_assessment( for finding in findings: assert_valid(finding, "finding") - assessment_package = _assessment_package(run_id, plan, evidence, findings, created_at) + artifact_manifest = build_artifact_manifest(run_dir, run_id, evidence) + + assessment_package = _assessment_package( + run_id, + plan, + evidence, + findings, + artifact_manifest, + created_at, + ) assert_valid(assessment_package, "assessment-package") run_metadata = { @@ -164,6 +174,7 @@ def _assessment_package( plan: dict[str, Any], evidence: list[dict[str, Any]], findings: list[dict[str, Any]], + artifact_manifest: list[dict[str, Any]], created_at: str, ) -> dict[str, Any]: summary = dict(Counter(item["result"] for item in evidence)) @@ -179,7 +190,7 @@ def _assessment_package( "summary": summary, "findings": findings, "evidence_refs": [item["id"] for item in evidence], - "artifact_manifest": [], + "artifact_manifest": artifact_manifest, "waivers": [], "certification_boundary": "Guide Board produces preparation evidence only and does not issue certifications or audit assurance.", "created_at": created_at, diff --git a/tests/test_core.py b/tests/test_core.py index 71644ae..c57b861 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -153,13 +153,35 @@ class CoreArchitectureTests(unittest.TestCase): encoding="utf-8" ) ) + package = json.loads( + (Path(result["run_dir"]) / "reports" / "assessment-package.json").read_text( + encoding="utf-8" + ) + ) self.assertEqual(result["status"], "completed") self.assertEqual(evidence["evidence"][0]["result"], "pass") + self.assertEqual( + sorted(evidence["evidence"][0]["artifact_refs"]), + [ + "artifacts/open-cmis-tck/preflight/response-body.bin", + "artifacts/open-cmis-tck/preflight/response-metadata.json", + ], + ) self.assertEqual( evidence["evidence"][0]["facts"]["repository_ids"], ["local-test-repository"], ) + self.assertEqual(len(package["artifact_manifest"]), 2) + self.assertTrue( + ( + Path(result["run_dir"]) + / "artifacts" + / "open-cmis-tck" + / "preflight" + / "response-metadata.json" + ).exists() + ) finally: server.shutdown() thread.join(timeout=5) @@ -243,6 +265,11 @@ class CoreArchitectureTests(unittest.TestCase): encoding="utf-8" ) )["findings"] + package = json.loads( + (Path(result["run_dir"]) / "reports" / "assessment-package.json").read_text( + encoding="utf-8" + ) + ) self.assertEqual(result["status"], "blocked") self.assertEqual(evidence[0]["result"], "pass") @@ -256,6 +283,7 @@ class CoreArchitectureTests(unittest.TestCase): findings[0]["classification"], evidence[1]["facts"]["blocked_reason"], ) + self.assertGreaterEqual(len(package["artifact_manifest"]), 3) finally: server.shutdown() thread.join(timeout=5) diff --git a/workplans/GUIDE-BOARD-WP-0001-bootstrapping.md b/workplans/GUIDE-BOARD-WP-0001-bootstrapping.md index 35c94e9..65bd865 100644 --- a/workplans/GUIDE-BOARD-WP-0001-bootstrapping.md +++ b/workplans/GUIDE-BOARD-WP-0001-bootstrapping.md @@ -184,6 +184,8 @@ Acceptance: of CMIS. - The baseline executor writes the run directory contract, normalized evidence, an assessment package, and a Markdown report. +- The assessment package includes a fingerprinted artifact manifest for + runner-emitted raw artifacts. ## D1.7 - Extension SDK Skeleton @@ -204,6 +206,8 @@ Acceptance: - Python module runner contracts are documented in `docs/EXTENSION-SDK.md`. - Manifest-declared command runners execute without shell expansion and return normalized evidence through the same runner result contract. +- Runner artifact refs are constrained to the run directory and fingerprinted in + the assessment package artifact manifest. ## D1.8 - CMIS Seed Extension Integration