generated from coulomb/repo-seed
artifact refs and manifest fingerprinting
This commit is contained in:
@@ -337,6 +337,10 @@ Stores run artifacts by reference and checksum:
|
||||
- profile snapshots,
|
||||
- source lockfiles.
|
||||
|
||||
The first implementation builds the assessment package artifact manifest from
|
||||
runner-emitted artifact refs and computes checksums for files inside the run
|
||||
directory.
|
||||
|
||||
### Normalizer
|
||||
|
||||
Converts extension output into guide-board evidence records.
|
||||
|
||||
@@ -135,6 +135,10 @@ Result fields:
|
||||
- `facts`: structured facts extracted by the runner.
|
||||
- `artifact_refs`: references to raw artifacts written by the runner.
|
||||
|
||||
Artifact refs must be paths relative to the run directory. After runner
|
||||
execution, the core fingerprints existing artifact refs into the assessment
|
||||
package `artifact_manifest`.
|
||||
|
||||
If a Python runner raises an exception, the core converts that failure into
|
||||
`infrastructure_error` evidence so the assessment package remains complete.
|
||||
|
||||
@@ -163,7 +167,6 @@ Initial statuses:
|
||||
|
||||
## Next SDK Steps
|
||||
|
||||
- Add artifact helper APIs for extension-generated raw files.
|
||||
- Add normalizer and mapping plug-in contracts.
|
||||
- Add extension-owned schema validation for domain-specific target profile
|
||||
fields.
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.request import Request, urlopen
|
||||
@@ -24,6 +25,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]:
|
||||
}
|
||||
|
||||
timeout = _timeout_seconds(context)
|
||||
artifact_refs: list[str] = []
|
||||
request = Request(
|
||||
endpoint["url"],
|
||||
headers={
|
||||
@@ -35,8 +37,25 @@ def run(context: dict[str, Any]) -> dict[str, Any]:
|
||||
with urlopen(request, timeout=timeout) as response:
|
||||
status_code = response.status
|
||||
content_type = response.headers.get("Content-Type", "")
|
||||
headers = dict(response.headers.items())
|
||||
body = response.read(1024 * 1024)
|
||||
artifact_refs = _write_response_artifacts(
|
||||
context,
|
||||
status_code,
|
||||
content_type,
|
||||
headers,
|
||||
body,
|
||||
)
|
||||
except HTTPError as exc:
|
||||
body = exc.read(1024 * 1024)
|
||||
content_type = exc.headers.get("Content-Type", "")
|
||||
artifact_refs = _write_response_artifacts(
|
||||
context,
|
||||
exc.code,
|
||||
content_type,
|
||||
dict(exc.headers.items()),
|
||||
body,
|
||||
)
|
||||
return {
|
||||
"result": "infrastructure_error",
|
||||
"observations": [
|
||||
@@ -46,8 +65,9 @@ def run(context: dict[str, Any]) -> dict[str, Any]:
|
||||
"endpoint_found": True,
|
||||
"url": endpoint["url"],
|
||||
"http_status": exc.code,
|
||||
"content_type": content_type,
|
||||
},
|
||||
"artifact_refs": [],
|
||||
"artifact_refs": artifact_refs,
|
||||
}
|
||||
except URLError as exc:
|
||||
return {
|
||||
@@ -60,7 +80,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]:
|
||||
"url": endpoint["url"],
|
||||
"error": str(exc.reason),
|
||||
},
|
||||
"artifact_refs": [],
|
||||
"artifact_refs": artifact_refs,
|
||||
}
|
||||
except TimeoutError:
|
||||
return {
|
||||
@@ -73,7 +93,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]:
|
||||
"url": endpoint["url"],
|
||||
"timeout_seconds": timeout,
|
||||
},
|
||||
"artifact_refs": [],
|
||||
"artifact_refs": artifact_refs,
|
||||
}
|
||||
|
||||
facts: dict[str, Any] = {
|
||||
@@ -93,7 +113,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]:
|
||||
"CMIS Browser Binding endpoint is reachable but did not return parseable JSON."
|
||||
],
|
||||
"facts": facts,
|
||||
"artifact_refs": [],
|
||||
"artifact_refs": artifact_refs,
|
||||
}
|
||||
|
||||
facts["json_detected"] = True
|
||||
@@ -104,7 +124,7 @@ def run(context: dict[str, Any]) -> dict[str, Any]:
|
||||
"CMIS Browser Binding endpoint is reachable and returned parseable JSON."
|
||||
],
|
||||
"facts": facts,
|
||||
"artifact_refs": [],
|
||||
"artifact_refs": artifact_refs,
|
||||
}
|
||||
|
||||
|
||||
@@ -159,3 +179,35 @@ def _repository_facts(value: Any) -> dict[str, Any]:
|
||||
"repository_shape": "object",
|
||||
"top_level_keys": sorted(str(key) for key in value.keys())[:20],
|
||||
}
|
||||
|
||||
|
||||
def _write_response_artifacts(
|
||||
context: dict[str, Any],
|
||||
status_code: int,
|
||||
content_type: str,
|
||||
headers: dict[str, str],
|
||||
body: bytes,
|
||||
) -> list[str]:
|
||||
run_dir = Path(context["run_dir"])
|
||||
artifact_dir = run_dir / "artifacts" / "open-cmis-tck" / "preflight"
|
||||
artifact_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
response_ref = "artifacts/open-cmis-tck/preflight/response-body.bin"
|
||||
metadata_ref = "artifacts/open-cmis-tck/preflight/response-metadata.json"
|
||||
|
||||
(run_dir / response_ref).write_bytes(body)
|
||||
(run_dir / metadata_ref).write_text(
|
||||
json.dumps(
|
||||
{
|
||||
"status_code": status_code,
|
||||
"content_type": content_type,
|
||||
"headers": headers,
|
||||
"byte_count": len(body),
|
||||
},
|
||||
indent=2,
|
||||
sort_keys=True,
|
||||
)
|
||||
+ "\n",
|
||||
encoding="utf-8",
|
||||
)
|
||||
return [metadata_ref, response_ref]
|
||||
|
||||
@@ -107,6 +107,8 @@ Progress:
|
||||
|
||||
- The first CMIS Browser Binding preflight runner checks endpoint reachability
|
||||
and parseable JSON repository metadata through the guide-board runner bridge.
|
||||
- The preflight runner preserves raw response metadata and body artifacts for
|
||||
assessment-package fingerprinting.
|
||||
- Capability flag normalization remains to be expanded after a live target sample
|
||||
is captured.
|
||||
|
||||
|
||||
65
src/guide_board/artifacts.py
Normal file
65
src/guide_board/artifacts.py
Normal file
@@ -0,0 +1,65 @@
|
||||
"""Artifact manifest helpers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import mimetypes
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from guide_board.schema import assert_valid
|
||||
|
||||
|
||||
def build_artifact_manifest(
    run_dir: Path,
    run_id: str,
    evidence: list[dict[str, Any]],
) -> list[dict[str, Any]]:
    """Build the raw-artifact manifest from runner-emitted artifact refs.

    Each evidence item's ``artifact_refs`` are visited in order. A ref is
    skipped when it is not a string, was already seen, resolves outside
    ``run_dir``, or does not point at an existing regular file. Every
    remaining ref becomes one manifest entry carrying a SHA-256 checksum and
    a guessed media type; the entry is validated against the
    ``raw-artifact`` schema before it is appended.

    Args:
        run_dir: Run directory that artifact refs are relative to.
        run_id: Identifier recorded on every manifest entry.
        evidence: Evidence records; each must carry ``check_id``, which is
            recorded as the producer of the refs it first introduces.

    Returns:
        Manifest entries in first-seen order.

    Raises:
        KeyError: If an evidence item has no ``check_id``.
    """
    # Resolve the run directory once; it does not change between refs.
    root = run_dir.resolve()
    artifacts: list[dict[str, Any]] = []
    seen: set[str] = set()

    for item in evidence:
        producer = item["check_id"]
        for artifact_ref in item.get("artifact_refs", []):
            if not isinstance(artifact_ref, str) or artifact_ref in seen:
                continue
            seen.add(artifact_ref)

            path = (run_dir / artifact_ref).resolve()
            try:
                # Reject refs that escape the run directory (``..``,
                # absolute paths elsewhere, symlinks pointing outside).
                path.relative_to(root)
            except ValueError:
                continue

            # is_file() is already False for missing paths, so no separate
            # exists() check is needed.
            if not path.is_file():
                continue

            artifact = {
                "id": f"artifact:{_safe_id(artifact_ref)}",
                "run_id": run_id,
                "path": artifact_ref,
                "media_type": _media_type(path),
                "producer": producer,
                "checksum": f"sha256:{_sha256(path)}",
                "created_at": datetime.now(timezone.utc).isoformat(),
                "retention_class": "raw",
            }
            assert_valid(artifact, "raw-artifact")
            artifacts.append(artifact)

    return artifacts
|
||||
|
||||
|
||||
def _sha256(path: Path) -> str:
|
||||
digest = hashlib.sha256()
|
||||
with path.open("rb") as handle:
|
||||
for chunk in iter(lambda: handle.read(1024 * 1024), b""):
|
||||
digest.update(chunk)
|
||||
return digest.hexdigest()
|
||||
|
||||
|
||||
def _media_type(path: Path) -> str:
|
||||
guessed, _ = mimetypes.guess_type(path.name)
|
||||
if guessed:
|
||||
return guessed
|
||||
return "application/octet-stream"
|
||||
|
||||
|
||||
def _safe_id(value: str) -> str:
|
||||
return "".join(char if char.isalnum() or char in {"-", "_"} else "_" for char in value)
|
||||
@@ -7,6 +7,7 @@ from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from guide_board.artifacts import build_artifact_manifest
|
||||
from guide_board.io import write_json
|
||||
from guide_board.planning import build_run_plan
|
||||
from guide_board.runners import run_step
|
||||
@@ -35,7 +36,16 @@ def run_assessment(
|
||||
for finding in findings:
|
||||
assert_valid(finding, "finding")
|
||||
|
||||
assessment_package = _assessment_package(run_id, plan, evidence, findings, created_at)
|
||||
artifact_manifest = build_artifact_manifest(run_dir, run_id, evidence)
|
||||
|
||||
assessment_package = _assessment_package(
|
||||
run_id,
|
||||
plan,
|
||||
evidence,
|
||||
findings,
|
||||
artifact_manifest,
|
||||
created_at,
|
||||
)
|
||||
assert_valid(assessment_package, "assessment-package")
|
||||
|
||||
run_metadata = {
|
||||
@@ -164,6 +174,7 @@ def _assessment_package(
|
||||
plan: dict[str, Any],
|
||||
evidence: list[dict[str, Any]],
|
||||
findings: list[dict[str, Any]],
|
||||
artifact_manifest: list[dict[str, Any]],
|
||||
created_at: str,
|
||||
) -> dict[str, Any]:
|
||||
summary = dict(Counter(item["result"] for item in evidence))
|
||||
@@ -179,7 +190,7 @@ def _assessment_package(
|
||||
"summary": summary,
|
||||
"findings": findings,
|
||||
"evidence_refs": [item["id"] for item in evidence],
|
||||
"artifact_manifest": [],
|
||||
"artifact_manifest": artifact_manifest,
|
||||
"waivers": [],
|
||||
"certification_boundary": "Guide Board produces preparation evidence only and does not issue certifications or audit assurance.",
|
||||
"created_at": created_at,
|
||||
|
||||
@@ -153,13 +153,35 @@ class CoreArchitectureTests(unittest.TestCase):
|
||||
encoding="utf-8"
|
||||
)
|
||||
)
|
||||
package = json.loads(
|
||||
(Path(result["run_dir"]) / "reports" / "assessment-package.json").read_text(
|
||||
encoding="utf-8"
|
||||
)
|
||||
)
|
||||
|
||||
self.assertEqual(result["status"], "completed")
|
||||
self.assertEqual(evidence["evidence"][0]["result"], "pass")
|
||||
self.assertEqual(
|
||||
sorted(evidence["evidence"][0]["artifact_refs"]),
|
||||
[
|
||||
"artifacts/open-cmis-tck/preflight/response-body.bin",
|
||||
"artifacts/open-cmis-tck/preflight/response-metadata.json",
|
||||
],
|
||||
)
|
||||
self.assertEqual(
|
||||
evidence["evidence"][0]["facts"]["repository_ids"],
|
||||
["local-test-repository"],
|
||||
)
|
||||
self.assertEqual(len(package["artifact_manifest"]), 2)
|
||||
self.assertTrue(
|
||||
(
|
||||
Path(result["run_dir"])
|
||||
/ "artifacts"
|
||||
/ "open-cmis-tck"
|
||||
/ "preflight"
|
||||
/ "response-metadata.json"
|
||||
).exists()
|
||||
)
|
||||
finally:
|
||||
server.shutdown()
|
||||
thread.join(timeout=5)
|
||||
@@ -243,6 +265,11 @@ class CoreArchitectureTests(unittest.TestCase):
|
||||
encoding="utf-8"
|
||||
)
|
||||
)["findings"]
|
||||
package = json.loads(
|
||||
(Path(result["run_dir"]) / "reports" / "assessment-package.json").read_text(
|
||||
encoding="utf-8"
|
||||
)
|
||||
)
|
||||
|
||||
self.assertEqual(result["status"], "blocked")
|
||||
self.assertEqual(evidence[0]["result"], "pass")
|
||||
@@ -256,6 +283,7 @@ class CoreArchitectureTests(unittest.TestCase):
|
||||
findings[0]["classification"],
|
||||
evidence[1]["facts"]["blocked_reason"],
|
||||
)
|
||||
self.assertGreaterEqual(len(package["artifact_manifest"]), 3)
|
||||
finally:
|
||||
server.shutdown()
|
||||
thread.join(timeout=5)
|
||||
|
||||
@@ -184,6 +184,8 @@ Acceptance:
|
||||
of CMIS.
|
||||
- The baseline executor writes the run directory contract, normalized evidence,
|
||||
an assessment package, and a Markdown report.
|
||||
- The assessment package includes a fingerprinted artifact manifest for
|
||||
runner-emitted raw artifacts.
|
||||
|
||||
## D1.7 - Extension SDK Skeleton
|
||||
|
||||
@@ -204,6 +206,8 @@ Acceptance:
|
||||
- Python module runner contracts are documented in `docs/EXTENSION-SDK.md`.
|
||||
- Manifest-declared command runners execute without shell expansion and return
|
||||
normalized evidence through the same runner result contract.
|
||||
- Runner artifact refs are constrained to the run directory and fingerprinted in
|
||||
the assessment package artifact manifest.
|
||||
|
||||
## D1.8 - CMIS Seed Extension Integration
|
||||
|
||||
|
||||
Reference in New Issue
Block a user