feat: start mailbox evidence scanner

This commit is contained in:
2026-06-02 01:19:09 +02:00
parent 8292ffe41d
commit 8532583182
26 changed files with 1733 additions and 18 deletions

View File

@@ -0,0 +1,107 @@
from __future__ import annotations
import csv
import json
from datetime import UTC, datetime
from pathlib import Path
# Ordered CSV header of the evidence report. write_evidence_report hands this
# list to csv.DictWriter as ``fieldnames``, so the order below is the column
# order of the generated file; _report_row produces exactly these keys.
REPORT_COLUMNS = [
    # Taken from the write_evidence_report arguments (same on every row).
    "report_generated_at",
    "scan_id",
    "mailbox_id",
    # Mailbox message and its headers.
    "mailbox_message_id",
    "mailbox_received_at",
    "source_from",
    "source_to",
    "source_subject",
    "message_id_header",
    # Classification and assessment.
    "detected_message_class",
    "normalized_event_type",
    "assessment_category",
    "assessment_subclass",
    # Affected address and original message.
    "affected_email_address",
    "original_message_id",
    "original_recipient",
    # Status codes and reason.
    "smtp_status_code",
    "enhanced_status_code",
    "reason_code",
    # Confidence and strength.
    "confidence",
    "evidence_strength",
    # Timestamps.
    "occurred_at",
    "observed_at",
    "first_seen_at",
    "last_seen_at",
    # Bookkeeping.
    "deduplication_key",
    "raw_message_ref",
    "notes",
]
def report_filename(now: datetime | None = None) -> str:
stamp = (now or datetime.now(UTC)).strftime("%Y%m%d-%H%M%S")
return f"email-channel-evidence-report-{stamp}.csv"
def write_evidence_report(
    rows: list[dict],
    *,
    output_dir: str | Path,
    scan_id: str,
    mailbox_id: str,
    generated_at: datetime | None = None,
) -> Path:
    """Write ``rows`` to a CSV evidence report and return the file's path.

    Args:
        rows: Evidence rows; each one is converted with ``_report_row``.
        output_dir: Directory for the report; created (with parents) if missing.
        scan_id: Value written to the ``scan_id`` column of every row.
        mailbox_id: Value written to the ``mailbox_id`` column of every row.
        generated_at: Timestamp used for both the file name and the
            ``report_generated_at`` column; defaults to the current UTC time.

    Returns:
        Path of the written file. The header row is always written, even when
        ``rows`` is empty. The file is opened in ``"w"`` mode, so an existing
        file with the same name is replaced.
    """
    timestamp = generated_at if generated_at else datetime.now(UTC)

    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    report_path = target_dir / report_filename(timestamp)

    # newline="" lets the csv module control line endings itself.
    with report_path.open("w", newline="", encoding="utf-8") as handle:
        csv_out = csv.DictWriter(handle, fieldnames=REPORT_COLUMNS)
        csv_out.writeheader()
        csv_out.writerows(
            _report_row(entry, scan_id=scan_id, mailbox_id=mailbox_id, generated_at=timestamp)
            for entry in rows
        )
    return report_path
def _report_row(row: dict, *, scan_id: str, mailbox_id: str, generated_at: datetime) -> dict:
    """Build one CSV record, keyed by report column name, from an evidence row.

    Values come from three places: the keyword arguments, top-level keys of
    ``row``, and the JSON object stored as text in ``row["metadata_json"]``.
    ``row["notes_json"]`` is decoded and its items are joined with ``"; "``
    into the ``notes`` column (a JSON object contributes its keys).

    Args:
        row: Evidence row; missing keys produce empty strings.
        scan_id: Written verbatim to the ``scan_id`` column.
        mailbox_id: Written verbatim to the ``mailbox_id`` column.
        generated_at: Written as ISO 8601 text to ``report_generated_at``.

    Returns:
        A dict with one entry per report column.
    """
    metadata = _json(row.get("metadata_json"))
    if not isinstance(metadata, dict):
        # Valid JSON that is not an object (e.g. an array) has no named
        # fields; without this guard ``.get`` raises AttributeError and a
        # single odd row aborts the whole report.
        metadata = {}

    notes = _json(row.get("notes_json"))
    if not isinstance(notes, (list, dict)):
        # A scalar or null document is not a collection of notes.
        notes = []

    return {
        "report_generated_at": generated_at.isoformat(),
        "scan_id": scan_id,
        "mailbox_id": mailbox_id,
        "mailbox_message_id": row.get("mailbox_message_id", ""),
        # NOTE(review): filled from "occurred_at", the same source as the
        # "occurred_at" column below - confirm no separate received
        # timestamp is available on the row.
        "mailbox_received_at": row.get("occurred_at") or "",
        "source_from": metadata.get("source_from", ""),
        "source_to": metadata.get("source_to", ""),
        "source_subject": metadata.get("source_subject", ""),
        "message_id_header": metadata.get("message_id_header", ""),
        "detected_message_class": metadata.get("message_class", ""),
        "normalized_event_type": row.get("event_type", ""),
        "assessment_category": row.get("assessment_category", ""),
        "assessment_subclass": row.get("assessment_subclass", ""),
        "affected_email_address": row.get("affected_email_address") or "",
        "original_message_id": row.get("original_message_id") or "",
        "original_recipient": metadata.get("original_recipient", ""),
        "smtp_status_code": metadata.get("smtp_status_code") or "",
        "enhanced_status_code": metadata.get("enhanced_status_code") or "",
        "reason_code": metadata.get("reason_code") or "",
        "confidence": row.get("confidence", ""),
        "evidence_strength": row.get("evidence_strength", ""),
        "occurred_at": row.get("occurred_at") or "",
        "observed_at": row.get("observed_at") or "",
        "first_seen_at": metadata.get("first_seen_at", ""),
        "last_seen_at": metadata.get("last_seen_at", ""),
        "deduplication_key": row.get("deduplication_key", ""),
        "raw_message_ref": row.get("raw_message_ref") or "",
        "notes": "; ".join(str(item) for item in notes),
    }
def _json(value: str | None) -> dict | list:
    """Decode a JSON text value into a dict or list, else return ``{}``.

    An empty dict is returned when ``value`` is empty or ``None``, when it is
    not valid JSON, or when it decodes to something other than an object or
    array.
    """
    if not value:
        return {}
    try:
        decoded = json.loads(value)
    except json.JSONDecodeError:
        return {}
    # json.loads yields None/str/int/float/bool for scalar documents such as
    # "null" or "42"; callers rely on the dict | list contract, so treat those
    # like any other unusable payload.
    if isinstance(decoded, (dict, list)):
        return decoded
    return {}