feat: add expected recipient reporting

This commit is contained in:
2026-06-02 03:07:13 +02:00
parent 5ea6c738d2
commit b7591f531b
17 changed files with 629 additions and 22 deletions

View File

@@ -20,6 +20,7 @@ REPORT_COLUMNS = [
"assessment_category",
"assessment_subclass",
"affected_email_address",
"known_recipient",
"original_message_id",
"original_recipient",
"smtp_status_code",
@@ -49,16 +50,17 @@ def write_evidence_report(
scan_id: str,
mailbox_id: str,
generated_at: datetime | None = None,
expected_recipients: set[str] | None = None,
) -> Path:
generated = generated_at or datetime.now(UTC)
out_dir = Path(output_dir)
out_dir.mkdir(parents=True, exist_ok=True)
path = out_dir / report_filename(generated)
path = _unique_report_path(out_dir / report_filename(generated))
with path.open("w", newline="", encoding="utf-8") as fh:
writer = csv.DictWriter(fh, fieldnames=REPORT_COLUMNS)
writer.writeheader()
for row in rows:
for row in _ordered_rows(rows, expected_recipients=expected_recipients or set()):
writer.writerow(_report_row(row, scan_id=scan_id, mailbox_id=mailbox_id, generated_at=generated))
return path
@@ -66,6 +68,7 @@ def write_evidence_report(
def _report_row(row: dict, *, scan_id: str, mailbox_id: str, generated_at: datetime) -> dict:
metadata = _json(row.get("metadata_json"))
notes = _json(row.get("notes_json"))
known_recipient = _known_recipient(row, expected_recipients=set(row.get("_expected_recipients", [])))
return {
"report_generated_at": generated_at.isoformat(),
"scan_id": scan_id,
@@ -81,6 +84,7 @@ def _report_row(row: dict, *, scan_id: str, mailbox_id: str, generated_at: datet
"assessment_category": row.get("assessment_category", ""),
"assessment_subclass": row.get("assessment_subclass", ""),
"affected_email_address": row.get("affected_email_address") or "",
"known_recipient": "true" if known_recipient else "false",
"original_message_id": row.get("original_message_id") or "",
"original_recipient": metadata.get("original_recipient", ""),
"smtp_status_code": metadata.get("smtp_status_code") or "",
@@ -98,6 +102,29 @@ def _report_row(row: dict, *, scan_id: str, mailbox_id: str, generated_at: datet
}
def _ordered_rows(rows: list[dict], *, expected_recipients: set[str]) -> list[dict]:
    """Return tagged copies of *rows*, ordered for the report.

    Every returned row is a shallow copy of the corresponding input row with
    one extra key, ``_expected_recipients``, holding the expected addresses as
    a tuple. The input dicts are left untouched.

    Args:
        rows: Report rows to copy and order.
        expected_recipients: Addresses regarded as expected. When empty, the
            rows are returned in their input order without sorting.

    Returns:
        A new list of row copies. When ``expected_recipients`` is non-empty
        the list is sorted so that rows for which ``_known_recipient`` is true
        come first, then by ``affected_email_address``, ``observed_at``,
        ``event_type`` and ``deduplication_key`` (each compared as a string,
        with missing or falsy values treated as ``""``).
    """
    # The tuple is identical for every row, so build it once instead of
    # converting the set again for each row.
    recipients = tuple(expected_recipients)
    enriched = [dict(row, _expected_recipients=recipients) for row in rows]
    if not expected_recipients:
        return enriched

    def sort_key(row: dict) -> tuple[bool, str, str, str, str]:
        return (
            # False sorts before True, so known recipients lead the output.
            not _known_recipient(row, expected_recipients=expected_recipients),
            str(row.get("affected_email_address") or ""),
            str(row.get("observed_at") or ""),
            str(row.get("event_type") or ""),
            str(row.get("deduplication_key") or ""),
        )

    # sorted() is stable: rows with equal keys keep their input order.
    return sorted(enriched, key=sort_key)
def _known_recipient(row: dict, *, expected_recipients: set[str]) -> bool:
    """Tell whether *row* refers to a known recipient.

    A row counts as known when its ``known_recipient`` value is exactly
    ``True``, or when its ``affected_email_address``, lowercased, is a member
    of ``expected_recipients``. A missing or empty address never matches.

    Args:
        row: Report row to inspect.
        expected_recipients: Addresses to match the lowercased address
            against.

    Returns:
        ``True`` if the row is flagged or its address is expected, otherwise
        ``False``.
    """
    # Identity check on purpose: truthy stand-ins such as "true" or 1 do not
    # count as an explicit flag.
    if row.get("known_recipient") is True:
        return True

    raw_address = row.get("affected_email_address")
    if not raw_address:
        return False

    normalized = str(raw_address).lower()
    # NOTE(review): only the row's address is lowercased here; this presumably
    # relies on expected_recipients already holding lowercase entries -
    # confirm against the caller.
    return bool(normalized) and normalized in expected_recipients
def _json(value: str | None) -> dict | list:
if not value:
return {}
@@ -105,3 +132,15 @@ def _json(value: str | None) -> dict | list:
return json.loads(value)
except json.JSONDecodeError:
return {}
def _unique_report_path(path: Path) -> Path:
    """Pick a report path that does not exist yet.

    If *path* is free it is returned as is. Otherwise numbered siblings of the
    form ``<stem>-NN<suffix>`` are tried in order for NN from 01 up to 999,
    and the first one that does not exist is returned.

    This function only checks for existence; it does not create the file.

    Args:
        path: Preferred location of the report file.

    Returns:
        *path* itself, or the first free numbered variant in the same
        directory.

    Raises:
        RuntimeError: If *path* and all 999 numbered variants already exist.
    """
    if not path.exists():
        return path

    base, extension = path.stem, path.suffix
    # Lazy generator: candidates are built and probed one at a time, so the
    # search stops at the first free name.
    numbered = (
        path.with_name(f"{base}-{number:02d}{extension}")
        for number in range(1, 1000)
    )
    free = next((option for option in numbered if not option.exists()), None)
    if free is None:
        raise RuntimeError(f"Could not allocate unique report filename for {path}")
    return free