feat: finish mailbox evidence scanner mvp

This commit is contained in:
2026-06-02 02:24:39 +02:00
parent 226c045397
commit 7ab1f9deb9
11 changed files with 192 additions and 18 deletions

View File

@@ -36,8 +36,8 @@ marking messages seen are intentionally unsupported in this MVP.
endpoint quality hints. endpoint quality hints.
- CSV report generation, including `--report-only-new`. - CSV report generation, including `--report-only-new`.
- Golden fixture tests for hard bounce, soft bounce, delayed delivery, final - Golden fixture tests for hard bounce, soft bounce, delayed delivery, final
failure, complaint, unsubscribe, unknown return, out-of-office, and human failure, complaint, unsubscribe, challenge-response, unknown return,
reply signals. parse-failure, out-of-office, and human reply signals.
Provider webhooks, outbound sending, suppression workflows, OAuth mailbox login, Provider webhooks, outbound sending, suppression workflows, OAuth mailbox login,
and a UI remain outside this first mailbox-scanner slice. and a UI remain outside this first mailbox-scanner slice.

View File

@@ -36,6 +36,7 @@ coordination runtime decides whether those facts satisfy a coordination case.
| `unsubscribe_or_opt_out` | `notification.channel.unsubscribe_received` | `fail.unsubscribed` | | `unsubscribe_or_opt_out` | `notification.channel.unsubscribe_received` | `fail.unsubscribed` |
| `unknown_return_message` | `notification.endpoint.unknown` | `undef.conflicting_evidence` | | `unknown_return_message` | `notification.endpoint.unknown` | `undef.conflicting_evidence` |
| `challenge_response` | `interaction.unverified_actor_interaction` | `undef.identity_uncertain` | | `challenge_response` | `interaction.unverified_actor_interaction` | `undef.identity_uncertain` |
| `parse_failed` | `diagnostic.message.parse_failed` | `undef.parse_failed` |
## Overclaim Prevention ## Overclaim Prevention
@@ -45,6 +46,7 @@ coordination runtime decides whether those facts satisfy a coordination case.
- Out-of-office does not prove recipient awareness or action. - Out-of-office does not prove recipient awareness or action.
- Human reply does not prove legal acceptance. - Human reply does not prove legal acceptance.
- Unknown return messages remain visible. - Unknown return messages remain visible.
- Parse failures are diagnostic rows, not delivery or interaction outcomes.
- Scanner and proxy interactions must stay below identity-bound interaction. - Scanner and proxy interactions must stay below identity-bound interaction.
## Endpoint Quality Hints ## Endpoint Quality Hints

View File

@@ -74,10 +74,14 @@ Examples:
- `complaint_or_abuse` -> `notification.channel.complaint_received` - `complaint_or_abuse` -> `notification.channel.complaint_received`
- `unsubscribe_or_opt_out` -> `notification.channel.unsubscribe_received` - `unsubscribe_or_opt_out` -> `notification.channel.unsubscribe_received`
- `out_of_office` -> `interaction.out_of_office_received` - `out_of_office` -> `interaction.out_of_office_received`
- `challenge_response` -> `interaction.unverified_actor_interaction`
- `human_reply` -> `interaction.reply_received` - `human_reply` -> `interaction.reply_received`
- `parse_failed` -> `diagnostic.message.parse_failed`
The mapper does not emit evidence for unrelated messages. Unknown return The mapper does not emit evidence for unrelated messages. Unknown return
messages stay visible as `notification.endpoint.unknown`. messages stay visible as `notification.endpoint.unknown`. Parse failures are
visible as diagnostics without claiming delivery, interaction, identity, or
endpoint quality.
## coordination-engine Alignment ## coordination-engine Alignment

View File

@@ -12,6 +12,7 @@ EMITTED_EVENT_TYPES = [
"interaction.reply_received", "interaction.reply_received",
"interaction.out_of_office_received", "interaction.out_of_office_received",
"notification.endpoint.unknown", "notification.endpoint.unknown",
"diagnostic.message.parse_failed",
] ]

View File

@@ -96,7 +96,13 @@ EVIDENCE_MAPPINGS: dict[MessageClass, EvidenceMapping | None] = {
"Challenge-response or automated interaction was observed.", "Challenge-response or automated interaction was observed.",
), ),
MessageClass.UNRELATED_MESSAGE: None, MessageClass.UNRELATED_MESSAGE: None,
MessageClass.PARSE_FAILED: None, MessageClass.PARSE_FAILED: EvidenceMapping(
"diagnostic.message.parse_failed",
AssessmentCategory.UNDEF,
"undef.parse_failed",
EvidenceStrength.NONE,
"Message source could not be parsed reliably; no delivery or interaction claim is made.",
),
} }

View File

@@ -1,6 +1,7 @@
from __future__ import annotations from __future__ import annotations
import hashlib import hashlib
import html
import re import re
from datetime import UTC, datetime from datetime import UTC, datetime
from email import policy from email import policy
@@ -34,7 +35,25 @@ def parse_message_bytes(
now: datetime | None = None, now: datetime | None = None,
) -> tuple[InboundMailboxMessage, ParsedMailboxMessage, EmailEvidenceCandidate | None]: ) -> tuple[InboundMailboxMessage, ParsedMailboxMessage, EmailEvidenceCandidate | None]:
observed_at = now or datetime.now(UTC) observed_at = now or datetime.now(UTC)
msg = BytesParser(policy=policy.default).parsebytes(raw_bytes) if not raw_bytes.strip():
return _parse_failed(
mailbox_id=mailbox_id,
raw_message_ref=raw_message_ref,
imap_uid=imap_uid,
observed_at=observed_at,
reason="empty_message",
)
try:
msg = BytesParser(policy=policy.default).parsebytes(raw_bytes)
except Exception as exc:
return _parse_failed(
mailbox_id=mailbox_id,
raw_message_ref=raw_message_ref,
imap_uid=imap_uid,
observed_at=observed_at,
reason=f"parser_error:{type(exc).__name__}",
)
message_id = _clean_header(msg.get("Message-ID")) message_id = _clean_header(msg.get("Message-ID"))
subject = _clean_header(msg.get("Subject")) subject = _clean_header(msg.get("Subject"))
@@ -104,8 +123,12 @@ def classify_message(
text = combined_text.lower() text = combined_text.lower()
enhanced_status = _first_match(ENHANCED_STATUS_RE, combined_text) enhanced_status = _first_match(ENHANCED_STATUS_RE, combined_text)
smtp_status = _first_match(SMTP_STATUS_RE, combined_text) smtp_status = _first_match(SMTP_STATUS_RE, combined_text)
affected = _extract_affected_recipient(combined_text) dsn_fields = _extract_dsn_fields(combined_text)
affected = dsn_fields.get("final_recipient_email") or dsn_fields.get("original_recipient_email")
if affected is None:
affected = _extract_affected_recipient(combined_text)
original_message_id = _extract_headerish(combined_text, "Original-Message-ID") original_message_id = _extract_headerish(combined_text, "Original-Message-ID")
original_recipient = dsn_fields.get("original_recipient_email") or affected
notes: list[str] = [] notes: list[str] = []
message_class = MessageClass.UNRELATED_MESSAGE message_class = MessageClass.UNRELATED_MESSAGE
@@ -124,6 +147,10 @@ def classify_message(
message_class = MessageClass.OUT_OF_OFFICE message_class = MessageClass.OUT_OF_OFFICE
confidence = Confidence.MEDIUM confidence = Confidence.MEDIUM
reason_code = "auto_reply" reason_code = "auto_reply"
elif _is_challenge_response(text):
message_class = MessageClass.CHALLENGE_RESPONSE
confidence = Confidence.MEDIUM
reason_code = "challenge_response"
elif _contains_any(text, ["will keep trying", "delivery delayed", "message delayed", "not yet delivered"]): elif _contains_any(text, ["will keep trying", "delivery delayed", "message delayed", "not yet delivered"]):
message_class = MessageClass.DELAYED_DELIVERY_NOTICE message_class = MessageClass.DELAYED_DELIVERY_NOTICE
confidence = Confidence.HIGH confidence = Confidence.HIGH
@@ -140,6 +167,9 @@ def classify_message(
reason_code = "unknown_return" reason_code = "unknown_return"
notes.append("Return-channel message did not match a reliable classifier.") notes.append("Return-channel message did not match a reliable classifier.")
for key in ["original_recipient", "final_recipient", "action", "diagnostic_code", "remote_mta"]:
if dsn_fields.get(key):
notes.append(f"{key}={dsn_fields[key]}")
if enhanced_status: if enhanced_status:
notes.append(f"enhanced_status={enhanced_status}") notes.append(f"enhanced_status={enhanced_status}")
if smtp_status: if smtp_status:
@@ -153,7 +183,7 @@ def classify_message(
message_class=message_class, message_class=message_class,
affected_email_address=affected, affected_email_address=affected,
original_message_id=original_message_id, original_message_id=original_message_id,
original_recipient=affected, original_recipient=original_recipient,
smtp_status_code=smtp_status, smtp_status_code=smtp_status,
enhanced_status_code=enhanced_status, enhanced_status_code=enhanced_status,
reason_code=reason_code, reason_code=reason_code,
@@ -183,6 +213,7 @@ def _classify_dsn(
def _extract_text(msg) -> str: def _extract_text(msg) -> str:
chunks: list[str] = [] chunks: list[str] = []
html_chunks: list[str] = []
if msg.is_multipart(): if msg.is_multipart():
for part in msg.walk(): for part in msg.walk():
if part.get_content_maintype() == "multipart": if part.get_content_maintype() == "multipart":
@@ -192,14 +223,75 @@ def _extract_text(msg) -> str:
chunks.append(str(part.get_content())) chunks.append(str(part.get_content()))
except Exception: except Exception:
continue continue
elif part.get_content_type() == "text/html":
try:
html_chunks.append(_html_to_text(str(part.get_content())))
except Exception:
continue
else: else:
try: try:
chunks.append(str(msg.get_content())) content = str(msg.get_content())
if msg.get_content_type() == "text/html":
html_chunks.append(_html_to_text(content))
else:
chunks.append(content)
except Exception: except Exception:
payload = msg.get_payload(decode=True) payload = msg.get_payload(decode=True)
if payload: if payload:
chunks.append(payload.decode(errors="replace")) chunks.append(payload.decode(errors="replace"))
return "\n".join(chunks) if chunks:
return "\n".join(chunks)
return "\n".join(html_chunks)
def _parse_failed(
    *,
    mailbox_id: str,
    raw_message_ref: str | None,
    imap_uid: str | None,
    observed_at: datetime,
    reason: str,
) -> tuple[InboundMailboxMessage, ParsedMailboxMessage, EmailEvidenceCandidate | None]:
    """Build the (inbound, parsed, candidate) triple for an unparseable message.

    No header or body data is available on this path, so every
    message-derived field is left empty and the record is identified purely
    by the mailbox id, IMAP uid, raw message reference and failure reason.

    Args:
        mailbox_id: Identifier of the mailbox the message came from.
        raw_message_ref: Reference to the stored raw message, if any.
        imap_uid: IMAP uid of the message, if any.
        observed_at: Timestamp used for first/last seen and parsed-at.
        reason: Short failure code; stored as ``reason_code`` and in notes.

    Returns:
        The inbound record, the ``PARSE_FAILED`` parsed record, and whatever
        ``candidate_from_parsed`` produces for that parsed record.
    """
    # Missing optional identifiers contribute an empty segment so the key
    # always has the same four "|"-separated positions.
    key_parts = [mailbox_id, imap_uid or "", raw_message_ref or "", reason]
    dedup = "|".join(key_parts)
    message_uuid = str(uuid5(NAMESPACE_URL, "email-connect:message:" + dedup))

    inbound_record = InboundMailboxMessage(
        mailbox_message_id=message_uuid,
        mailbox_id=mailbox_id,
        imap_uid=imap_uid,
        message_id_header=None,
        received_at=None,
        from_address=None,
        to_addresses=[],
        subject=None,
        raw_headers_ref=raw_message_ref,
        raw_message_ref=raw_message_ref,
        first_seen_at=observed_at,
        last_seen_at=observed_at,
        deduplication_key=dedup,
    )

    # The parsed id is derived from the message id, the parser version and a
    # fixed "parse_failed" marker.
    parsed_basis = "|".join([message_uuid, PARSER_VERSION, "parse_failed"])
    parsed_uuid = str(uuid5(NAMESPACE_URL, "email-connect:parsed:" + parsed_basis))
    parsed_record = ParsedMailboxMessage(
        parsed_message_id=parsed_uuid,
        mailbox_message_id=message_uuid,
        parser_version=PARSER_VERSION,
        message_class=MessageClass.PARSE_FAILED,
        affected_email_address=None,
        original_message_id=None,
        original_recipient=None,
        smtp_status_code=None,
        enhanced_status_code=None,
        reason_code=reason,
        confidence=Confidence.HIGH,
        parsed_at=observed_at,
        notes=[f"parse_failure={reason}"],
    )

    evidence = candidate_from_parsed(
        parsed_record,
        raw_message_ref=raw_message_ref,
        observed_at=observed_at,
        occurred_at=None,
    )
    return inbound_record, parsed_record, evidence
def _message_dedup_key( def _message_dedup_key(
@@ -262,6 +354,25 @@ def _extract_headerish(text: str, name: str) -> str | None:
return match.group(1).strip() if match else None return match.group(1).strip() if match else None
def _extract_dsn_fields(text: str) -> dict[str, str]:
    """Collect selected DSN header-style fields found in *text*.

    Each field that ``_extract_headerish`` finds with a non-empty value is
    stored under its snake_case key. For the two recipient fields, the first
    ``EMAIL_RE`` match inside the value is additionally stored, lower-cased,
    under ``<key>_email``.

    Returns:
        A dict holding only the fields that were present; empty when none
        were found.
    """
    header_to_key = {
        "Original-Recipient": "original_recipient",
        "Final-Recipient": "final_recipient",
        "Action": "action",
        "Diagnostic-Code": "diagnostic_code",
        "Remote-MTA": "remote_mta",
    }
    recipient_headers = ("Original-Recipient", "Final-Recipient")

    found: dict[str, str] = {}
    for header, key in header_to_key.items():
        raw_value = _extract_headerish(text, header)
        if not raw_value:
            continue
        found[key] = raw_value
        if header not in recipient_headers:
            continue
        # Recipient values carry an address-type prefix (e.g. "rfc822; ...");
        # keep the raw value above and expose the bare address separately.
        address = EMAIL_RE.search(raw_value)
        if address:
            found[f"{key}_email"] = address.group(0).lower()
    return found
def _extract_affected_recipient(text: str) -> str | None: def _extract_affected_recipient(text: str) -> str | None:
for name in ["Final-Recipient", "Original-Recipient", "X-Failed-Recipients", "Failed-Recipient"]: for name in ["Final-Recipient", "Original-Recipient", "X-Failed-Recipients", "Failed-Recipient"]:
value = _extract_headerish(text, name) value = _extract_headerish(text, name)
@@ -309,6 +420,21 @@ def _is_out_of_office(text: str) -> bool:
) )
def _is_challenge_response(text: str) -> bool:
    """Report whether *text* contains a known challenge-response phrase.

    The phrases are all lower-case and are handed unchanged to
    ``_contains_any`` together with *text*.
    NOTE(review): presumably *text* must already be lower-cased by the
    caller for these to match; confirm against ``_contains_any``.
    """
    marker_phrases = [
        "challenge-response",
        "challenge response",
        "sender verification",
        "verify your email before your message can be delivered",
        "confirm you are a real person",
        "confirm that you sent this message",
        "please verify yourself",
    ]
    return _contains_any(text, marker_phrases)
def _looks_like_human_reply(inbound: InboundMailboxMessage, text: str) -> bool: def _looks_like_human_reply(inbound: InboundMailboxMessage, text: str) -> bool:
subject = (inbound.subject or "").lower() subject = (inbound.subject or "").lower()
if _contains_any(text, ["auto-submitted: auto-replied", "x-autoreply", "auto-generated"]): if _contains_any(text, ["auto-submitted: auto-replied", "x-autoreply", "auto-generated"]):
@@ -318,3 +444,8 @@ def _looks_like_human_reply(inbound: InboundMailboxMessage, text: str) -> bool:
def _looks_return_related(text: str) -> bool: def _looks_return_related(text: str) -> bool:
return _contains_any(text, ["delivery", "mailbox", "recipient", "message", "smtp", "unsubscribe", "reply"]) return _contains_any(text, ["delivery", "mailbox", "recipient", "message", "smtp", "unsubscribe", "reply"])
def _html_to_text(value: str) -> str:
without_tags = re.sub(r"<[^>]+>", " ", value)
return re.sub(r"\s+", " ", html.unescape(without_tags)).strip()

View File

@@ -0,0 +1,9 @@
From: Sender Verification <verify@example.net>
To: sender@example.com
Subject: Sender verification required
Date: Tue, 02 Jun 2026 10:09:00 +0000
Message-ID: <challenge-response@example.net>
Content-Type: text/plain; charset=utf-8
This is a challenge-response message. Please verify yourself before your message
can be delivered to challenge@example.com.

View File

@@ -0,0 +1 @@

View File

@@ -72,6 +72,26 @@ class ParserTests(unittest.TestCase):
self.assertIsNotNone(candidate) self.assertIsNotNone(candidate)
self.assertEqual(candidate.event_type, "notification.endpoint.unknown") self.assertEqual(candidate.event_type, "notification.endpoint.unknown")
def test_challenge_response_stays_identity_uncertain(self) -> None:
    # The challenge-response fixture must be classified as such and mapped
    # to an unverified interaction with an identity-uncertain assessment.
    fixture_path = FIXTURES / "challenge_response.eml"
    _, parsed_msg, evidence = parse_message_file(fixture_path, mailbox_id="test")

    self.assertEqual(parsed_msg.message_class, MessageClass.CHALLENGE_RESPONSE)
    self.assertIsNotNone(evidence)
    self.assertEqual(evidence.event_type, "interaction.unverified_actor_interaction")
    self.assertEqual(evidence.assessment_subclass, "undef.identity_uncertain")
def test_parse_failure_is_reportable_diagnostic(self) -> None:
    # The parse_failed fixture must still yield an evidence candidate, and
    # that candidate must be the diagnostic parse-failure event.
    fixture_path = FIXTURES / "parse_failed.eml"
    _, parsed_msg, evidence = parse_message_file(fixture_path, mailbox_id="test")

    self.assertEqual(parsed_msg.message_class, MessageClass.PARSE_FAILED)
    self.assertIsNotNone(evidence)
    self.assertEqual(evidence.event_type, "diagnostic.message.parse_failed")
    self.assertEqual(evidence.assessment_subclass, "undef.parse_failed")
def test_dsn_detail_fields_are_preserved_as_notes(self) -> None:
    # Raw DSN field values from the hard-bounce fixture must appear verbatim
    # as "key=value" entries in the parsed notes.
    fixture_path = FIXTURES / "hard_bounce.eml"
    _, parsed_msg, _ = parse_message_file(fixture_path, mailbox_id="test")

    recorded_notes = parsed_msg.notes
    self.assertIn("final_recipient=rfc822; missing@example.com", recorded_notes)
    self.assertIn("action=failed", recorded_notes)
    self.assertIn("diagnostic_code=smtp; 550 5.1.1 User unknown", recorded_notes)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

View File

@@ -28,13 +28,13 @@ class ScannerTests(unittest.TestCase):
second = scan_mailbox(config) second = scan_mailbox(config)
full = scan_mailbox(config, full_rescan=True, report_only_new=True) full = scan_mailbox(config, full_rescan=True, report_only_new=True)
self.assertEqual(first.scan.messages_seen, 9) self.assertEqual(first.scan.messages_seen, 11)
self.assertEqual(first.scan.messages_new, 9) self.assertEqual(first.scan.messages_new, 11)
self.assertGreaterEqual(first.scan.evidence_events_created, 9) self.assertGreaterEqual(first.scan.evidence_events_created, 11)
self.assertEqual(second.scan.messages_seen, 0) self.assertEqual(second.scan.messages_seen, 0)
self.assertEqual(second.scan.messages_new, 0) self.assertEqual(second.scan.messages_new, 0)
self.assertEqual(second.scan.evidence_events_created, 0) self.assertEqual(second.scan.evidence_events_created, 0)
self.assertEqual(full.scan.messages_seen, 9) self.assertEqual(full.scan.messages_seen, 11)
self.assertEqual(full.scan.messages_new, 0) self.assertEqual(full.scan.messages_new, 0)
self.assertEqual(full.scan.evidence_events_created, 0) self.assertEqual(full.scan.evidence_events_created, 0)
self.assertTrue(first.report_path and first.report_path.exists()) self.assertTrue(first.report_path and first.report_path.exists())

View File

@@ -4,7 +4,7 @@ type: workplan
title: "MVP Mailbox Evidence Scanner" title: "MVP Mailbox Evidence Scanner"
domain: custodian domain: custodian
repo: email-connect repo: email-connect
status: active status: finished
owner: codex owner: codex
topic_slug: custodian topic_slug: custodian
created: "2026-06-02" created: "2026-06-02"
@@ -802,7 +802,7 @@ Scanner extracts basic metadata and text from representative bounce and reply me
```task ```task
id: EMAIL-WP-0002-T05 id: EMAIL-WP-0002-T05
status: progress status: done
priority: high priority: high
state_hub_task_id: "8ea826d1-0add-4573-9bb4-2b73adefba55" state_hub_task_id: "8ea826d1-0add-4573-9bb4-2b73adefba55"
``` ```
@@ -831,7 +831,7 @@ Representative hard and soft bounce samples are classified correctly.
```task ```task
id: EMAIL-WP-0002-T06 id: EMAIL-WP-0002-T06
status: progress status: done
priority: high priority: high
state_hub_task_id: "4d94a332-173b-4787-8fb2-27aa63db6a8d" state_hub_task_id: "4d94a332-173b-4787-8fb2-27aa63db6a8d"
``` ```
@@ -932,7 +932,7 @@ Complaint/unsubscribe updates suppression state.
```task ```task
id: EMAIL-WP-0002-T10 id: EMAIL-WP-0002-T10
status: progress status: done
priority: medium priority: medium
state_hub_task_id: "5ab35176-d6c2-4c73-b7b3-bde4c097e3ee" state_hub_task_id: "5ab35176-d6c2-4c73-b7b3-bde4c097e3ee"
``` ```
@@ -959,7 +959,7 @@ Report can be opened in spreadsheet tools.
```task ```task
id: EMAIL-WP-0002-T11 id: EMAIL-WP-0002-T11
status: progress status: done
priority: high priority: high
state_hub_task_id: "514fa099-781b-4590-aae4-c28970413b3f" state_hub_task_id: "514fa099-781b-4590-aae4-c28970413b3f"
``` ```