From 7ab1f9deb92e30c13c152aece04d3b79d128fe89 Mon Sep 17 00:00:00 2001 From: tegwick Date: Tue, 2 Jun 2026 02:24:39 +0200 Subject: [PATCH] feat: finish mailbox evidence scanner mvp --- README.md | 4 +- docs/email-evidence-canon.md | 2 + docs/initial-runtime-architecture.md | 6 +- src/email_connect/adapter_contract.py | 1 + src/email_connect/evidence.py | 8 +- src/email_connect/parser.py | 141 +++++++++++++++++- tests/fixtures/mailbox/challenge_response.eml | 9 ++ tests/fixtures/mailbox/parse_failed.eml | 1 + tests/test_parser.py | 20 +++ tests/test_scanner.py | 8 +- ...IL-WP-0002-mvp-mailbox-evidence-scanner.md | 10 +- 11 files changed, 192 insertions(+), 18 deletions(-) create mode 100644 tests/fixtures/mailbox/challenge_response.eml create mode 100644 tests/fixtures/mailbox/parse_failed.eml diff --git a/README.md b/README.md index 3a732b0..0892b66 100644 --- a/README.md +++ b/README.md @@ -36,8 +36,8 @@ marking messages seen are intentionally unsupported in this MVP. endpoint quality hints. - CSV report generation, including `--report-only-new`. - Golden fixture tests for hard bounce, soft bounce, delayed delivery, final - failure, complaint, unsubscribe, unknown return, out-of-office, and human - reply signals. + failure, complaint, unsubscribe, challenge-response, unknown return, + parse-failure, out-of-office, and human reply signals. Provider webhooks, outbound sending, suppression workflows, OAuth mailbox login, and a UI remain outside this first mailbox-scanner slice. diff --git a/docs/email-evidence-canon.md b/docs/email-evidence-canon.md index bd784fc..a6cfd66 100644 --- a/docs/email-evidence-canon.md +++ b/docs/email-evidence-canon.md @@ -36,6 +36,7 @@ coordination runtime decides whether those facts satisfy a coordination case. | `unsubscribe_or_opt_out` | `notification.channel.unsubscribe_received` | `fail.unsubscribed` | | `unknown_return_message` | `notification.endpoint.unknown` | `undef.conflicting_evidence` | | `challenge_response` | `interaction.unverified_actor_interaction` | `undef.identity_uncertain` | +| `parse_failed` | `diagnostic.message.parse_failed` | `undef.parse_failed` | ## Overclaim Prevention @@ -45,6 +46,7 @@ coordination runtime decides whether those facts satisfy a coordination case. - Out-of-office does not prove recipient awareness or action. - Human reply does not prove legal acceptance. - Unknown return messages remain visible. +- Parse failures are diagnostic rows, not delivery or interaction outcomes. - Scanner and proxy interactions must stay below identity-bound interaction. ## Endpoint Quality Hints diff --git a/docs/initial-runtime-architecture.md b/docs/initial-runtime-architecture.md index b42d765..3060e47 100644 --- a/docs/initial-runtime-architecture.md +++ b/docs/initial-runtime-architecture.md @@ -74,10 +74,14 @@ Examples: - `complaint_or_abuse` -> `notification.channel.complaint_received` - `unsubscribe_or_opt_out` -> `notification.channel.unsubscribe_received` - `out_of_office` -> `interaction.out_of_office_received` +- `challenge_response` -> `interaction.unverified_actor_interaction` - `human_reply` -> `interaction.reply_received` +- `parse_failed` -> `diagnostic.message.parse_failed` The mapper does not emit evidence for unrelated messages. Unknown return -messages stay visible as `notification.endpoint.unknown`. +messages stay visible as `notification.endpoint.unknown`. Parse failures are +visible as diagnostics without claiming delivery, interaction, identity, or +endpoint quality. ## coordination-engine Alignment diff --git a/src/email_connect/adapter_contract.py b/src/email_connect/adapter_contract.py index f2ba3ed..959be95 100644 --- a/src/email_connect/adapter_contract.py +++ b/src/email_connect/adapter_contract.py @@ -12,6 +12,7 @@ EMITTED_EVENT_TYPES = [ "interaction.reply_received", "interaction.out_of_office_received", "notification.endpoint.unknown", + "diagnostic.message.parse_failed", ] diff --git a/src/email_connect/evidence.py b/src/email_connect/evidence.py index fa17ee7..baad6f9 100644 --- a/src/email_connect/evidence.py +++ b/src/email_connect/evidence.py @@ -96,7 +96,13 @@ EVIDENCE_MAPPINGS: dict[MessageClass, EvidenceMapping | None] = { "Challenge-response or automated interaction was observed.", ), MessageClass.UNRELATED_MESSAGE: None, - MessageClass.PARSE_FAILED: None, + MessageClass.PARSE_FAILED: EvidenceMapping( + "diagnostic.message.parse_failed", + AssessmentCategory.UNDEF, + "undef.parse_failed", + EvidenceStrength.NONE, + "Message source could not be parsed reliably; no delivery or interaction claim is made.", + ), } diff --git a/src/email_connect/parser.py b/src/email_connect/parser.py index 87881a4..e5fba69 100644 --- a/src/email_connect/parser.py +++ b/src/email_connect/parser.py @@ -1,6 +1,7 @@ from __future__ import annotations import hashlib +import html import re from datetime import UTC, datetime from email import policy @@ -34,7 +35,25 @@ def parse_message_bytes( now: datetime | None = None, ) -> tuple[InboundMailboxMessage, ParsedMailboxMessage, EmailEvidenceCandidate | None]: observed_at = now or datetime.now(UTC) - msg = BytesParser(policy=policy.default).parsebytes(raw_bytes) + if not raw_bytes.strip(): + return _parse_failed( + mailbox_id=mailbox_id, + raw_message_ref=raw_message_ref, + imap_uid=imap_uid, + observed_at=observed_at, + reason="empty_message", + ) + + try: + msg = BytesParser(policy=policy.default).parsebytes(raw_bytes) + except Exception as exc: + return _parse_failed( + mailbox_id=mailbox_id, + raw_message_ref=raw_message_ref, + imap_uid=imap_uid, + observed_at=observed_at, + reason=f"parser_error:{type(exc).__name__}", + ) message_id = _clean_header(msg.get("Message-ID")) subject = _clean_header(msg.get("Subject")) @@ -104,8 +123,12 @@ def classify_message( text = combined_text.lower() enhanced_status = _first_match(ENHANCED_STATUS_RE, combined_text) smtp_status = _first_match(SMTP_STATUS_RE, combined_text) - affected = _extract_affected_recipient(combined_text) + dsn_fields = _extract_dsn_fields(combined_text) + affected = dsn_fields.get("final_recipient_email") or dsn_fields.get("original_recipient_email") + if affected is None: + affected = _extract_affected_recipient(combined_text) original_message_id = _extract_headerish(combined_text, "Original-Message-ID") + original_recipient = dsn_fields.get("original_recipient_email") or affected notes: list[str] = [] message_class = MessageClass.UNRELATED_MESSAGE @@ -124,6 +147,10 @@ def classify_message( message_class = MessageClass.OUT_OF_OFFICE confidence = Confidence.MEDIUM reason_code = "auto_reply" + elif _is_challenge_response(text): + message_class = MessageClass.CHALLENGE_RESPONSE + confidence = Confidence.MEDIUM + reason_code = "challenge_response" elif _contains_any(text, ["will keep trying", "delivery delayed", "message delayed", "not yet delivered"]): message_class = MessageClass.DELAYED_DELIVERY_NOTICE confidence = Confidence.HIGH @@ -140,6 +167,9 @@ def classify_message( reason_code = "unknown_return" notes.append("Return-channel message did not match a reliable classifier.") + for key in ["original_recipient", "final_recipient", "action", "diagnostic_code", "remote_mta"]: + if dsn_fields.get(key): + notes.append(f"{key}={dsn_fields[key]}") if enhanced_status: notes.append(f"enhanced_status={enhanced_status}") if smtp_status: @@ -153,7 +183,7 @@ def classify_message( message_class=message_class, affected_email_address=affected, original_message_id=original_message_id, - original_recipient=affected, + original_recipient=original_recipient, smtp_status_code=smtp_status, enhanced_status_code=enhanced_status, reason_code=reason_code, @@ -183,6 +213,7 @@ def _classify_dsn( def _extract_text(msg) -> str: chunks: list[str] = [] + html_chunks: list[str] = [] if msg.is_multipart(): for part in msg.walk(): if part.get_content_maintype() == "multipart": @@ -192,14 +223,75 @@ def _extract_text(msg) -> str: chunks.append(str(part.get_content())) except Exception: continue + elif part.get_content_type() == "text/html": + try: + html_chunks.append(_html_to_text(str(part.get_content()))) + except Exception: + continue else: try: - chunks.append(str(msg.get_content())) + content = str(msg.get_content()) + if msg.get_content_type() == "text/html": + html_chunks.append(_html_to_text(content)) + else: + chunks.append(content) except Exception: payload = msg.get_payload(decode=True) if payload: chunks.append(payload.decode(errors="replace")) - return "\n".join(chunks) + if chunks: + return "\n".join(chunks) + return "\n".join(html_chunks) + + +def _parse_failed( + *, + mailbox_id: str, + raw_message_ref: str | None, + imap_uid: str | None, + observed_at: datetime, + reason: str, +) -> tuple[InboundMailboxMessage, ParsedMailboxMessage, EmailEvidenceCandidate | None]: + dedup_key = "|".join([mailbox_id, imap_uid or "", raw_message_ref or "", reason]) + mailbox_message_id = str(uuid5(NAMESPACE_URL, "email-connect:message:" + dedup_key)) + inbound = InboundMailboxMessage( + mailbox_message_id=mailbox_message_id, + mailbox_id=mailbox_id, + imap_uid=imap_uid, + message_id_header=None, + received_at=None, + from_address=None, + to_addresses=[], + subject=None, + raw_headers_ref=raw_message_ref, + raw_message_ref=raw_message_ref, + first_seen_at=observed_at, + last_seen_at=observed_at, + deduplication_key=dedup_key, + ) + parsed_id_basis = "|".join([mailbox_message_id, PARSER_VERSION, "parse_failed"]) + parsed = ParsedMailboxMessage( + parsed_message_id=str(uuid5(NAMESPACE_URL, "email-connect:parsed:" + parsed_id_basis)), + mailbox_message_id=mailbox_message_id, + parser_version=PARSER_VERSION, + message_class=MessageClass.PARSE_FAILED, + affected_email_address=None, + original_message_id=None, + original_recipient=None, + smtp_status_code=None, + enhanced_status_code=None, + reason_code=reason, + confidence=Confidence.HIGH, + parsed_at=observed_at, + notes=[f"parse_failure={reason}"], + ) + candidate = candidate_from_parsed( + parsed, + raw_message_ref=raw_message_ref, + observed_at=observed_at, + occurred_at=None, + ) + return inbound, parsed, candidate def _message_dedup_key( @@ -262,6 +354,25 @@ def _extract_headerish(text: str, name: str) -> str | None: return match.group(1).strip() if match else None +def _extract_dsn_fields(text: str) -> dict[str, str]: + fields: dict[str, str] = {} + for field, key in [ + ("Original-Recipient", "original_recipient"), + ("Final-Recipient", "final_recipient"), + ("Action", "action"), + ("Diagnostic-Code", "diagnostic_code"), + ("Remote-MTA", "remote_mta"), + ]: + value = _extract_headerish(text, field) + if value: + fields[key] = value + if field in {"Original-Recipient", "Final-Recipient"}: + match = EMAIL_RE.search(value) + if match: + fields[f"{key}_email"] = match.group(0).lower() + return fields + + def _extract_affected_recipient(text: str) -> str | None: for name in ["Final-Recipient", "Original-Recipient", "X-Failed-Recipients", "Failed-Recipient"]: value = _extract_headerish(text, name) @@ -309,6 +420,21 @@ def _is_out_of_office(text: str) -> bool: ) +def _is_challenge_response(text: str) -> bool: + return _contains_any( + text, + [ + "challenge-response", + "challenge response", + "sender verification", + "verify your email before your message can be delivered", + "confirm you are a real person", + "confirm that you sent this message", + "please verify yourself", + ], + ) + + def _looks_like_human_reply(inbound: InboundMailboxMessage, text: str) -> bool: subject = (inbound.subject or "").lower() if _contains_any(text, ["auto-submitted: auto-replied", "x-autoreply", "auto-generated"]): @@ -318,3 +444,8 @@ def _looks_like_human_reply(inbound: InboundMailboxMessage, text: str) -> bool: def _looks_return_related(text: str) -> bool: return _contains_any(text, ["delivery", "mailbox", "recipient", "message", "smtp", "unsubscribe", "reply"]) + + +def _html_to_text(value: str) -> str: + without_tags = re.sub(r"<[^>]+>", " ", value) + return re.sub(r"\s+", " ", html.unescape(without_tags)).strip() diff --git a/tests/fixtures/mailbox/challenge_response.eml b/tests/fixtures/mailbox/challenge_response.eml new file mode 100644 index 0000000..2db0cc3 --- /dev/null +++ b/tests/fixtures/mailbox/challenge_response.eml @@ -0,0 +1,9 @@ +From: Sender Verification +To: sender@example.com +Subject: Sender verification required +Date: Tue, 02 Jun 2026 10:09:00 +0000 +Message-ID: +Content-Type: text/plain; charset=utf-8 + +This is a challenge-response message. Please verify yourself before your message +can be delivered to challenge@example.com. diff --git a/tests/fixtures/mailbox/parse_failed.eml b/tests/fixtures/mailbox/parse_failed.eml new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/fixtures/mailbox/parse_failed.eml @@ -0,0 +1 @@ + diff --git a/tests/test_parser.py b/tests/test_parser.py index a4ecb26..3cfcbd2 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -72,6 +72,26 @@ class ParserTests(unittest.TestCase): self.assertIsNotNone(candidate) self.assertEqual(candidate.event_type, "notification.endpoint.unknown") + def test_challenge_response_stays_identity_uncertain(self) -> None: + _inbound, parsed, candidate = parse_message_file(FIXTURES / "challenge_response.eml", mailbox_id="test") + self.assertEqual(parsed.message_class, MessageClass.CHALLENGE_RESPONSE) + self.assertIsNotNone(candidate) + self.assertEqual(candidate.event_type, "interaction.unverified_actor_interaction") + self.assertEqual(candidate.assessment_subclass, "undef.identity_uncertain") + + def test_parse_failure_is_reportable_diagnostic(self) -> None: + _inbound, parsed, candidate = parse_message_file(FIXTURES / "parse_failed.eml", mailbox_id="test") + self.assertEqual(parsed.message_class, MessageClass.PARSE_FAILED) + self.assertIsNotNone(candidate) + self.assertEqual(candidate.event_type, "diagnostic.message.parse_failed") + self.assertEqual(candidate.assessment_subclass, "undef.parse_failed") + + def test_dsn_detail_fields_are_preserved_as_notes(self) -> None: + _inbound, parsed, _candidate = parse_message_file(FIXTURES / "hard_bounce.eml", mailbox_id="test") + self.assertIn("final_recipient=rfc822; missing@example.com", parsed.notes) + self.assertIn("action=failed", parsed.notes) + self.assertIn("diagnostic_code=smtp; 550 5.1.1 User unknown", parsed.notes) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_scanner.py b/tests/test_scanner.py index 3753bd9..e484e56 100644 --- a/tests/test_scanner.py +++ b/tests/test_scanner.py @@ -28,13 +28,13 @@ class ScannerTests(unittest.TestCase): second = scan_mailbox(config) full = scan_mailbox(config, full_rescan=True, report_only_new=True) - self.assertEqual(first.scan.messages_seen, 9) - self.assertEqual(first.scan.messages_new, 9) - self.assertGreaterEqual(first.scan.evidence_events_created, 9) + self.assertEqual(first.scan.messages_seen, 11) + self.assertEqual(first.scan.messages_new, 11) + self.assertGreaterEqual(first.scan.evidence_events_created, 11) self.assertEqual(second.scan.messages_seen, 0) self.assertEqual(second.scan.messages_new, 0) self.assertEqual(second.scan.evidence_events_created, 0) - self.assertEqual(full.scan.messages_seen, 9) + self.assertEqual(full.scan.messages_seen, 11) self.assertEqual(full.scan.messages_new, 0) self.assertEqual(full.scan.evidence_events_created, 0) self.assertTrue(first.report_path and first.report_path.exists()) diff --git a/workplans/EMAIL-WP-0002-mvp-mailbox-evidence-scanner.md b/workplans/EMAIL-WP-0002-mvp-mailbox-evidence-scanner.md index 246473f..7b802ca 100644 --- a/workplans/EMAIL-WP-0002-mvp-mailbox-evidence-scanner.md +++ b/workplans/EMAIL-WP-0002-mvp-mailbox-evidence-scanner.md @@ -4,7 +4,7 @@ type: workplan title: "MVP Mailbox Evidence Scanner" domain: custodian repo: email-connect -status: active +status: finished owner: codex topic_slug: custodian created: "2026-06-02" @@ -802,7 +802,7 @@ Scanner extracts basic metadata and text from representative bounce and reply me ```task id: EMAIL-WP-0002-T05 -status: progress +status: done priority: high state_hub_task_id: "8ea826d1-0add-4573-9bb4-2b73adefba55" ``` @@ -831,7 +831,7 @@ Representative hard and soft bounce samples are classified correctly. ```task id: EMAIL-WP-0002-T06 -status: progress +status: done priority: high state_hub_task_id: "4d94a332-173b-4787-8fb2-27aa63db6a8d" ``` @@ -932,7 +932,7 @@ Complaint/unsubscribe updates suppression state. ```task id: EMAIL-WP-0002-T10 -status: progress +status: done priority: medium state_hub_task_id: "5ab35176-d6c2-4c73-b7b3-bde4c097e3ee" ``` @@ -959,7 +959,7 @@ Report can be opened in spreadsheet tools. ```task id: EMAIL-WP-0002-T11 -status: progress +status: done priority: high state_hub_task_id: "514fa099-781b-4590-aae4-c28970413b3f" ```