From 7ab1f9deb92e30c13c152aece04d3b79d128fe89 Mon Sep 17 00:00:00 2001
From: tegwick <bernd.worsch@gmail.com>
Date: Tue, 2 Jun 2026 02:24:39 +0200
Subject: [PATCH] feat: finish mailbox evidence scanner mvp

---
 README.md                                     |   4 +-
 docs/email-evidence-canon.md                  |   2 +
 docs/initial-runtime-architecture.md          |   6 +-
 src/email_connect/adapter_contract.py         |   1 +
 src/email_connect/evidence.py                 |   8 +-
 src/email_connect/parser.py                   | 141 +++++++++++++++++-
 tests/fixtures/mailbox/challenge_response.eml |   9 ++
 tests/fixtures/mailbox/parse_failed.eml       |   1 +
 tests/test_parser.py                          |  20 +++
 tests/test_scanner.py                         |   8 +-
 ...IL-WP-0002-mvp-mailbox-evidence-scanner.md |  10 +-
 11 files changed, 192 insertions(+), 18 deletions(-)
 create mode 100644 tests/fixtures/mailbox/challenge_response.eml
 create mode 100644 tests/fixtures/mailbox/parse_failed.eml

diff --git a/README.md b/README.md
index 3a732b0..0892b66 100644
--- a/README.md
+++ b/README.md
@@ -36,8 +36,8 @@ marking messages seen are intentionally unsupported in this MVP.
   endpoint quality hints.
 - CSV report generation, including `--report-only-new`.
 - Golden fixture tests for hard bounce, soft bounce, delayed delivery, final
-  failure, complaint, unsubscribe, unknown return, out-of-office, and human
-  reply signals.
+  failure, complaint, unsubscribe, challenge-response, unknown return,
+  parse-failure, out-of-office, and human reply signals.
 
 Provider webhooks, outbound sending, suppression workflows, OAuth mailbox login,
 and a UI remain outside this first mailbox-scanner slice.
diff --git a/docs/email-evidence-canon.md b/docs/email-evidence-canon.md
index bd784fc..a6cfd66 100644
--- a/docs/email-evidence-canon.md
+++ b/docs/email-evidence-canon.md
@@ -36,6 +36,7 @@ coordination runtime decides whether those facts satisfy a coordination case.
 | `unsubscribe_or_opt_out` | `notification.channel.unsubscribe_received` | `fail.unsubscribed` |
 | `unknown_return_message` | `notification.endpoint.unknown` | `undef.conflicting_evidence` |
 | `challenge_response` | `interaction.unverified_actor_interaction` | `undef.identity_uncertain` |
+| `parse_failed` | `diagnostic.message.parse_failed` | `undef.parse_failed` |
 
 ## Overclaim Prevention
 
@@ -45,6 +46,7 @@ coordination runtime decides whether those facts satisfy a coordination case.
 - Out-of-office does not prove recipient awareness or action.
 - Human reply does not prove legal acceptance.
 - Unknown return messages remain visible.
+- Parse failures are diagnostic rows, not delivery or interaction outcomes.
 - Scanner and proxy interactions must stay below identity-bound interaction.
 
 ## Endpoint Quality Hints
diff --git a/docs/initial-runtime-architecture.md b/docs/initial-runtime-architecture.md
index b42d765..3060e47 100644
--- a/docs/initial-runtime-architecture.md
+++ b/docs/initial-runtime-architecture.md
@@ -74,10 +74,14 @@ Examples:
 - `complaint_or_abuse` -> `notification.channel.complaint_received`
 - `unsubscribe_or_opt_out` -> `notification.channel.unsubscribe_received`
 - `out_of_office` -> `interaction.out_of_office_received`
+- `challenge_response` -> `interaction.unverified_actor_interaction`
 - `human_reply` -> `interaction.reply_received`
+- `parse_failed` -> `diagnostic.message.parse_failed`
 
 The mapper does not emit evidence for unrelated messages. Unknown return
-messages stay visible as `notification.endpoint.unknown`.
+messages stay visible as `notification.endpoint.unknown`. Parse failures are
+visible as diagnostics without claiming delivery, interaction, identity, or
+endpoint quality.
 
 ## coordination-engine Alignment
 
diff --git a/src/email_connect/adapter_contract.py b/src/email_connect/adapter_contract.py
index f2ba3ed..959be95 100644
--- a/src/email_connect/adapter_contract.py
+++ b/src/email_connect/adapter_contract.py
@@ -12,6 +12,7 @@ EMITTED_EVENT_TYPES = [
     "interaction.reply_received",
     "interaction.out_of_office_received",
     "notification.endpoint.unknown",
+    "diagnostic.message.parse_failed",
 ]
 
 
diff --git a/src/email_connect/evidence.py b/src/email_connect/evidence.py
index fa17ee7..baad6f9 100644
--- a/src/email_connect/evidence.py
+++ b/src/email_connect/evidence.py
@@ -96,7 +96,13 @@ EVIDENCE_MAPPINGS: dict[MessageClass, EvidenceMapping | None] = {
         "Challenge-response or automated interaction was observed.",
     ),
     MessageClass.UNRELATED_MESSAGE: None,
-    MessageClass.PARSE_FAILED: None,
+    MessageClass.PARSE_FAILED: EvidenceMapping(
+        "diagnostic.message.parse_failed",
+        AssessmentCategory.UNDEF,
+        "undef.parse_failed",
+        EvidenceStrength.NONE,
+        "Message source could not be parsed reliably; no delivery or interaction claim is made.",
+    ),
 }
 
 
diff --git a/src/email_connect/parser.py b/src/email_connect/parser.py
index 87881a4..e5fba69 100644
--- a/src/email_connect/parser.py
+++ b/src/email_connect/parser.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import hashlib
+import html
 import re
 from datetime import UTC, datetime
 from email import policy
@@ -34,7 +35,25 @@ def parse_message_bytes(
     now: datetime | None = None,
 ) -> tuple[InboundMailboxMessage, ParsedMailboxMessage, EmailEvidenceCandidate | None]:
     observed_at = now or datetime.now(UTC)
-    msg = BytesParser(policy=policy.default).parsebytes(raw_bytes)
+    if not raw_bytes.strip():
+        return _parse_failed(
+            mailbox_id=mailbox_id,
+            raw_message_ref=raw_message_ref,
+            imap_uid=imap_uid,
+            observed_at=observed_at,
+            reason="empty_message",
+        )
+
+    try:
+        msg = BytesParser(policy=policy.default).parsebytes(raw_bytes)
+    except Exception as exc:
+        return _parse_failed(
+            mailbox_id=mailbox_id,
+            raw_message_ref=raw_message_ref,
+            imap_uid=imap_uid,
+            observed_at=observed_at,
+            reason=f"parser_error:{type(exc).__name__}",
+        )
 
     message_id = _clean_header(msg.get("Message-ID"))
     subject = _clean_header(msg.get("Subject"))
@@ -104,8 +123,12 @@ def classify_message(
     text = combined_text.lower()
     enhanced_status = _first_match(ENHANCED_STATUS_RE, combined_text)
     smtp_status = _first_match(SMTP_STATUS_RE, combined_text)
-    affected = _extract_affected_recipient(combined_text)
+    dsn_fields = _extract_dsn_fields(combined_text)
+    affected = dsn_fields.get("final_recipient_email") or dsn_fields.get("original_recipient_email")
+    if affected is None:
+        affected = _extract_affected_recipient(combined_text)
     original_message_id = _extract_headerish(combined_text, "Original-Message-ID")
+    original_recipient = dsn_fields.get("original_recipient_email") or affected
     notes: list[str] = []
 
     message_class = MessageClass.UNRELATED_MESSAGE
@@ -124,6 +147,10 @@ def classify_message(
         message_class = MessageClass.OUT_OF_OFFICE
         confidence = Confidence.MEDIUM
         reason_code = "auto_reply"
+    elif _is_challenge_response(text):
+        message_class = MessageClass.CHALLENGE_RESPONSE
+        confidence = Confidence.MEDIUM
+        reason_code = "challenge_response"
     elif _contains_any(text, ["will keep trying", "delivery delayed", "message delayed", "not yet delivered"]):
         message_class = MessageClass.DELAYED_DELIVERY_NOTICE
         confidence = Confidence.HIGH
@@ -140,6 +167,9 @@ def classify_message(
         reason_code = "unknown_return"
         notes.append("Return-channel message did not match a reliable classifier.")
 
+    for key in ["original_recipient", "final_recipient", "action", "diagnostic_code", "remote_mta"]:
+        if dsn_fields.get(key):
+            notes.append(f"{key}={dsn_fields[key]}")
     if enhanced_status:
         notes.append(f"enhanced_status={enhanced_status}")
     if smtp_status:
@@ -153,7 +183,7 @@ def classify_message(
         message_class=message_class,
         affected_email_address=affected,
         original_message_id=original_message_id,
-        original_recipient=affected,
+        original_recipient=original_recipient,
         smtp_status_code=smtp_status,
         enhanced_status_code=enhanced_status,
         reason_code=reason_code,
@@ -183,6 +213,7 @@ def _classify_dsn(
 
 def _extract_text(msg) -> str:
     chunks: list[str] = []
+    html_chunks: list[str] = []
     if msg.is_multipart():
         for part in msg.walk():
             if part.get_content_maintype() == "multipart":
@@ -192,14 +223,75 @@ def _extract_text(msg) -> str:
                     chunks.append(str(part.get_content()))
                 except Exception:
                     continue
+            elif part.get_content_type() == "text/html":
+                try:
+                    html_chunks.append(_html_to_text(str(part.get_content())))
+                except Exception:
+                    continue
     else:
         try:
-            chunks.append(str(msg.get_content()))
+            content = str(msg.get_content())
+            if msg.get_content_type() == "text/html":
+                html_chunks.append(_html_to_text(content))
+            else:
+                chunks.append(content)
         except Exception:
             payload = msg.get_payload(decode=True)
             if payload:
                 chunks.append(payload.decode(errors="replace"))
-    return "\n".join(chunks)
+    if chunks:
+        return "\n".join(chunks)
+    return "\n".join(html_chunks)
+
+
+def _parse_failed(  # build the result triple for an unparseable message
+    *,
+    mailbox_id: str,
+    raw_message_ref: str | None,
+    imap_uid: str | None,  # NOTE(review): with no uid and no ref, key = mailbox + reason only
+    observed_at: datetime,  # used for first_seen_at, last_seen_at and parsed_at
+    reason: str,  # failure code; goes into dedup key, reason_code and notes
+) -> tuple[InboundMailboxMessage, ParsedMailboxMessage, EmailEvidenceCandidate | None]:
+    dedup_key = "|".join([mailbox_id, imap_uid or "", raw_message_ref or "", reason])
+    mailbox_message_id = str(uuid5(NAMESPACE_URL, "email-connect:message:" + dedup_key))
+    inbound = InboundMailboxMessage(  # header-derived fields stay empty
+        mailbox_message_id=mailbox_message_id,
+        mailbox_id=mailbox_id,
+        imap_uid=imap_uid,
+        message_id_header=None,
+        received_at=None,
+        from_address=None,
+        to_addresses=[],
+        subject=None,
+        raw_headers_ref=raw_message_ref,  # same ref is reused for headers and body
+        raw_message_ref=raw_message_ref,
+        first_seen_at=observed_at,
+        last_seen_at=observed_at,
+        deduplication_key=dedup_key,
+    )
+    parsed_id_basis = "|".join([mailbox_message_id, PARSER_VERSION, "parse_failed"])
+    parsed = ParsedMailboxMessage(  # id depends on message id + parser version
+        parsed_message_id=str(uuid5(NAMESPACE_URL, "email-connect:parsed:" + parsed_id_basis)),
+        mailbox_message_id=mailbox_message_id,
+        parser_version=PARSER_VERSION,
+        message_class=MessageClass.PARSE_FAILED,
+        affected_email_address=None,
+        original_message_id=None,
+        original_recipient=None,
+        smtp_status_code=None,
+        enhanced_status_code=None,
+        reason_code=reason,
+        confidence=Confidence.HIGH,  # presumably: certain that parsing failed; confirm
+        parsed_at=observed_at,
+        notes=[f"parse_failure={reason}"],
+    )
+    candidate = candidate_from_parsed(  # may be None per the declared return type
+        parsed,
+        raw_message_ref=raw_message_ref,
+        observed_at=observed_at,
+        occurred_at=None,  # no message timestamp is known here
+    )
+    return inbound, parsed, candidate
 
 
 def _message_dedup_key(
@@ -262,6 +354,25 @@ def _extract_headerish(text: str, name: str) -> str | None:
     return match.group(1).strip() if match else None
 
 
+def _extract_dsn_fields(text: str) -> dict[str, str]:  # collect "Name: value" DSN fields
+    fields: dict[str, str] = {}
+    for field, key in [  # (field name in text, key in the result dict)
+        ("Original-Recipient", "original_recipient"),
+        ("Final-Recipient", "final_recipient"),
+        ("Action", "action"),
+        ("Diagnostic-Code", "diagnostic_code"),
+        ("Remote-MTA", "remote_mta"),
+    ]:
+        value = _extract_headerish(text, field)
+        if value:  # missing or empty fields are left out of the result
+            fields[key] = value
+            if field in {"Original-Recipient", "Final-Recipient"}:
+                match = EMAIL_RE.search(value)  # value may look like "rfc822; user@host"
+                if match:
+                    fields[f"{key}_email"] = match.group(0).lower()  # extra "<key>_email" entry
+    return fields
+
+
 def _extract_affected_recipient(text: str) -> str | None:
     for name in ["Final-Recipient", "Original-Recipient", "X-Failed-Recipients", "Failed-Recipient"]:
         value = _extract_headerish(text, name)
@@ -309,6 +420,21 @@ def _is_out_of_office(text: str) -> bool:
     )
 
 
+def _is_challenge_response(text: str) -> bool:  # caller passes lower-cased text
+    return _contains_any(  # presumably a substring test per phrase; confirm in _contains_any
+        text,
+        [  # all phrases are lower-case
+            "challenge-response",
+            "challenge response",
+            "sender verification",
+            "verify your email before your message can be delivered",
+            "confirm you are a real person",
+            "confirm that you sent this message",
+            "please verify yourself",
+        ],
+    )
+
+
 def _looks_like_human_reply(inbound: InboundMailboxMessage, text: str) -> bool:
     subject = (inbound.subject or "").lower()
     if _contains_any(text, ["auto-submitted: auto-replied", "x-autoreply", "auto-generated"]):
@@ -318,3 +444,8 @@ def _looks_like_human_reply(inbound: InboundMailboxMessage, text: str) -> bool:
 
 def _looks_return_related(text: str) -> bool:
     return _contains_any(text, ["delivery", "mailbox", "recipient", "message", "smtp", "unsubscribe", "reply"])
+
+
+def _html_to_text(value: str) -> str:  # reduce HTML markup to one line of text
+    without_tags = re.sub(r"<[^>]+>", " ", value)  # tags only; script/style text stays
+    return re.sub(r"\s+", " ", html.unescape(without_tags)).strip()  # unescape, squeeze spaces
diff --git a/tests/fixtures/mailbox/challenge_response.eml b/tests/fixtures/mailbox/challenge_response.eml
new file mode 100644
index 0000000..2db0cc3
--- /dev/null
+++ b/tests/fixtures/mailbox/challenge_response.eml
@@ -0,0 +1,9 @@
+From: Sender Verification <verify@example.net>
+To: sender@example.com
+Subject: Sender verification required
+Date: Tue, 02 Jun 2026 10:09:00 +0000
+Message-ID: <challenge-response@example.net>
+Content-Type: text/plain; charset=utf-8
+
+This is a challenge-response message. Please verify yourself before your message
+can be delivered to challenge@example.com.
diff --git a/tests/fixtures/mailbox/parse_failed.eml b/tests/fixtures/mailbox/parse_failed.eml
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/tests/fixtures/mailbox/parse_failed.eml
@@ -0,0 +1 @@
+
diff --git a/tests/test_parser.py b/tests/test_parser.py
index a4ecb26..3cfcbd2 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -72,6 +72,26 @@ class ParserTests(unittest.TestCase):
         self.assertIsNotNone(candidate)
         self.assertEqual(candidate.event_type, "notification.endpoint.unknown")
 
+    def test_challenge_response_stays_identity_uncertain(self) -> None:  # fixture-driven
+        _inbound, parsed, candidate = parse_message_file(FIXTURES / "challenge_response.eml", mailbox_id="test")
+        self.assertEqual(parsed.message_class, MessageClass.CHALLENGE_RESPONSE)
+        self.assertIsNotNone(candidate)  # a candidate must be emitted for this class
+        self.assertEqual(candidate.event_type, "interaction.unverified_actor_interaction")
+        self.assertEqual(candidate.assessment_subclass, "undef.identity_uncertain")
+
+    def test_parse_failure_is_reportable_diagnostic(self) -> None:  # whitespace-only fixture
+        _inbound, parsed, candidate = parse_message_file(FIXTURES / "parse_failed.eml", mailbox_id="test")
+        self.assertEqual(parsed.message_class, MessageClass.PARSE_FAILED)
+        self.assertIsNotNone(candidate)  # parse failures must still yield a candidate
+        self.assertEqual(candidate.event_type, "diagnostic.message.parse_failed")
+        self.assertEqual(candidate.assessment_subclass, "undef.parse_failed")
+
+    def test_dsn_detail_fields_are_preserved_as_notes(self) -> None:  # uses hard_bounce.eml
+        _inbound, parsed, _candidate = parse_message_file(FIXTURES / "hard_bounce.eml", mailbox_id="test")
+        self.assertIn("final_recipient=rfc822; missing@example.com", parsed.notes)  # raw value kept
+        self.assertIn("action=failed", parsed.notes)
+        self.assertIn("diagnostic_code=smtp; 550 5.1.1 User unknown", parsed.notes)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/test_scanner.py b/tests/test_scanner.py
index 3753bd9..e484e56 100644
--- a/tests/test_scanner.py
+++ b/tests/test_scanner.py
@@ -28,13 +28,13 @@ class ScannerTests(unittest.TestCase):
             second = scan_mailbox(config)
             full = scan_mailbox(config, full_rescan=True, report_only_new=True)
 
-            self.assertEqual(first.scan.messages_seen, 9)
-            self.assertEqual(first.scan.messages_new, 9)
-            self.assertGreaterEqual(first.scan.evidence_events_created, 9)
+            self.assertEqual(first.scan.messages_seen, 11)
+            self.assertEqual(first.scan.messages_new, 11)
+            self.assertGreaterEqual(first.scan.evidence_events_created, 11)
             self.assertEqual(second.scan.messages_seen, 0)
             self.assertEqual(second.scan.messages_new, 0)
             self.assertEqual(second.scan.evidence_events_created, 0)
-            self.assertEqual(full.scan.messages_seen, 9)
+            self.assertEqual(full.scan.messages_seen, 11)
             self.assertEqual(full.scan.messages_new, 0)
             self.assertEqual(full.scan.evidence_events_created, 0)
             self.assertTrue(first.report_path and first.report_path.exists())
diff --git a/workplans/EMAIL-WP-0002-mvp-mailbox-evidence-scanner.md b/workplans/EMAIL-WP-0002-mvp-mailbox-evidence-scanner.md
index 246473f..7b802ca 100644
--- a/workplans/EMAIL-WP-0002-mvp-mailbox-evidence-scanner.md
+++ b/workplans/EMAIL-WP-0002-mvp-mailbox-evidence-scanner.md
@@ -4,7 +4,7 @@ type: workplan
 title: "MVP Mailbox Evidence Scanner"
 domain: custodian
 repo: email-connect
-status: active
+status: finished
 owner: codex
 topic_slug: custodian
 created: "2026-06-02"
@@ -802,7 +802,7 @@ Scanner extracts basic metadata and text from representative bounce and reply me
 
 ```task
 id: EMAIL-WP-0002-T05
-status: progress
+status: done
 priority: high
 state_hub_task_id: "8ea826d1-0add-4573-9bb4-2b73adefba55"
 ```
@@ -831,7 +831,7 @@ Representative hard and soft bounce samples are classified correctly.
 
 ```task
 id: EMAIL-WP-0002-T06
-status: progress
+status: done
 priority: high
 state_hub_task_id: "4d94a332-173b-4787-8fb2-27aa63db6a8d"
 ```
@@ -932,7 +932,7 @@ Complaint/unsubscribe updates suppression state.
 
 ```task
 id: EMAIL-WP-0002-T10
-status: progress
+status: done
 priority: medium
 state_hub_task_id: "5ab35176-d6c2-4c73-b7b3-bde4c097e3ee"
 ```
@@ -959,7 +959,7 @@ Report can be opened in spreadsheet tools.
 
 ```task
 id: EMAIL-WP-0002-T11
-status: progress
+status: done
 priority: high
 state_hub_task_id: "514fa099-781b-4590-aae4-c28970413b3f"
 ```