From 3e647dbe520490c2941500ec648effc2b5a5cb6f Mon Sep 17 00:00:00 2001
From: Shautvast <shautvast@gmail.com>
Date: Wed, 18 Feb 2026 13:41:25 +0100
Subject: [PATCH] Post-process markdown to clean up noisy HTML email output

Strip images, simplify links to just text, remove very long bare URLs,
and collapse excessive blank lines for a cleaner preview pane.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/inbox.rs | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/src/inbox.rs b/src/inbox.rs
index 96148e7..a80df32 100644
--- a/src/inbox.rs
+++ b/src/inbox.rs
@@ -208,7 +208,7 @@ fn extract_plain_text(raw: &[u8]) -> Result<String, String> {
     // Fall back to text/html converted to markdown
     if let Some(html) = find_part(&parsed, "text/html") {
         let md = html2md::rewrite_html(&html, false);
-        return Ok(clean_text(&md));
+        return Ok(clean_markdown(&clean_text(&md)));
     }
     // Last resort: top-level body
     parsed.get_body().map(|s| clean_text(&s)).map_err(|e| e.to_string())
@@ -225,6 +225,24 @@ fn clean_text(text: &str) -> String {
     }
 }
 
+/// Clean up markdown converted from HTML emails. Returns a trimmed copy of `text` with:
+/// - image references `![...](...)` removed (done first: the link pattern would
+///   otherwise match an image's `[...](...)` tail and leave a stray `!` behind)
+/// - links `[text](url)` reduced to `text`
+/// - bare http(s) URLs with 80+ non-space characters after `://` removed
+/// - runs of 2+ blank or whitespace-only lines collapsed to a single blank line
+fn clean_markdown(text: &str) -> String {
+    let re_img = regex::Regex::new(r"!\[[^\]]*\]\([^)]*\)").unwrap();
+    let re_link = regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap();
+    let re_bare_url = regex::Regex::new(r"https?://\S{80,}").unwrap();
+    // Lines emptied by the removals above may still hold spaces/tabs/CR; count them as blank.
+    let re_blank_lines = regex::Regex::new(r"\n(?:[ \t\r]*\n){2,}").unwrap();
+    let result = re_img.replace_all(text, "");
+    let result = re_link.replace_all(&result, "$1");
+    let result = re_bare_url.replace_all(&result, "");
+    let result = re_blank_lines.replace_all(&result, "\n\n");
+    result.trim().to_string()
+
 fn find_part(mail: &mailparse::ParsedMail, mime_type: &str) -> Option<String> {
     let content_type = mail.ctype.mimetype.to_lowercase();
     if content_type == mime_type {