diff --git a/Cargo.lock b/Cargo.lock
index e25a6d8..76b4596 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1549,7 +1549,9 @@ dependencies = [
  "imap",
  "mailparse",
  "native-tls",
+ "quoted_printable",
  "ratatui",
+ "regex",
  "serde",
  "toml",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 056c3a5..e0d3f39 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,4 +12,6 @@ serde = { version = "1.0", features = ["derive"] }
 toml = "1.0"
 chrono = "0.4"
 mailparse = "0.15"
-html2text = "0.14"
\ No newline at end of file
+html2text = "0.14"
+quoted_printable = "0.5"
+regex = "1"
\ No newline at end of file
diff --git a/src/inbox.rs b/src/inbox.rs
index 29dad7c..b064fad 100644
--- a/src/inbox.rs
+++ b/src/inbox.rs
@@ -180,14 +180,36 @@ fn extract_plain_text(raw: &[u8]) -> Result<String, String> {
     let parsed = mailparse::parse_mail(raw).map_err(|e| e.to_string())?;
     // Try text/plain first
     if let Some(text) = find_part(&parsed, "text/plain") {
-        return Ok(text);
+        return Ok(clean_text(&text));
     }
     // Fall back to text/html rendered as text
     if let Some(html) = find_part(&parsed, "text/html") {
-        return Ok(html_to_text(&html));
+        return Ok(clean_text(&html_to_text(&html)));
     }
     // Last resort: top-level body
-    parsed.get_body().map_err(|e| e.to_string())
+    parsed.get_body().map(|s| clean_text(&s)).map_err(|e| e.to_string())
+}
+
+/// Decode leftover quoted-printable artifacts (`=XX` sequences) in `text`.
+///
+/// The input is run through the quoted-printable decoder in robust mode and the
+/// resulting bytes are converted to a `String` lossily. If decoding fails, every
+/// `=XX` hex sequence is stripped from the original text instead.
+///
+/// NOTE(review): this also rewrites `=XX` sequences that are genuine content
+/// (e.g. `id=4F` inside a URL) -- confirm that is acceptable for already-decoded
+/// bodies.
+fn clean_text(text: &str) -> String {
+    match quoted_printable::decode(text.as_bytes(), quoted_printable::ParseMode::Robust) {
+        Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
+        Err(_) => {
+            // Compile the pattern only on this fallback path so the common case
+            // does not pay for a regex build on every call.
+            let qp_escape = regex::Regex::new(r"=[0-9A-Fa-f]{2}")
+                .expect("hard-coded pattern is a valid regex");
+            qp_escape.replace_all(text, "").into_owned()
+        }
+    }
 }
 
 fn find_part(mail: &mailparse::ParsedMail, mime_type: &str) -> Option<String> {