Clean up leftover quoted-printable artifacts in email body

Add a second-pass QP decode to catch =XX sequences that survive
the initial mailparse decoding.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Shautvast 2026-02-17 21:47:47 +01:00
parent d0df411c57
commit e871c1aab8
3 changed files with 19 additions and 4 deletions

2
Cargo.lock generated
View file

@ -1549,7 +1549,9 @@ dependencies = [
"imap", "imap",
"mailparse", "mailparse",
"native-tls", "native-tls",
"quoted_printable",
"ratatui", "ratatui",
"regex",
"serde", "serde",
"toml", "toml",
] ]

View file

@ -13,3 +13,5 @@ toml = "1.0"
chrono = "0.4" chrono = "0.4"
mailparse = "0.15" mailparse = "0.15"
html2text = "0.14" html2text = "0.14"
quoted_printable = "0.5"
regex = "1"

View file

@ -180,14 +180,25 @@ fn extract_plain_text(raw: &[u8]) -> Result<String, String> {
let parsed = mailparse::parse_mail(raw).map_err(|e| e.to_string())?; let parsed = mailparse::parse_mail(raw).map_err(|e| e.to_string())?;
// Try text/plain first // Try text/plain first
if let Some(text) = find_part(&parsed, "text/plain") { if let Some(text) = find_part(&parsed, "text/plain") {
return Ok(text); return Ok(clean_text(&text));
} }
// Fall back to text/html rendered as text // Fall back to text/html rendered as text
if let Some(html) = find_part(&parsed, "text/html") { if let Some(html) = find_part(&parsed, "text/html") {
return Ok(html_to_text(&html)); return Ok(clean_text(&html_to_text(&html)));
} }
// Last resort: top-level body // Last resort: top-level body
parsed.get_body().map_err(|e| e.to_string()) parsed.get_body().map(|s| clean_text(&s)).map_err(|e| e.to_string())
}
/// Decode quoted-printable artifacts that are still present in already-decoded
/// message text.
///
/// Two kinds of artifact are handled:
///
/// * `=XX` escapes (two hex digits, either case). Consecutive escapes are
///   decoded together so multi-byte UTF-8 characters such as `=C3=A9` come out
///   as one character.
/// * Soft line breaks: an `=` followed by optional spaces/tabs and then a line
///   break (or the end of the text) is removed, joining the two lines.
///
/// Everything else is copied through unchanged. The scan works on the `&str`
/// itself rather than on a 7-bit byte stream, so non-ASCII characters, line
/// endings and trailing whitespace in `text` are preserved.
///
/// An escape run that does not decode to valid UTF-8 (for example the `=AB` in
/// `?id=AB12`) is left exactly as written instead of being turned into
/// replacement characters.
///
/// NOTE(review): this is a heuristic. Ordinary text that happens to contain
/// `=` followed by two hex digits forming valid UTF-8 (e.g. `x=41`) is still
/// decoded.
fn clean_text(text: &str) -> String {
    let mut out = String::with_capacity(text.len());
    let mut rest = text;

    while let Some(pos) = rest.find('=') {
        // Copy the literal text in front of the '=' verbatim.
        out.push_str(&rest[..pos]);
        let tail = &rest[pos..];
        let after_eq = &tail[1..];

        // Soft line break: '=' + optional padding + line break / end of text.
        let unpadded = after_eq.trim_start_matches(|c: char| c == ' ' || c == '\t');
        let soft_break = if unpadded.is_empty() {
            Some(unpadded)
        } else {
            unpadded
                .strip_prefix("\r\n")
                .or_else(|| unpadded.strip_prefix('\n'))
        };
        if let Some(next) = soft_break {
            rest = next;
            continue;
        }

        // Collect the longest run of consecutive `=XX` escapes starting here.
        // Only ASCII bytes are consumed, so `run_len` is always a char boundary.
        let tail_bytes = tail.as_bytes();
        let mut decoded: Vec<u8> = Vec::new();
        let mut run_len = 0;
        while let [b'=', hi, lo, ..] = &tail_bytes[run_len..] {
            match ((*hi as char).to_digit(16), (*lo as char).to_digit(16)) {
                (Some(h), Some(l)) => {
                    // h and l are both < 16, so the value always fits in a u8.
                    decoded.push((h * 16 + l) as u8);
                    run_len += 3;
                }
                _ => break,
            }
        }

        if run_len == 0 {
            // A plain '=' that is not part of any escape.
            out.push('=');
            rest = after_eq;
            continue;
        }

        let (raw, next) = tail.split_at(run_len);
        match std::str::from_utf8(&decoded) {
            Ok(s) => out.push_str(s),
            // Not text after decoding: keep what the sender wrote.
            Err(_) => out.push_str(raw),
        }
        rest = next;
    }

    out.push_str(rest);
    out
}
fn find_part(mail: &mailparse::ParsedMail, mime_type: &str) -> Option<String> { fn find_part(mail: &mailparse::ParsedMail, mime_type: &str) -> Option<String> {