Clean up leftover quoted-printable artifacts in email body

Add a second-pass QP decode to catch =XX sequences that survive
the initial mailparse decoding.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Shautvast 2026-02-17 21:47:47 +01:00
parent d0df411c57
commit e871c1aab8
3 changed files with 19 additions and 4 deletions

2
Cargo.lock generated
View file

@ -1549,7 +1549,9 @@ dependencies = [
"imap",
"mailparse",
"native-tls",
"quoted_printable",
"ratatui",
"regex",
"serde",
"toml",
]

View file

@ -12,4 +12,6 @@ serde = { version = "1.0", features = ["derive"] }
toml = "1.0"
chrono = "0.4"
mailparse = "0.15"
html2text = "0.14"
html2text = "0.14"
quoted_printable = "0.5"
regex = "1"

View file

@ -180,14 +180,25 @@ fn extract_plain_text(raw: &[u8]) -> Result<String, String> {
let parsed = mailparse::parse_mail(raw).map_err(|e| e.to_string())?;
// Try text/plain first
if let Some(text) = find_part(&parsed, "text/plain") {
return Ok(text);
return Ok(clean_text(&text));
}
// Fall back to text/html rendered as text
if let Some(html) = find_part(&parsed, "text/html") {
return Ok(html_to_text(&html));
return Ok(clean_text(&html_to_text(&html)));
}
// Last resort: top-level body
parsed.get_body().map_err(|e| e.to_string())
parsed.get_body().map(|s| clean_text(&s)).map_err(|e| e.to_string())
}
/// Removes leftover quoted-printable artifacts (`=XX` sequences) from `text`.
///
/// The text is passed through a second quoted-printable decode in robust
/// mode. On success the decoded bytes are converted to a `String`, with any
/// invalid UTF-8 replaced by U+FFFD. If the decode reports an error, every
/// `=` followed by two hex digits is stripped from the original text instead.
///
/// NOTE(review): a literal `=` followed by two hex digits in already-decoded
/// text (e.g. `x=10`) looks the same as an escape and is presumably decoded
/// as well — confirm that this is acceptable for message bodies.
fn clean_text(text: &str) -> String {
    match quoted_printable::decode(text.as_bytes(), quoted_printable::ParseMode::Robust) {
        Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
        Err(_) => {
            // The pattern is only needed on this fallback path, so compile it
            // lazily and keep it for later calls instead of rebuilding it on
            // every invocation.
            static LEFTOVER_QP: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
            let re = LEFTOVER_QP.get_or_init(|| {
                regex::Regex::new(r"=[0-9A-Fa-f]{2}").expect("hard-coded pattern is valid")
            });
            re.replace_all(text, "").into_owned()
        }
    }
}
fn find_part(mail: &mailparse::ParsedMail, mime_type: &str) -> Option<String> {