Clean up leftover quoted-printable artifacts in email body
Add a second-pass QP decode to catch =XX sequences that survive the initial mailparse decoding.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
d0df411c57
commit
e871c1aab8
3 changed files with 19 additions and 4 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
|
@ -1549,7 +1549,9 @@ dependencies = [
|
||||||
"imap",
|
"imap",
|
||||||
"mailparse",
|
"mailparse",
|
||||||
"native-tls",
|
"native-tls",
|
||||||
|
"quoted_printable",
|
||||||
"ratatui",
|
"ratatui",
|
||||||
|
"regex",
|
||||||
"serde",
|
"serde",
|
||||||
"toml",
|
"toml",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -12,4 +12,6 @@ serde = { version = "1.0", features = ["derive"] }
|
||||||
toml = "1.0"
|
toml = "1.0"
|
||||||
chrono = "0.4"
|
chrono = "0.4"
|
||||||
mailparse = "0.15"
|
mailparse = "0.15"
|
||||||
html2text = "0.14"
|
html2text = "0.14"
|
||||||
|
quoted_printable = "0.5"
|
||||||
|
regex = "1"
|
||||||
17
src/inbox.rs
17
src/inbox.rs
|
|
@ -180,14 +180,25 @@ fn extract_plain_text(raw: &[u8]) -> Result<String, String> {
|
||||||
let parsed = mailparse::parse_mail(raw).map_err(|e| e.to_string())?;
|
let parsed = mailparse::parse_mail(raw).map_err(|e| e.to_string())?;
|
||||||
// Try text/plain first
|
// Try text/plain first
|
||||||
if let Some(text) = find_part(&parsed, "text/plain") {
|
if let Some(text) = find_part(&parsed, "text/plain") {
|
||||||
return Ok(text);
|
return Ok(clean_text(&text));
|
||||||
}
|
}
|
||||||
// Fall back to text/html rendered as text
|
// Fall back to text/html rendered as text
|
||||||
if let Some(html) = find_part(&parsed, "text/html") {
|
if let Some(html) = find_part(&parsed, "text/html") {
|
||||||
return Ok(html_to_text(&html));
|
return Ok(clean_text(&html_to_text(&html)));
|
||||||
}
|
}
|
||||||
// Last resort: top-level body
|
// Last resort: top-level body
|
||||||
parsed.get_body().map_err(|e| e.to_string())
|
parsed.get_body().map(|s| clean_text(&s)).map_err(|e| e.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Remove leftover quoted-printable artifacts (=XX sequences)
|
||||||
|
fn clean_text(text: &str) -> String {
|
||||||
|
let re = regex::Regex::new(r"=[0-9A-Fa-f]{2}").unwrap();
|
||||||
|
// Try to decode any remaining QP sequences
|
||||||
|
let decoded = quoted_printable::decode(text.as_bytes(), quoted_printable::ParseMode::Robust);
|
||||||
|
match decoded {
|
||||||
|
Ok(bytes) => String::from_utf8_lossy(&bytes).to_string(),
|
||||||
|
Err(_) => re.replace_all(text, "").to_string(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn find_part(mail: &mailparse::ParsedMail, mime_type: &str) -> Option<String> {
|
fn find_part(mail: &mailparse::ParsedMail, mime_type: &str) -> Option<String> {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue