Add a second-pass QP decode to catch =XX sequences that survive the initial mailparse decoding. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
219 lines
6.7 KiB
Rust
219 lines
6.7 KiB
Rust
use chrono::{DateTime, FixedOffset, Local};
|
|
use crate::config::Config;
|
|
use crate::connect::ImapSession;
|
|
use crate::{connect, Email};
|
|
|
|
/// Maximum number of messages requested per fetch: used both for the initial
/// load of the newest messages and for each page of older messages.
const BATCH_SIZE: u32 = 50;
|
|
|
|
pub(crate) struct Inbox {
|
|
pub emails: Vec<Email>,
|
|
pub oldest_seq: Option<u32>,
|
|
}
|
|
|
|
impl Inbox {
|
|
pub fn has_older(&self) -> bool {
|
|
self.oldest_seq.map_or(false, |s| s > 1)
|
|
}
|
|
}
|
|
|
|
/// Refresh inbox (full reload). Reconnects on error.
|
|
pub(crate) fn refresh(
|
|
session: &mut Option<ImapSession>,
|
|
config: &Config,
|
|
) -> Result<Inbox, String> {
|
|
if let Some(s) = session.as_mut() {
|
|
if s.noop().is_ok() {
|
|
return fetch_latest(s);
|
|
}
|
|
}
|
|
|
|
*session = None;
|
|
let mut new_session = connect::connect(config)?;
|
|
let result = fetch_latest(&mut new_session);
|
|
*session = Some(new_session);
|
|
result
|
|
}
|
|
|
|
/// Fetch a batch of older emails. Returns (emails, new_oldest_seq).
|
|
pub(crate) fn fetch_older_batch(
|
|
session: &mut Option<ImapSession>,
|
|
oldest_seq: u32,
|
|
config: &Config,
|
|
) -> Result<(Vec<Email>, u32), String> {
|
|
if oldest_seq <= 1 {
|
|
return Ok((Vec::new(), 1));
|
|
}
|
|
|
|
ensure_session(session, config)?;
|
|
|
|
let s = session.as_mut().unwrap();
|
|
let end = oldest_seq - 1;
|
|
let start = end.saturating_sub(BATCH_SIZE - 1).max(1);
|
|
let range = format!("{}:{}", start, end);
|
|
|
|
let mut emails = fetch_range_emails(s, &range)?;
|
|
emails.reverse();
|
|
Ok((emails, start))
|
|
}
|
|
|
|
fn fetch_latest(session: &mut ImapSession) -> Result<Inbox, String> {
|
|
let exists = select_inbox(session)?;
|
|
if exists == 0 {
|
|
return Ok(Inbox {
|
|
emails: Vec::new(),
|
|
oldest_seq: None,
|
|
});
|
|
}
|
|
let start = exists.saturating_sub(BATCH_SIZE - 1).max(1);
|
|
let range = format!("{}:{}", start, exists);
|
|
let mut emails = fetch_range_emails(session, &range)?;
|
|
emails.reverse();
|
|
Ok(Inbox {
|
|
emails,
|
|
oldest_seq: Some(start),
|
|
})
|
|
}
|
|
|
|
fn select_inbox(session: &mut ImapSession) -> Result<u32, String> {
|
|
match session {
|
|
ImapSession::Plain(s) => {
|
|
let mailbox = s.select("INBOX").map_err(|e| e.to_string())?;
|
|
Ok(mailbox.exists)
|
|
}
|
|
ImapSession::Tls(s) => {
|
|
let mailbox = s.select("INBOX").map_err(|e| e.to_string())?;
|
|
Ok(mailbox.exists)
|
|
}
|
|
}
|
|
}
|
|
|
|
fn fetch_range_emails(session: &mut ImapSession, range: &str) -> Result<Vec<Email>, String> {
|
|
match session {
|
|
ImapSession::Plain(s) => {
|
|
let messages = s
|
|
.fetch(range, "BODY.PEEK[HEADER.FIELDS (SUBJECT FROM DATE)]")
|
|
.map_err(|e| e.to_string())?;
|
|
Ok(parse_emails(&messages))
|
|
}
|
|
ImapSession::Tls(s) => {
|
|
let messages = s
|
|
.fetch(range, "BODY.PEEK[HEADER.FIELDS (SUBJECT FROM DATE)]")
|
|
.map_err(|e| e.to_string())?;
|
|
Ok(parse_emails(&messages))
|
|
}
|
|
}
|
|
}
|
|
|
|
fn parse_emails(fetches: &[imap::types::Fetch]) -> Vec<Email> {
|
|
let mut emails = Vec::new();
|
|
for message in fetches {
|
|
if let Some(body) = message.header() {
|
|
let header = String::from_utf8_lossy(body);
|
|
let mut subject = String::new();
|
|
let mut from = String::new();
|
|
let mut date = String::new();
|
|
|
|
for line in header.lines() {
|
|
if let Some(val) = line.strip_prefix("Subject: ") {
|
|
subject = val.to_string();
|
|
} else if let Some(val) = line.strip_prefix("From: ") {
|
|
from = val.to_string();
|
|
} else if let Some(val) = line.strip_prefix("Date: ") {
|
|
date = DateTime::parse_from_rfc2822(val)
|
|
.map(|dt: DateTime<FixedOffset>| {
|
|
dt.with_timezone(&Local).format("%Y-%m-%d %H:%M").to_string()
|
|
})
|
|
.unwrap_or_else(|_| val.to_string());
|
|
}
|
|
}
|
|
|
|
emails.push(Email { seq: message.message, subject, from, date });
|
|
}
|
|
}
|
|
emails
|
|
}
|
|
|
|
fn ensure_session<'a>(
|
|
session: &'a mut Option<ImapSession>,
|
|
config: &Config,
|
|
) -> Result<&'a mut ImapSession, String> {
|
|
if session.as_mut().map_or(true, |s| s.noop().is_err()) {
|
|
*session = None;
|
|
*session = Some(connect::connect(config)?);
|
|
}
|
|
Ok(session.as_mut().unwrap())
|
|
}
|
|
|
|
/// Fetch the text body of a single email by sequence number.
|
|
pub(crate) fn fetch_body(
|
|
session: &mut Option<ImapSession>,
|
|
seq: u32,
|
|
config: &Config,
|
|
) -> Result<String, String> {
|
|
let s = ensure_session(session, config)?;
|
|
let range = seq.to_string();
|
|
let raw = match s {
|
|
ImapSession::Plain(s) => {
|
|
let messages = s
|
|
.fetch(&range, "BODY.PEEK[]")
|
|
.map_err(|e| e.to_string())?;
|
|
extract_raw_body(&messages)
|
|
}
|
|
ImapSession::Tls(s) => {
|
|
let messages = s
|
|
.fetch(&range, "BODY.PEEK[]")
|
|
.map_err(|e| e.to_string())?;
|
|
extract_raw_body(&messages)
|
|
}
|
|
};
|
|
let raw = raw.ok_or_else(|| "No body found".to_string())?;
|
|
extract_plain_text(&raw)
|
|
}
|
|
|
|
fn extract_raw_body(fetches: &[imap::types::Fetch]) -> Option<Vec<u8>> {
|
|
fetches.first().and_then(|f| {
|
|
f.body().map(|b| b.to_vec())
|
|
})
|
|
}
|
|
|
|
fn extract_plain_text(raw: &[u8]) -> Result<String, String> {
|
|
let parsed = mailparse::parse_mail(raw).map_err(|e| e.to_string())?;
|
|
// Try text/plain first
|
|
if let Some(text) = find_part(&parsed, "text/plain") {
|
|
return Ok(clean_text(&text));
|
|
}
|
|
// Fall back to text/html rendered as text
|
|
if let Some(html) = find_part(&parsed, "text/html") {
|
|
return Ok(clean_text(&html_to_text(&html)));
|
|
}
|
|
// Last resort: top-level body
|
|
parsed.get_body().map(|s| clean_text(&s)).map_err(|e| e.to_string())
|
|
}
|
|
|
|
/// Second-pass cleanup of quoted-printable residue in already-decoded text.
///
/// * `=XX` (two hex digits, either case) is replaced by the byte it encodes;
/// * a soft line break (`=` directly followed by `\r\n` or `\n`) is removed;
/// * any other `=` is kept literally.
///
/// Every other byte, including non-ASCII characters, whitespace and line
/// endings, is passed through unchanged. The decoding is done by hand on the
/// UTF-8 bytes rather than with a transfer-encoding decoder so that text which
/// is already Unicode is not altered. If the decoded bytes are not valid
/// UTF-8, the invalid sequences are replaced with U+FFFD.
fn clean_text(text: &str) -> String {
    /// Numeric value of one ASCII hex digit, or `None` for any other byte.
    fn hex_digit(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let input = text.as_bytes();
    // Common case: nothing that could be an escape, so return the text as is.
    if !input.contains(&b'=') {
        return text.to_string();
    }

    // `=` is ASCII and never occurs inside a multi-byte UTF-8 sequence, so a
    // byte-wise scan cannot split a character.
    let mut decoded = Vec::with_capacity(input.len());
    let mut i = 0;
    while i < input.len() {
        let byte = input[i];
        if byte != b'=' {
            decoded.push(byte);
            i += 1;
            continue;
        }

        let next = input.get(i + 1).copied();
        let after = input.get(i + 2).copied();
        match (next, after) {
            // Soft line breaks: drop the `=` together with the line ending.
            (Some(b'\r'), Some(b'\n')) => i += 3,
            (Some(b'\n'), _) => i += 2,
            (Some(hi), Some(lo)) => match (hex_digit(hi), hex_digit(lo)) {
                (Some(h), Some(l)) => {
                    decoded.push((h << 4) | l);
                    i += 3;
                }
                // Not an escape after all: keep the `=` and rescan what follows.
                _ => {
                    decoded.push(b'=');
                    i += 1;
                }
            },
            // `=` too close to the end of the text to be an escape.
            _ => {
                decoded.push(b'=');
                i += 1;
            }
        }
    }

    String::from_utf8_lossy(&decoded).into_owned()
}
|
|
|
|
fn find_part(mail: &mailparse::ParsedMail, mime_type: &str) -> Option<String> {
|
|
let content_type = mail.ctype.mimetype.to_lowercase();
|
|
if content_type == mime_type {
|
|
return mail.get_body().ok();
|
|
}
|
|
for part in &mail.subparts {
|
|
if let Some(text) = find_part(part, mime_type) {
|
|
return Some(text);
|
|
}
|
|
}
|
|
None
|
|
}
|
|
|
|
fn html_to_text(html: &str) -> String {
|
|
html2text::from_read(html.as_bytes(), 80).unwrap_or_else(|_| html.to_string())
|
|
}
|