tuimail/src/inbox.rs
Shautvast e871c1aab8 Clean up leftover quoted-printable artifacts in email body
Add a second-pass QP decode to catch =XX sequences that survive
the initial mailparse decoding.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-17 21:47:47 +01:00

219 lines
6.7 KiB
Rust

use chrono::{DateTime, FixedOffset, Local};
use crate::config::Config;
use crate::connect::ImapSession;
use crate::{connect, Email};
/// Maximum number of messages requested from the server in a single fetch.
const BATCH_SIZE: u32 = 50;
/// The part of the mailbox that has been loaded so far.
pub(crate) struct Inbox {
/// Loaded messages, in the order produced by the fetch helpers in this file.
pub emails: Vec<Email>,
/// Lowest sequence number fetched so far; `None` when the mailbox was empty.
pub oldest_seq: Option<u32>,
}
impl Inbox {
    /// Reports whether messages below the loaded range may still be fetched.
    ///
    /// True only when `oldest_seq` is known and greater than 1.
    pub fn has_older(&self) -> bool {
        matches!(self.oldest_seq, Some(seq) if seq > 1)
    }
}
/// Reload the newest batch of the inbox.
///
/// An existing session is probed with `noop()`. If the probe succeeds, the
/// inbox is fetched over that session and the outcome (success or error) is
/// returned as-is. Otherwise the old session is dropped, a new one is opened
/// with `connect::connect`, stored in `*session`, and used for the fetch.
///
/// # Errors
///
/// Returns the connect error (leaving `*session` as `None`) or the error
/// produced while fetching.
pub(crate) fn refresh(
    session: &mut Option<ImapSession>,
    config: &Config,
) -> Result<Inbox, String> {
    if let Some(live) = session {
        if live.noop().is_ok() {
            return fetch_latest(live);
        }
    }

    // No session, or the probe failed: discard it before dialing again.
    *session = None;
    let fresh = connect::connect(config)?;
    fetch_latest(session.insert(fresh))
}
/// Fetch the batch of messages immediately below `oldest_seq`.
///
/// Returns `(emails, new_oldest_seq)`, where `emails` is the fetched batch in
/// reversed fetch order and `new_oldest_seq` is the lowest sequence number
/// that was requested. When `oldest_seq` is 1 or less there is nothing older
/// to load, so an empty batch and `1` are returned without touching the
/// session.
///
/// # Errors
///
/// Returns the error from (re)establishing the session or from the fetch.
pub(crate) fn fetch_older_batch(
    session: &mut Option<ImapSession>,
    oldest_seq: u32,
    config: &Config,
) -> Result<(Vec<Email>, u32), String> {
    if oldest_seq <= 1 {
        return Ok((Vec::new(), 1));
    }
    // Use the reference handed back by `ensure_session` instead of unwrapping
    // the option a second time.
    let s = ensure_session(session, config)?;
    let end = oldest_seq - 1;
    // `end` is at least 1 here; clamp so the range never starts at 0.
    let start = end.saturating_sub(BATCH_SIZE - 1).max(1);
    let range = format!("{}:{}", start, end);
    let mut emails = fetch_range_emails(s, &range)?;
    emails.reverse();
    Ok((emails, start))
}
/// Select the inbox and load its most recent batch of messages.
///
/// An empty mailbox yields an `Inbox` with no emails and `oldest_seq: None`.
/// Otherwise up to `BATCH_SIZE` messages ending at the reported message count
/// are fetched and returned in reversed fetch order.
fn fetch_latest(session: &mut ImapSession) -> Result<Inbox, String> {
    match select_inbox(session)? {
        0 => Ok(Inbox {
            emails: Vec::new(),
            oldest_seq: None,
        }),
        newest => {
            // Clamp so the range never starts below sequence number 1.
            let oldest = newest.saturating_sub(BATCH_SIZE - 1).max(1);
            let span = format!("{}:{}", oldest, newest);
            let emails = fetch_range_emails(session, &span)?
                .into_iter()
                .rev()
                .collect();
            Ok(Inbox {
                emails,
                oldest_seq: Some(oldest),
            })
        }
    }
}
/// Select the `INBOX` mailbox and return the `exists` count reported for it.
///
/// Any error from the underlying session is converted to its string form.
fn select_inbox(session: &mut ImapSession) -> Result<u32, String> {
    let selected = match session {
        ImapSession::Plain(conn) => conn.select("INBOX"),
        ImapSession::Tls(conn) => conn.select("INBOX"),
    };
    selected
        .map(|mailbox| mailbox.exists)
        .map_err(|err| err.to_string())
}
/// Fetch the Subject, From and Date header fields for every message in
/// `range` and turn them into `Email` values.
///
/// `range` is passed to the session's `fetch` unchanged. Any error from the
/// session is converted to its string form.
fn fetch_range_emails(session: &mut ImapSession, range: &str) -> Result<Vec<Email>, String> {
    // Same query for both transport variants.
    const HEADER_QUERY: &str = "BODY.PEEK[HEADER.FIELDS (SUBJECT FROM DATE)]";

    let fetched = match session {
        ImapSession::Plain(conn) => conn.fetch(range, HEADER_QUERY),
        ImapSession::Tls(conn) => conn.fetch(range, HEADER_QUERY),
    }
    .map_err(|err| err.to_string())?;

    Ok(parse_emails(&fetched))
}
/// Build one `Email` per fetch result that carries a header section.
///
/// Header names are matched case-insensitively and folded continuation lines
/// are joined onto the field they belong to, so long or unusually formatted
/// Subject/From/Date fields are not lost. If a field appears more than once,
/// the last occurrence wins. Fetch results without a header are skipped.
///
/// The Date value is reformatted as local time (`%Y-%m-%d %H:%M`) when it
/// parses as RFC 2822; otherwise the raw value is kept.
///
/// Encoded words (`=?charset?…?=`) in field values are not decoded here.
fn parse_emails(fetches: &[imap::types::Fetch]) -> Vec<Email> {
    fetches
        .iter()
        .filter_map(|message| {
            let raw = message.header()?;
            let header = String::from_utf8_lossy(raw);

            let mut subject = String::new();
            let mut from = String::new();
            let mut date = String::new();

            for (name, value) in unfold_header_fields(&header) {
                if name.eq_ignore_ascii_case("Subject") {
                    subject = value;
                } else if name.eq_ignore_ascii_case("From") {
                    from = value;
                } else if name.eq_ignore_ascii_case("Date") {
                    date = DateTime::parse_from_rfc2822(&value)
                        .map(|dt: DateTime<FixedOffset>| {
                            dt.with_timezone(&Local).format("%Y-%m-%d %H:%M").to_string()
                        })
                        // Unparseable dates are shown exactly as received.
                        .unwrap_or(value);
                }
            }

            Some(Email {
                seq: message.message,
                subject,
                from,
                date,
            })
        })
        .collect()
}

/// Split a header section into `(name, value)` pairs.
///
/// A line that starts with a space or tab continues the previous field; its
/// trimmed content is appended to that field's value, separated by one space.
/// Lines without a colon (including the blank terminator line) are ignored.
fn unfold_header_fields(header: &str) -> Vec<(&str, String)> {
    let mut fields: Vec<(&str, String)> = Vec::new();

    for line in header.lines() {
        if line.starts_with(' ') || line.starts_with('\t') {
            // Continuation line: only meaningful if a field precedes it.
            if let Some((_, value)) = fields.last_mut() {
                let extra = line.trim();
                if !extra.is_empty() {
                    if !value.is_empty() {
                        value.push(' ');
                    }
                    value.push_str(extra);
                }
            }
        } else if let Some((name, value)) = line.split_once(':') {
            fields.push((name.trim(), value.trim().to_string()));
        }
    }

    fields
}
/// Return a usable session, reconnecting if there is none or if the existing
/// one fails a `noop()` probe.
///
/// On reconnect the old session is dropped first; if `connect::connect`
/// fails, `*session` stays `None` and the error is returned.
///
/// NOTE(review): this function does not select a mailbox, while callers in
/// this file issue a fetch right after it. Confirm that `connect::connect`
/// leaves the new session with a mailbox selected.
fn ensure_session<'a>(
    session: &'a mut Option<ImapSession>,
    config: &Config,
) -> Result<&'a mut ImapSession, String> {
    let usable = match session.as_mut() {
        Some(current) => current.noop().is_ok(),
        None => false,
    };

    if !usable {
        *session = None;
        let fresh = connect::connect(config)?;
        return Ok(session.insert(fresh));
    }

    // `usable` is only true when the option holds a session.
    Ok(session.as_mut().unwrap())
}
/// Fetch a single message by sequence number and return its readable text.
///
/// The session is (re)established through `ensure_session`, the message is
/// requested with `BODY.PEEK[]`, and the raw bytes of the first fetch result
/// are handed to `extract_plain_text`.
///
/// # Errors
///
/// Returns the session or fetch error as a string, `"No body found"` when the
/// response carries no body, or the error from text extraction.
pub(crate) fn fetch_body(
    session: &mut Option<ImapSession>,
    seq: u32,
    config: &Config,
) -> Result<String, String> {
    let conn = ensure_session(session, config)?;
    let target = seq.to_string();

    let fetched = match conn {
        ImapSession::Plain(plain) => plain.fetch(&target, "BODY.PEEK[]"),
        ImapSession::Tls(tls) => tls.fetch(&target, "BODY.PEEK[]"),
    }
    .map_err(|err| err.to_string())?;

    match extract_raw_body(&fetched) {
        Some(raw) => extract_plain_text(&raw),
        None => Err("No body found".to_string()),
    }
}
/// Copy out the body bytes of the first fetch result.
///
/// Returns `None` when the slice is empty or the first result has no body.
fn extract_raw_body(fetches: &[imap::types::Fetch]) -> Option<Vec<u8>> {
    let first = fetches.first()?;
    let bytes = first.body()?;
    Some(bytes.to_vec())
}
/// Parse a raw message and return the most readable text it offers.
///
/// Preference order: a `text/plain` part, then a `text/html` part converted
/// to text, then the body of the top-level message. The chosen text is passed
/// through `clean_text` before being returned.
fn extract_plain_text(raw: &[u8]) -> Result<String, String> {
    let mail = mailparse::parse_mail(raw).map_err(|err| err.to_string())?;

    let text = if let Some(plain) = find_part(&mail, "text/plain") {
        plain
    } else if let Some(markup) = find_part(&mail, "text/html") {
        // No plain part: convert the HTML alternative to text.
        html_to_text(&markup)
    } else {
        // Neither part type found: use whatever the top-level body decodes to.
        mail.get_body().map_err(|err| err.to_string())?
    };

    Ok(clean_text(&text))
}
/// Remove leftover quoted-printable artifacts (`=XX` sequences).
///
/// The text is run through the quoted-printable decoder in robust mode and
/// the decoded bytes are converted to a string lossily. If decoding fails,
/// every `=` followed by two hex digits is stripped from the text instead.
///
/// NOTE(review): the decode pass presumably treats any `=XX` pair as an
/// escape, so a literal such as `id=DEADBEEF` in an already-decoded body
/// would be altered as well. Confirm that this is acceptable.
fn clean_text(text: &str) -> String {
    match quoted_printable::decode(text.as_bytes(), quoted_printable::ParseMode::Robust) {
        Ok(bytes) => String::from_utf8_lossy(&bytes).into_owned(),
        Err(_) => {
            // Compile the pattern only on this fallback path; building it on
            // every call is wasted work when decoding succeeds.
            let leftover = regex::Regex::new(r"=[0-9A-Fa-f]{2}")
                .expect("hard-coded pattern is a valid regex");
            leftover.replace_all(text, "").into_owned()
        }
    }
}
/// Depth-first search for the first part whose MIME type equals `mime_type`.
///
/// The part's MIME type is lowercased before comparison, so `mime_type`
/// should be given in lowercase. When a part matches, its decoded body is
/// returned, or `None` if decoding fails; its subparts are not searched.
fn find_part(mail: &mailparse::ParsedMail, mime_type: &str) -> Option<String> {
    if mail.ctype.mimetype.to_lowercase() == mime_type {
        mail.get_body().ok()
    } else {
        mail.subparts
            .iter()
            .find_map(|child| find_part(child, mime_type))
    }
}
/// Convert HTML to text using a width of 80.
///
/// If the conversion fails, the HTML source is returned unchanged.
fn html_to_text(html: &str) -> String {
    match html2text::from_read(html.as_bytes(), 80) {
        Ok(rendered) => rendered,
        Err(_) => html.to_string(),
    }
}