keep more links in the message

This commit is contained in:
Shautvast 2026-02-25 17:01:29 +01:00
parent a709a3a946
commit 687e8d028b

View file

@ -232,14 +232,12 @@ fn clean_text(text: &str) -> String {
/// Clean up markdown converted from HTML emails: /// Clean up markdown converted from HTML emails:
/// - Strip image references ![...](...) /// - Strip image references ![...](...)
/// - Simplify links [text](url) → text /// - Expand links [text](url) → "text url" or just "url" when text == url
/// - Remove bare long URLs
/// - Remove pipe characters from HTML table remnants /// - Remove pipe characters from HTML table remnants
/// - Collapse runs of 3+ blank lines to 2 /// - Collapse runs of 3+ blank lines to 2
fn clean_markdown(text: &str) -> String { fn clean_markdown(text: &str) -> String {
let re_img = regex::Regex::new(r"!\[[^\]]*\]\([^)]*\)").unwrap(); let re_img = regex::Regex::new(r"!\[[^]]*]\([^)]*\)").unwrap();
let re_link = regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap(); let re_link = regex::Regex::new(r"\[([^]]*)]\(([^)]*)\)").unwrap();
let re_bare_url = regex::Regex::new(r"https?://\S{80,}").unwrap();
let re_blank_lines = regex::Regex::new(r"\n{3,}").unwrap(); let re_blank_lines = regex::Regex::new(r"\n{3,}").unwrap();
// Two or more pipes possibly separated by whitespace (table borders / empty cells) // Two or more pipes possibly separated by whitespace (table borders / empty cells)
let re_multi_pipes = regex::Regex::new(r"\|[\s|]*\|").unwrap(); let re_multi_pipes = regex::Regex::new(r"\|[\s|]*\|").unwrap();
@ -248,8 +246,15 @@ fn clean_markdown(text: &str) -> String {
let re_multi_space = regex::Regex::new(r"[ \t]{2,}").unwrap(); let re_multi_space = regex::Regex::new(r"[ \t]{2,}").unwrap();
let result = re_img.replace_all(text, ""); let result = re_img.replace_all(text, "");
let result = re_link.replace_all(&result, "$1"); let result = re_link.replace_all(&result, |caps: &regex::Captures| {
let result = re_bare_url.replace_all(&result, ""); let link_text = caps[1].trim();
let url = caps[2].trim();
if link_text.is_empty() || link_text == url {
url.to_string()
} else {
format!("{link_text} {url}")
}
});
// Process line by line to strip HTML table pipe remnants // Process line by line to strip HTML table pipe remnants
let result: String = result let result: String = result