keep more links in the message
This commit is contained in:
parent
a709a3a946
commit
687e8d028b
1 changed files with 12 additions and 7 deletions
19
src/inbox.rs
19
src/inbox.rs
|
|
@ -232,14 +232,12 @@ fn clean_text(text: &str) -> String {
|
|||
|
||||
/// Clean up markdown converted from HTML emails:
|
||||
/// - Strip image references 
|
||||
/// - Simplify links [text](url) → text
|
||||
/// - Remove bare long URLs
|
||||
/// - Expand links [text](url) → "text url" or just "url" when text == url
|
||||
/// - Remove pipe characters from HTML table remnants
|
||||
/// - Collapse runs of 3+ blank lines to 2
|
||||
fn clean_markdown(text: &str) -> String {
|
||||
let re_img = regex::Regex::new(r"!\[[^\]]*\]\([^)]*\)").unwrap();
|
||||
let re_link = regex::Regex::new(r"\[([^\]]*)\]\([^)]*\)").unwrap();
|
||||
let re_bare_url = regex::Regex::new(r"https?://\S{80,}").unwrap();
|
||||
let re_img = regex::Regex::new(r"!\[[^]]*]\([^)]*\)").unwrap();
|
||||
let re_link = regex::Regex::new(r"\[([^]]*)]\(([^)]*)\)").unwrap();
|
||||
let re_blank_lines = regex::Regex::new(r"\n{3,}").unwrap();
|
||||
// Two or more pipes possibly separated by whitespace (table borders / empty cells)
|
||||
let re_multi_pipes = regex::Regex::new(r"\|[\s|]*\|").unwrap();
|
||||
|
|
@ -248,8 +246,15 @@ fn clean_markdown(text: &str) -> String {
|
|||
let re_multi_space = regex::Regex::new(r"[ \t]{2,}").unwrap();
|
||||
|
||||
let result = re_img.replace_all(text, "");
|
||||
let result = re_link.replace_all(&result, "$1");
|
||||
let result = re_bare_url.replace_all(&result, "");
|
||||
let result = re_link.replace_all(&result, |caps: &regex::Captures| {
|
||||
let link_text = caps[1].trim();
|
||||
let url = caps[2].trim();
|
||||
if link_text.is_empty() || link_text == url {
|
||||
url.to_string()
|
||||
} else {
|
||||
format!("{link_text} {url}")
|
||||
}
|
||||
});
|
||||
|
||||
// Process line by line to strip HTML table pipe remnants
|
||||
let result: String = result
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue