undeepend/src/maven/xml/sax_parser.rs
2025-07-20 18:16:56 +02:00

218 lines
6.3 KiB
Rust

use crate::maven::xml::{Attribute, SaxHandler};
/// Parses the XML document held in `xml`, reporting every event to `handler`.
///
/// The handler is borrowed only for the duration of the call; the parser that
/// drives it is created here and dropped before returning.
///
/// # Errors
///
/// Propagates whatever error the underlying parser run returns.
pub fn parse_string(xml: String, handler: Box<&mut dyn SaxHandler>) -> anyhow::Result<()> {
    SAXParser::new(xml, handler).parse()
}
/// Cursor state of the character-level XML scanner that feeds a [`SaxHandler`].
struct SAXParser<'a> {
/// Complete input, split into `char`s so the cursor can index by character.
xml: Vec<char>,
/// Receiver of the document and element events produced while scanning.
handler: Box<&'a mut dyn SaxHandler>,
/// Number of characters consumed so far (0 before the first advance);
/// while inside the input, `current` is `xml[position - 1]`.
position: usize,
/// Character under the cursor; `'\0'` before the first advance and once the
/// cursor has moved past the last character.
current: char,
/// One `(value, counter)` entry per `xmlns` attribute encountered, innermost last.
/// An entry is pushed with counter `-1`, the top entry's counter is incremented
/// for every start element and decremented again when elements are closed.
namespace_stack: Vec<(String, isize)>,
}
/// Single-pass, character-driven XML scanner that reports what it finds to the
/// wrapped `SaxHandler`.
///
/// Cursor invariant: after the first call to `advance`, `current` equals
/// `xml[position - 1]` for as long as `position <= xml.len()`; one step past the
/// end `current` is `'\0'`, and any further `advance` is an error.
impl<'a> SAXParser<'a> {
    /// Creates a parser over `xml` that will report events to `handler`.
    ///
    /// The cursor starts in front of the first character; nothing is read
    /// until `parse` is called.
    pub fn new(xml: String, handler: Box<&'a mut dyn SaxHandler>) -> Self {
        Self {
            xml: xml.chars().collect(),
            handler,
            position: 0,
            current: '\0',
            namespace_stack: Vec::new(),
        }
    }

    /// Parses the whole document: XML declaration first, then all elements.
    ///
    /// # Errors
    ///
    /// Fails when the input does not start with exactly
    /// `<?xml version="1.0" encoding="UTF-8"?>`, or when any later construct
    /// is malformed.
    fn parse(&mut self) -> anyhow::Result<()> {
        self.advance()?;
        self.expect(
            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
            "Content is not allowed in prolog.",
        )?;
        self.skip_whitespace()?;
        self.handler.start_document();
        self.parse_elements()
    }

    /// Dispatches on every `<` until the input is exhausted, then signals the
    /// end of the document to the handler.
    ///
    /// Characters outside of markup (element text) are skipped: this parser
    /// does not report character data to the handler. Skipping them is what
    /// keeps the loop moving forward when the cursor is not on a `<`.
    fn parse_elements(&mut self) -> anyhow::Result<()> {
        while self.position < self.xml.len() {
            if self.current != '<' {
                self.advance()?;
                continue;
            }
            self.advance()?;
            match self.current {
                '!' => self.skip_comment()?,
                '/' => self.parse_end_element()?,
                _ => self.parse_start_element()?,
            }
        }
        self.handler.end_document();
        Ok(())
    }

    /// Skips a `<!-- ... -->` comment; the cursor must be on the `!`.
    ///
    /// # Errors
    ///
    /// Fails when the markup does not start with `!--` or when the input ends
    /// before the closing `-->` is found.
    fn skip_comment(&mut self) -> anyhow::Result<()> {
        self.expect("!--", "Expect comment start")?;
        // Number of consecutive '-' seen immediately before the cursor,
        // capped at 2 because only "at least two" matters.
        let mut dashes = 0usize;
        loop {
            if self.position > self.xml.len() {
                return Err(anyhow::anyhow!(
                    "End reached while expecting end of comment"
                ));
            }
            let c = self.current;
            self.advance()?;
            match c {
                '>' if dashes >= 2 => break,
                '-' => dashes = (dashes + 1).min(2),
                // Any other character breaks a partial "--" run.
                _ => dashes = 0,
            }
        }
        self.skip_whitespace()
    }

    /// Parses a start tag (or self-closing tag) with its attributes and emits
    /// `start_element`, followed by `end_element` for a self-closing tag.
    ///
    /// The cursor must be on the first character of the element name.
    ///
    /// # Errors
    ///
    /// Fails on malformed attributes, on a tag that is not closed with `>`,
    /// or when the input ends inside the tag.
    fn parse_start_element(&mut self) -> anyhow::Result<()> {
        let name = self.read_until(" \t\r\n/>")?;
        let mut atts = vec![];
        while self.current.is_whitespace() {
            self.skip_whitespace()?;
            // Whitespace may also precede the end of the tag (`<a />`, `<a b="c" >`).
            if matches!(self.current, '/' | '>') {
                break;
            }
            atts.push(self.parse_attribute()?);
            // parse_attribute leaves the cursor on the closing quote.
            self.advance()?;
        }
        // The innermost declared namespace applies; count this element against it.
        let namespace = match self.namespace_stack.last_mut() {
            Some((uri, open_elements)) => {
                *open_elements += 1;
                Some(uri.clone())
            }
            None => None,
        };
        self.handler
            .start_element(namespace, name.as_str(), "", atts);
        if self.current == '/' {
            self.advance()?;
            let namespace = self.pop_namespace();
            self.handler.end_element(namespace, name.as_str(), "");
        }
        self.expect(">", "Expected > at end of start tag")?;
        self.skip_whitespace()?;
        Ok(())
    }

    /// Parses one `name="value"` (or `name='value'`) pair, leaving the cursor
    /// on the closing quote.
    ///
    /// Whitespace around `=` is accepted. An attribute named `xmlns` also
    /// pushes its value onto the namespace stack with counter `-1`, so that
    /// the enclosing start tag brings it to `0`.
    ///
    /// # Errors
    ///
    /// Fails when `=` or the opening quote is missing, or when the input ends
    /// before the closing quote.
    fn parse_attribute(&mut self) -> anyhow::Result<Attribute> {
        // Stop at whitespace and tag terminators as well, so that a missing '='
        // is reported instead of swallowing the rest of the document.
        let att_name = self.read_until("= \t\r\n/>")?;
        self.skip_whitespace()?;
        self.expect("=", "Expected =")?;
        self.skip_whitespace()?;
        let closing_quote = match self.current {
            '"' => "\"",
            '\'' => "'",
            _ => return Err(anyhow::anyhow!("Expected start of attribute value")),
        };
        self.advance()?;
        let att_value = self.read_until(closing_quote)?;
        let namespace = if att_name == "xmlns" {
            self.namespace_stack.push((att_value.clone(), -1));
            Some(att_value.clone())
        } else {
            None
        };
        Ok(Attribute {
            name: att_name,
            namespace,
            value: att_value,
        })
    }

    /// Parses an end tag and emits `end_element`; the cursor must be on the `/`.
    ///
    /// # Errors
    ///
    /// Fails when the input ends before the closing `>`.
    fn parse_end_element(&mut self) -> anyhow::Result<()> {
        self.advance()?;
        let name = self.read_until(">")?;
        let namespace = self.pop_namespace();
        // `</name >` is allowed, so drop whitespace around the name.
        self.handler.end_element(namespace, name.trim(), "");
        self.expect(">", "Expect end of element")?;
        self.skip_whitespace()?;
        Ok(())
    }

    /// Accounts for one closed element and returns the namespace it was in.
    ///
    /// While other elements counted against the innermost namespace are still
    /// open, its counter is decremented; closing the element that declared the
    /// namespace removes the entry. In both cases the namespace is returned,
    /// so an element's `end_element` reports the same namespace as its
    /// `start_element`. Returns `None` when no namespace is active.
    fn pop_namespace(&mut self) -> Option<String> {
        let (uri, open_elements) = self.namespace_stack.pop()?;
        if open_elements > 0 {
            self.namespace_stack.push((uri.clone(), open_elements - 1));
        }
        Some(uri)
    }

    /// Collects characters from the cursor up to, but not including, the first
    /// character contained in `until`; the cursor is left on that character.
    ///
    /// # Errors
    ///
    /// Fails when the input ends before any character of `until` is found.
    fn read_until(&mut self, until: &str) -> anyhow::Result<String> {
        // `position` is one past the index of `current`.
        let start = self.position.saturating_sub(1);
        while !until.contains(self.current) {
            if self.position > self.xml.len() {
                return Err(anyhow::anyhow!(
                    "End reached while expecting {:?}",
                    until.chars().collect::<Vec<char>>()
                ));
            }
            self.advance()?;
        }
        let end = self.position.saturating_sub(1);
        Ok(self.xml[start..end].iter().collect())
    }

    /// Moves the cursor forward over whitespace.
    ///
    /// Never steps past the last character of the input, so a trailing
    /// whitespace character stays under the cursor.
    fn skip_whitespace(&mut self) -> anyhow::Result<()> {
        while self.current.is_whitespace() && self.position < self.xml.len() {
            self.advance()?;
        }
        Ok(())
    }

    /// Moves the cursor one character forward and returns the new current
    /// character, which is `'\0'` when stepping just past the end.
    ///
    /// # Errors
    ///
    /// Fails when the cursor is already past the end of the input.
    fn advance(&mut self) -> anyhow::Result<char> {
        if self.position > self.xml.len() {
            return Err(anyhow::anyhow!(
                "End reached while expecting {:?}",
                self.current
            ));
        }
        self.position += 1;
        self.current = self.xml.get(self.position - 1).copied().unwrap_or('\0');
        Ok(self.current)
    }

    /// Consumes the literal `expected` from the input.
    ///
    /// # Errors
    ///
    /// Fails with `message` at the first character that does not match;
    /// characters matched before that point stay consumed.
    fn expect(&mut self, expected: &str, message: &str) -> anyhow::Result<()> {
        for c in expected.chars() {
            if !self.expect_char(c)? {
                return Err(anyhow::anyhow!(message.to_string()));
            }
        }
        Ok(())
    }

    /// Consumes `expected` if it is the current character.
    ///
    /// Returns `Ok(true)` when the character matched and was consumed, and
    /// `Ok(false)` (cursor untouched) on a mismatch or at the end of input.
    fn expect_char(&mut self, expected: char) -> anyhow::Result<bool> {
        if self.position > self.xml.len() || self.current != expected {
            return Ok(false);
        }
        self.advance()?;
        Ok(true)
    }
}