skip comments

This commit is contained in:
Shautvast 2025-07-19 13:31:11 +02:00
parent 214e790c27
commit e6f1eb0f08
4 changed files with 77 additions and 17 deletions

View file

@ -5,7 +5,7 @@ mod debug;
#[derive(Debug)] #[derive(Debug)]
pub struct Attribute { pub struct Attribute {
name: String, name: String,
namespace: Option<String>, _namespace: Option<String>,
value: String, value: String,
} }

View file

@ -23,6 +23,7 @@ impl<'a> SAXParser<'a> {
} }
fn parse(&mut self) -> anyhow::Result<()> { fn parse(&mut self) -> anyhow::Result<()> {
self.advance()?;
self.expect( self.expect(
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>", "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
"Content is not allowed in prolog.", "Content is not allowed in prolog.",
@ -36,7 +37,9 @@ impl<'a> SAXParser<'a> {
while self.position < self.xml.len() { while self.position < self.xml.len() {
if self.current == '<' { if self.current == '<' {
self.advance()?; self.advance()?;
if self.current != '/' { if self.current == '!' {
self.skip_comment()?;
} else if self.current != '/' {
self.parse_start_element()?; self.parse_start_element()?;
} else { } else {
self.parse_end_element()?; self.parse_end_element()?;
@ -47,6 +50,29 @@ impl<'a> SAXParser<'a> {
Ok(()) Ok(())
} }
/// Skips an XML comment (`<!-- ... -->`) and any whitespace that follows it.
///
/// Expects `self.current` to be the `!` that follows the opening `<`.
/// The comment start `!--` is consumed first; the body is then scanned one
/// character at a time until the terminator `-->` has been seen.
///
/// # Errors
///
/// Returns an error when the comment start `!--` is not present, or when
/// `advance` / `skip_whitespace` fail. If the input runs out before the
/// terminator is found, the scan loop simply stops; this function does not
/// itself report an unterminated comment.
fn skip_comment(&mut self) -> anyhow::Result<()> {
    self.expect("!--", "Expect comment start")?;
    let mut c = self.current;
    // Number of characters of the closing `-->` matched so far (0..=3).
    let mut end_in_sight = 0;
    while end_in_sight < 3 && self.position < self.xml.len() {
        end_in_sight = match (c, end_in_sight) {
            // A run of more than two dashes (`--->`) still ends in `--`,
            // so stay in the "two dashes seen" state.
            ('-', 2) => 2,
            ('-', seen) => seen + 1,
            ('>', 2) => 3,
            // Any other character breaks a partial match. The previous code
            // subtracted 0 here, so dashes that were not adjacent were
            // counted together and `<!-- a-b-c>` ended the comment early.
            _ => 0,
        };
        c = self.advance()?;
    }
    self.skip_whitespace()?;
    Ok(())
}
fn parse_start_element(&mut self) -> anyhow::Result<()> { fn parse_start_element(&mut self) -> anyhow::Result<()> {
let name = self.read_until(" />")?; let name = self.read_until(" />")?;
let mut atts = vec![]; let mut atts = vec![];
@ -59,26 +85,33 @@ impl<'a> SAXParser<'a> {
self.handler.start_element("", name.as_str(), "", atts); self.handler.start_element("", name.as_str(), "", atts);
self.skip_whitespace()?; self.skip_whitespace()?;
if self.current == '/' {
self.advance()?;
}
self.expect_char('>')?; self.expect_char('>')?;
self.skip_whitespace()?;
Ok(()) Ok(())
} }
fn parse_attribute(&mut self) -> anyhow::Result<Attribute> { fn parse_attribute(&mut self) -> anyhow::Result<Attribute> {
let att_name = self.read_until("=")?; let att_name = self.read_until("=")?;
self.skip_whitespace()?; self.skip_whitespace()?;
self.expect("=", "Expected =")?;
self.expect("\"", "Expected start of attribute value")?; self.expect("\"", "Expected start of attribute value")?;
let att_value = self.read_until("\"")?; let att_value = self.read_until("\"")?;
Ok(Attribute { Ok(Attribute {
name: att_name.trim().to_string(), name: att_name.trim().to_string(),
namespace: Some("".to_string()), _namespace: Some("".to_string()),
value: att_value, value: att_value,
}) })
} }
fn parse_end_element(&mut self) -> anyhow::Result<()> { fn parse_end_element(&mut self) -> anyhow::Result<()> {
self.advance()?;
let name = self.read_until(">")?; let name = self.read_until(">")?;
self.handler.end_element("", name.as_str(), ""); self.handler.end_element("", name.as_str(), "");
self.expect(">", "Expect end of element")?;
Ok(()) Ok(())
} }
@ -87,6 +120,9 @@ impl<'a> SAXParser<'a> {
let mut c = self.current; let mut c = self.current;
let until = until.chars().collect::<Vec<char>>(); let until = until.chars().collect::<Vec<char>>();
while !until.contains(&c) { while !until.contains(&c) {
if self.position > self.xml.len() {
return Err(anyhow::anyhow!("End reached while expecting {:?}", until));
}
c = self.advance()?; c = self.advance()?;
} }
Ok(self.xml[start - 1..self.position - 1] Ok(self.xml[start - 1..self.position - 1]
@ -103,33 +139,38 @@ impl<'a> SAXParser<'a> {
} }
fn advance(&mut self) -> anyhow::Result<char> { fn advance(&mut self) -> anyhow::Result<char> {
if self.position > self.xml.len() {
return Err(anyhow::anyhow!(
"End reached while expecting {:?}",
self.current
));
}
self.position += 1; self.position += 1;
self.current = self.xml[self.position - 1]; self.current = if self.position <= self.xml.len() {
self.xml[self.position - 1]
} else {
'\0'
};
Ok(self.current) Ok(self.current)
} }
fn next_char(&mut self) -> anyhow::Result<char> { fn expect(&mut self, expected: &str, message: &str) -> anyhow::Result<()> {
if self.position >= self.xml.len() { for c in expected.chars() {
Err(anyhow::anyhow!("End reached"))
} else {
Ok(self.xml[self.position + 1])
}
}
fn expect(&mut self, header_line: &str, message: &str) -> anyhow::Result<()> {
for c in header_line.chars() {
if !self.expect_char(c)? { if !self.expect_char(c)? {
return Err(anyhow::anyhow!(message.to_string())); return Err(anyhow::anyhow!(message.to_string()));
} }
} }
self.advance()?;
Ok(()) Ok(())
} }
fn expect_char(&mut self, expected: char) -> anyhow::Result<bool> { fn expect_char(&mut self, expected: char) -> anyhow::Result<bool> {
if self.position >= self.xml.len() { if self.position > self.xml.len() {
return Ok(false); return Ok(false);
} }
Ok(self.advance()? == expected) let same = self.current == expected;
if same {
self.advance()?;
}
Ok(same)
} }
} }

View file

@ -16,6 +16,7 @@ mod tests {
#[test] #[test]
fn test_xml_header() { fn test_xml_header() {
initialize();
let test_xml = include_str!("test/header.xml"); let test_xml = include_str!("test/header.xml");
let mut testhandler = TestHandler::new(); let mut testhandler = TestHandler::new();
parse_string(test_xml.to_string(), Box::new(&mut testhandler)) parse_string(test_xml.to_string(), Box::new(&mut testhandler))
@ -61,6 +62,20 @@ mod tests {
assert!(testhandler.end_element_called); assert!(testhandler.end_element_called);
assert!(testhandler.end_document_called); assert!(testhandler.end_document_called);
} }
/// Parses `test/comment.xml`, which has a comment between the XML
/// declaration and the root element, and checks that the handler still
/// receives the document and element callbacks for `bookstore`.
#[test]
fn test_ignore_comment() {
    let mut handler = TestHandler::new();
    let xml = include_str!("test/comment.xml").to_string();

    parse_string(xml, Box::new(&mut handler)).expect("Failed to parse test xml");

    // Document and element start events were delivered.
    assert!(handler.start_document_called);
    assert!(handler.start_element_called);

    // The first recorded element is the root, not the comment.
    assert!(!handler.elements.is_empty());
    assert_eq!(
        handler.elements[0],
        r#"<bookstore xmlns="http://example.com/books">"#
    );

    // The matching end events were delivered as well.
    assert!(handler.end_element_called);
    assert!(handler.end_document_called);
}
} }
#[derive(Debug)] #[derive(Debug)]

View file

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Test XML file for SAX parser -->
<bookstore xmlns="http://example.com/books">
</bookstore>