diff --git a/src/maven/xml/debug.rs b/src/maven/xml/debug.rs index ed1bda6..67641e1 100644 --- a/src/maven/xml/debug.rs +++ b/src/maven/xml/debug.rs @@ -18,14 +18,14 @@ impl SaxHandler for DebugHandler { } fn start_element( &mut self, - _uri: &str, + _uri: Option, local_name: &str, _qualified_name: &str, attributes: Vec, ) { debug!("start_element {}, {:?}", local_name, attributes); } - fn end_element(&mut self, _uri: &str, local_name: &str, _qualified_name: &str) { + fn end_element(&mut self, _uri: Option, local_name: &str, _qualified_name: &str) { debug!("end_element {} ", local_name); } fn characters(&mut self, chars: &[char]) { diff --git a/src/maven/xml/mod.rs b/src/maven/xml/mod.rs index 2638317..9ecb7a8 100644 --- a/src/maven/xml/mod.rs +++ b/src/maven/xml/mod.rs @@ -5,7 +5,7 @@ mod debug; #[derive(Debug)] pub struct Attribute { name: String, - _namespace: Option, + namespace: Option, value: String, } @@ -16,12 +16,12 @@ pub trait SaxHandler { fn end_prefix_mapping(&mut self, prefix: &str, uri: &str); fn start_element( &mut self, - uri: &str, + uri: Option, local_name: &str, qualified_name: &str, attributes: Vec, ); - fn end_element(&mut self, uri: &str, local_name: &str, qualified_name: &str); + fn end_element(&mut self, uri: Option, local_name: &str, qualified_name: &str); fn characters(&mut self, chars: &[char]); fn error(&mut self, error: &str); } diff --git a/src/maven/xml/sax_parser.rs b/src/maven/xml/sax_parser.rs index b577a6d..7f6b5d6 100644 --- a/src/maven/xml/sax_parser.rs +++ b/src/maven/xml/sax_parser.rs @@ -10,6 +10,7 @@ struct SAXParser<'a> { handler: Box<&'a mut dyn SaxHandler>, position: usize, current: char, + namespace_stack: Vec<(String, isize)>, } impl<'a> SAXParser<'a> { @@ -19,6 +20,7 @@ impl<'a> SAXParser<'a> { handler, position: 0, current: '\0', + namespace_stack: Vec::new(), } } @@ -74,19 +76,31 @@ impl<'a> SAXParser<'a> { } fn parse_start_element(&mut self) -> anyhow::Result<()> { - let name = self.read_until(" />")?; + let name = self.read_until(" \t\n/>")?; let mut atts = vec![]; let mut c = self.current; - while c == ' ' { + + while c.is_whitespace() { self.skip_whitespace()?; atts.push(self.parse_attribute()?); c = self.advance()?; } - self.handler.start_element("", name.as_str(), "", atts); + let namespace = if !self.namespace_stack.is_empty() { + let (name, count) = self.namespace_stack.pop().unwrap(); + self.namespace_stack.push((name.clone(), count + 1)); + Some(name.clone()) + } else { + None + }; + + self.handler + .start_element(namespace.clone(), name.as_str(), "", atts); self.skip_whitespace()?; if self.current == '/' { self.advance()?; + let namespace = self.pop_namespace(); + self.handler.end_element(namespace, name.as_str(), ""); } self.expect_char('>')?; self.skip_whitespace()?; @@ -100,9 +114,16 @@ impl<'a> SAXParser<'a> { self.expect("\"", "Expected start of attribute value")?; let att_value = self.read_until("\"")?; + let namespace = if att_name == "xmlns" { + self.namespace_stack.push((att_value.clone(), -1)); + Some(att_value.clone()) + } else { + None + }; + Ok(Attribute { name: att_name.trim().to_string(), - _namespace: Some("".to_string()), + namespace, value: att_value, }) } @@ -110,11 +131,32 @@ impl<'a> SAXParser<'a> { fn parse_end_element(&mut self) -> anyhow::Result<()> { self.advance()?; let name = self.read_until(">")?; - self.handler.end_element("", name.as_str(), ""); + + let namespace = self.pop_namespace(); + + self.handler.end_element(namespace, name.as_str(), ""); + self.expect(">", "Expect end of element")?; + self.skip_whitespace()?; Ok(()) } + fn pop_namespace(&mut self) -> Option { + let namespace = if !self.namespace_stack.is_empty() { + let (name, count) = self.namespace_stack.pop().unwrap(); + + if count > 0 { + self.namespace_stack.push((name.to_string(), count - 1)); + Some(name) + } else { + None + } + } else { + None + }; + namespace + } + fn read_until(&mut self, until: &str) -> anyhow::Result { let start = self.position; let mut c = self.current; diff --git a/src/maven/xml/sax_parser_test.rs b/src/maven/xml/sax_parser_test.rs index 3f73452..c9bad4d 100644 --- a/src/maven/xml/sax_parser_test.rs +++ b/src/maven/xml/sax_parser_test.rs @@ -4,25 +4,16 @@ use crate::maven::xml::{Attribute, SaxHandler}; mod tests { use crate::maven::xml::sax_parser::parse_string; use crate::maven::xml::sax_parser_test::TestHandler; - use std::sync::Once; - - static INIT: Once = Once::new(); - - pub fn initialize() { - INIT.call_once(|| { - env_logger::init(); - }); - } #[test] fn test_xml_header() { - initialize(); let test_xml = include_str!("test/header.xml"); let mut testhandler = TestHandler::new(); parse_string(test_xml.to_string(), Box::new(&mut testhandler)) .expect("Failed to parse test xml"); println!("{:?}", testhandler); - assert!(testhandler.start_document_called); + assert_eq!(testhandler.start_document_called, 1); + assert_eq!(testhandler.end_document_called, 1); } #[test] @@ -31,10 +22,11 @@ mod tests { let mut testhandler = TestHandler::new(); parse_string(test_xml.to_string(), Box::new(&mut testhandler)) .expect("Failed to parse test xml"); - assert!(testhandler.start_document_called); - assert!(testhandler.start_element_called); + assert_eq!(testhandler.start_document_called, 1); + assert_eq!(testhandler.start_element_called, 1); assert!(!testhandler.elements.is_empty()); assert_eq!(testhandler.elements[0], ""); + assert_eq!(testhandler.end_document_called, 1); } #[test] @@ -43,10 +35,11 @@ mod tests { let mut testhandler = TestHandler::new(); parse_string(test_xml.to_string(), Box::new(&mut testhandler)) .expect("Failed to parse test xml"); - assert!(testhandler.start_document_called); - assert!(testhandler.start_element_called); + assert_eq!(testhandler.start_document_called, 1); + assert_eq!(testhandler.start_element_called, 1); assert!(!testhandler.elements.is_empty()); assert_eq!(testhandler.elements[0], ""); + assert_eq!(testhandler.end_document_called, 1); } #[test] @@ -55,12 +48,12 @@ mod tests { let mut testhandler = TestHandler::new(); parse_string(test_xml.to_string(), Box::new(&mut testhandler)) .expect("Failed to parse test xml"); - assert!(testhandler.start_document_called); - assert!(testhandler.start_element_called); + assert_eq!(testhandler.start_document_called, 1); + assert_eq!(testhandler.start_element_called, 1); assert!(!testhandler.elements.is_empty()); assert_eq!(testhandler.elements[0], r#""#); - assert!(testhandler.end_element_called); - assert!(testhandler.end_document_called); + assert_eq!(testhandler.end_element_called, 1); + assert_eq!(testhandler.end_document_called, 1); } #[test] @@ -69,31 +62,63 @@ mod tests { let mut testhandler = TestHandler::new(); parse_string(test_xml.to_string(), Box::new(&mut testhandler)) .expect("Failed to parse test xml"); - assert!(testhandler.start_document_called); - assert!(testhandler.start_element_called); + assert_eq!(testhandler.start_document_called, 1); + assert_eq!(testhandler.start_element_called, 1); assert!(!testhandler.elements.is_empty()); - assert_eq!(testhandler.elements[0], r#""#); - assert!(testhandler.end_element_called); - assert!(testhandler.end_document_called); + assert_eq!( + testhandler.elements[0], + r#""# + ); + assert_eq!(testhandler.end_element_called, 1); + assert_eq!(testhandler.end_document_called, 1); + } + + #[test] + fn test_namespaces() { + let test_xml = include_str!("test/namespaces.xml"); + let mut testhandler = TestHandler::new(); + parse_string(test_xml.to_string(), Box::new(&mut testhandler)) + .expect("Failed to parse test xml"); + assert_eq!(testhandler.start_document_called, 1); + assert_eq!(testhandler.start_element_called, 4); + assert!(!testhandler.elements.is_empty()); + assert_eq!( + testhandler.elements[0], + r#""# + ); + assert_eq!( + testhandler.elements[1], + r#""# + ); + assert_eq!( + testhandler.elements[2], + r#""# + ); + assert_eq!( + testhandler.elements[3], + r#""# + ); + assert_eq!(testhandler.end_element_called, 4); + assert_eq!(testhandler.end_document_called, 1); } } #[derive(Debug)] struct TestHandler { - start_document_called: bool, - end_document_called: bool, - start_element_called: bool, - end_element_called: bool, + start_document_called: usize, + end_document_called: usize, + start_element_called: usize, + end_element_called: usize, elements: Vec, } impl TestHandler { pub fn new() -> Self { Self { - start_document_called: false, - end_document_called: false, - start_element_called: false, - end_element_called: false, + start_document_called: 0, + end_document_called: 0, + start_element_called: 0, + end_element_called: 0, elements: vec![], } } @@ -101,11 +126,11 @@ impl TestHandler { impl SaxHandler for TestHandler { fn start_document(&mut self) { - self.start_document_called = true; + self.start_document_called += 1; } fn end_document(&mut self) { - self.end_document_called = true; + self.end_document_called += 1; } fn start_prefix_mapping(&mut self, _prefix: &str, _uri: &str) { @@ -118,25 +143,31 @@ impl SaxHandler for TestHandler { fn start_element( &mut self, - _uri: &str, + uri: Option, local_name: &str, _qualified_name: &str, attributes: Vec, ) { - self.start_element_called = true; + self.start_element_called += 1; let atts = attributes .iter() .map(|att| format!(r#"{}="{}""#, att.name, att.value)) .collect::>() .join(" "); + let uri = if let Some(uri) = uri { + format!("{}:", uri) + } else { + "".to_string() + }; + let divider = if atts.is_empty() { "" } else { " " }; self.elements - .push(format!("<{}{}{}>", local_name, divider, atts)); + .push(format!("<{}{}{}{}>", uri, local_name, divider, atts)); } - fn end_element(&mut self, _uri: &str, _local_name: &str, _qualified_name: &str) { - self.end_element_called = true; + fn end_element(&mut self, _uri: Option, _local_name: &str, _qualified_name: &str) { + self.end_element_called += 1; } fn characters(&mut self, _chars: &[char]) { diff --git a/src/maven/xml/test/full.xml b/src/maven/xml/test/full.xml new file mode 100644 index 0000000..35d13bd --- /dev/null +++ b/src/maven/xml/test/full.xml @@ -0,0 +1,24 @@ + + + + + The Great Gatsby + F. Scott Fitzgerald + 12.99 + A classic American novel about the Jazz Age + + + A Brief History of Time + Stephen Hawking + 15.50 + An exploration of space & time + + Our Picture of the Universe + Space and Time + + + + 2024-01-15 + 2024-01-20 + + diff --git a/src/maven/xml/test/namespaces.xml b/src/maven/xml/test/namespaces.xml new file mode 100644 index 0000000..5317c5d --- /dev/null +++ b/src/maven/xml/test/namespaces.xml @@ -0,0 +1,8 @@ + + + + + + + +