commit 8cd82fffbdf8b5d5205bf229e9c77fd6ab0de05e Author: Shautvast Date: Sat Jul 19 08:42:26 2025 +0200 element with single attribute diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..79ebebb --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/undeepend2.iml b/.idea/undeepend2.iml new file mode 100644 index 0000000..cf84ae4 --- /dev/null +++ b/.idea/undeepend2.iml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..35eb1dd --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..5fc062e --- /dev/null +++ b/src/lib.rs @@ -0,0 +1 @@ +mod maven; \ No newline at end of file diff --git a/src/maven/mod.rs b/src/maven/mod.rs new file mode 100644 index 0000000..d1be782 --- /dev/null +++ b/src/maven/mod.rs @@ -0,0 +1 @@ +mod xml; \ No newline at end of file diff --git a/src/maven/xml/debug.rs b/src/maven/xml/debug.rs new file mode 100644 index 0000000..ed1bda6 --- /dev/null +++ b/src/maven/xml/debug.rs @@ -0,0 +1,37 @@ +use log::debug; +use crate::maven::xml::SaxHandler; + +pub struct DebugHandler {} + +impl SaxHandler for DebugHandler { + fn start_document(&mut self) { + debug!("start_document"); + } + fn end_document(&mut self) { + debug!("end_document"); + } + fn start_prefix_mapping(&mut self, _prefix: &str, _uri: &str) { + debug!("start_prefix_mapping"); + } + fn end_prefix_mapping(&mut self, _prefix: &str, _uri: &str) { + debug!("end_prefix_mapping"); + } + fn start_element( + &mut self, + _uri: &str, + local_name: &str, + _qualified_name: &str, + attributes: Vec, + ) { + debug!("start_element {}, {:?}", local_name, attributes); + } + fn end_element(&mut self, _uri: &str, local_name: &str, _qualified_name: &str) { + debug!("end_element {} ", local_name); + } + fn characters(&mut self, chars: &[char]) { + debug!("characters {:?}", chars.iter().collect::()); + } + fn error(&mut self, _error: &str) { + debug!("error"); + } +} \ No newline at end of file diff --git a/src/maven/xml/mod.rs b/src/maven/xml/mod.rs new file mode 100644 index 0000000..d403a79 --- /dev/null +++ b/src/maven/xml/mod.rs @@ -0,0 +1,28 @@ +mod sax_parser; +mod sax_parser_test; +mod debug; + +#[derive(Debug)] +pub struct Attribute { + name: String, + namespace: Option, + value: String, +} + +pub trait SaxHandler { + fn start_document(&mut self); + fn end_document(&mut self); + fn start_prefix_mapping(&mut self, prefix: &str, uri: &str); + fn end_prefix_mapping(&mut self, prefix: &str, uri: &str); + fn start_element( + &mut self, + uri: &str, + local_name: &str, + qualified_name: &str, + attributes: Vec, + ); + fn end_element(&mut self, uri: &str, local_name: &str, qualified_name: &str); + fn characters(&mut self, chars: &[char]); + fn error(&mut self, error: &str); +} + diff --git a/src/maven/xml/sax_parser.rs b/src/maven/xml/sax_parser.rs new file mode 100644 index 0000000..e754c8a --- /dev/null +++ b/src/maven/xml/sax_parser.rs @@ -0,0 +1,130 @@ +use crate::maven::xml::{Attribute, SaxHandler}; + +pub fn parse_string(xml: String, handler: Box<&mut dyn SaxHandler>) -> anyhow::Result<()> { + let mut parser = SAXParser::new(xml, handler); + parser.parse() +} + +struct SAXParser<'a> { + xml: Vec, + handler: Box<&'a mut dyn SaxHandler>, + position: usize, + current: char, +} + +impl<'a> SAXParser<'a> { + pub fn new(xml: String, handler: Box<&'a mut dyn SaxHandler>) -> Self { + Self { + xml: xml.chars().collect(), + handler, + position: 0, + current: '\0', + } + } + + fn parse(&mut self) -> anyhow::Result<()> { + self.expect( + "", + "Content is not allowed in prolog.", + )?; + self.skip_whitespace()?; + self.handler.start_document(); + self.parse_elements() + } + + fn parse_elements(&mut self) -> anyhow::Result<()> { + if self.current == '<' { + self.advance()?; + if self.next_char()? != '/' { + self.parse_start_element()?; + } else { + self.parse_end_element()?; + } + } + Ok(()) + } + + fn parse_start_element(&mut self) -> anyhow::Result<()> { + let name = self.read_until(" />")?; + let mut atts = vec![]; + let mut c = self.current; + while c == ' ' { + self.skip_whitespace()?; + atts.push(self.parse_attribute()?); + c = self.advance()?; + } + + self.handler.start_element("", name.as_str(), "", atts); + Ok(()) + } + + fn parse_attribute(&mut self) -> anyhow::Result { + let att_name = self.read_until("=")?; + self.skip_whitespace()?; + self.expect("\"", "Expected start of attribute value")?; + let att_value = self.read_until("\"")?; + + Ok(Attribute { + name: att_name.trim().to_string(), + namespace: Some("".to_string()), + value: att_value, + }) + } + + fn parse_end_element(&mut self) -> anyhow::Result<()> { + let name = self.read_until(">")?; + self.handler.end_element("", name.as_str(), ""); + Ok(()) + } + + fn read_until(&mut self, until: &str) -> anyhow::Result { + let start = self.position; + let mut c = self.current; + let until = until.chars().collect::>(); + while !until.contains(&c) { + c = self.advance()?; + } + Ok(self.xml[start - 1..self.position - 1] + .iter() + .collect::()) + } + + fn skip_whitespace(&mut self) -> anyhow::Result<()> { + let mut c = self.current; + while (c.is_whitespace()) && self.position < self.xml.len() { + c = self.advance()?; + } + Ok(()) + } + + fn advance(&mut self) -> anyhow::Result { + self.position += 1; + self.current = self.xml[self.position - 1]; + Ok(self.current) + } + + fn next_char(&mut self) -> anyhow::Result { + if self.position >= self.xml.len() { + Err(anyhow::anyhow!("End reached")) + } else { + Ok(self.xml[self.position + 1]) + } + } + + fn expect(&mut self, header_line: &str, message: &str) -> anyhow::Result<()> { + for c in header_line.chars() { + if !self.expect_char(c)? { + return Err(anyhow::anyhow!(message.to_string())); + } + } + self.advance()?; + Ok(()) + } + + fn expect_char(&mut self, expected: char) -> anyhow::Result { + if self.position >= self.xml.len() { + return Ok(false); + } + Ok(self.advance()? == expected) + } +} diff --git a/src/maven/xml/sax_parser_test.rs b/src/maven/xml/sax_parser_test.rs new file mode 100644 index 0000000..38a37fd --- /dev/null +++ b/src/maven/xml/sax_parser_test.rs @@ -0,0 +1,132 @@ +use crate::maven::xml::{Attribute, SaxHandler}; + +#[cfg(test)] +mod tests { + use crate::maven::xml::sax_parser::parse_string; + use crate::maven::xml::sax_parser_test::TestHandler; + use std::sync::Once; + + static INIT: Once = Once::new(); + + pub fn initialize() { + INIT.call_once(|| { + env_logger::init(); + }); + } + + #[test] + fn test_xml_header() { + let test_xml = include_str!("test/header.xml"); + let mut testhandler = TestHandler::new(); + parse_string(test_xml.to_string(), Box::new(&mut testhandler)) + .expect("Failed to parse test xml"); + println!("{:?}", testhandler); + assert!(testhandler.start_document_called); + } + + #[test] + fn test_single_element_short() { + let test_xml = include_str!("test/header.xml"); + let mut testhandler = TestHandler::new(); + parse_string(test_xml.to_string(), Box::new(&mut testhandler)) + .expect("Failed to parse test xml"); + assert!(testhandler.start_document_called); + assert!(testhandler.start_element_called); + assert!(!testhandler.elements.is_empty()); + assert_eq!(testhandler.elements[0], ""); + } + + #[test] + fn test_single_element() { + let test_xml = include_str!("test/element.xml"); + let mut testhandler = TestHandler::new(); + parse_string(test_xml.to_string(), Box::new(&mut testhandler)) + .expect("Failed to parse test xml"); + assert!(testhandler.start_document_called); + assert!(testhandler.start_element_called); + assert!(!testhandler.elements.is_empty()); + assert_eq!(testhandler.elements[0], ""); + } + + #[test] + fn test_single_element_single_attribute() { + let test_xml = include_str!("test/element_with_attribute.xml"); + let mut testhandler = TestHandler::new(); + parse_string(test_xml.to_string(), Box::new(&mut testhandler)) + .expect("Failed to parse test xml"); + assert!(testhandler.start_document_called); + assert!(testhandler.start_element_called); + assert!(!testhandler.elements.is_empty()); + assert_eq!(testhandler.elements[0], r#""#); + } +} + +#[derive(Debug)] +struct TestHandler { + start_document_called: bool, + end_document_called: bool, + start_element_called: bool, + end_element_called: bool, + elements: Vec, +} + +impl TestHandler { + pub fn new() -> Self { + Self { + start_document_called: false, + end_document_called: false, + start_element_called: false, + end_element_called: false, + elements: vec![], + } + } +} + +impl SaxHandler for TestHandler { + fn start_document(&mut self) { + self.start_document_called = true; + } + + fn end_document(&mut self) { + self.end_document_called = true; + } + + fn start_prefix_mapping(&mut self, _prefix: &str, _uri: &str) { + todo!() + } + + fn end_prefix_mapping(&mut self, _prefix: &str, _uri: &str) { + todo!() + } + + fn start_element( + &mut self, + _uri: &str, + local_name: &str, + _qualified_name: &str, + attributes: Vec, + ) { + self.start_element_called = true; + let atts = attributes + .iter() + .map(|att| format!(r#"{}="{}""#, att.name, att.value)) + .collect::>() + .join(" "); + + let divider = if atts.is_empty() { "" } else { " " }; + self.elements + .push(format!("<{}{}{}>", local_name, divider, atts)); + } + + fn end_element(&mut self, _uri: &str, _local_name: &str, _qualified_name: &str) { + self.end_element_called = true; + } + + fn characters(&mut self, _chars: &[char]) { + todo!() + } + + fn error(&mut self, _error: &str) { + todo!() + } +} diff --git a/src/maven/xml/test/element.xml b/src/maven/xml/test/element.xml new file mode 100644 index 0000000..6c6e803 --- /dev/null +++ b/src/maven/xml/test/element.xml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/src/maven/xml/test/element_with_attribute.xml b/src/maven/xml/test/element_with_attribute.xml new file mode 100644 index 0000000..ee64a27 --- /dev/null +++ b/src/maven/xml/test/element_with_attribute.xml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/src/maven/xml/test/header.xml b/src/maven/xml/test/header.xml new file mode 100644 index 0000000..eb13695 --- /dev/null +++ b/src/maven/xml/test/header.xml @@ -0,0 +1,2 @@ + + \ No newline at end of file