namespace handling

This commit is contained in:
Shautvast 2025-07-20 18:16:56 +02:00
parent e6f1eb0f08
commit b301901d0a
6 changed files with 154 additions and 49 deletions

View file

@ -18,14 +18,14 @@ impl SaxHandler for DebugHandler {
} }
/// Logs the opening element's local name and its attributes at debug level.
/// The namespace URI and the qualified name are accepted but not used.
fn start_element( fn start_element(
&mut self, &mut self,
_uri: &str, _uri: Option<String>,
local_name: &str, local_name: &str,
_qualified_name: &str, _qualified_name: &str,
attributes: Vec<crate::maven::xml::Attribute>, attributes: Vec<crate::maven::xml::Attribute>,
) { ) {
debug!("start_element {}, {:?}", local_name, attributes); debug!("start_element {}, {:?}", local_name, attributes);
} }
/// Logs the closing element's local name at debug level.
/// The namespace URI and the qualified name are accepted but not used.
fn end_element(&mut self, _uri: &str, local_name: &str, _qualified_name: &str) { fn end_element(&mut self, _uri: Option<String>, local_name: &str, _qualified_name: &str) {
debug!("end_element {} ", local_name); debug!("end_element {} ", local_name);
} }
fn characters(&mut self, chars: &[char]) { fn characters(&mut self, chars: &[char]) {

View file

@ -5,7 +5,7 @@ mod debug;
/// One attribute of an XML element, as passed to `SaxHandler::start_element`.
#[derive(Debug)] #[derive(Debug)]
pub struct Attribute { pub struct Attribute {
/// Attribute name; the parser stores it trimmed.
name: String, name: String,
/// Namespace associated with the attribute. In the right-hand (post-commit) column the parser
/// fills this only for an `xmlns` attribute, using the attribute's value, and leaves it `None`
/// otherwise; the left-hand column always stored an empty string.
_namespace: Option<String>, namespace: Option<String>,
/// Text found between the attribute's double quotes.
value: String, value: String,
} }
@ -16,12 +16,12 @@ pub trait SaxHandler {
fn end_prefix_mapping(&mut self, prefix: &str, uri: &str); fn end_prefix_mapping(&mut self, prefix: &str, uri: &str);
/// Invoked by the parser after an opening tag and all of its attributes have been read.
///
/// `uri` is the element's namespace (`&str` in the left diff column, `Option<String>` in the
/// right one, where `None` means no namespace is in scope). The parser in this commit always
/// passes an empty `qualified_name`.
fn start_element( fn start_element(
&mut self, &mut self,
uri: &str, uri: Option<String>,
local_name: &str, local_name: &str,
qualified_name: &str, qualified_name: &str,
attributes: Vec<Attribute>, attributes: Vec<Attribute>,
); );
/// Invoked by the parser for a closing tag and, in the right-hand (post-commit) column, also
/// for a self-closing `/>` element. `uri` mirrors the namespace argument of `start_element`;
/// the parser in this commit always passes an empty `qualified_name`.
fn end_element(&mut self, uri: &str, local_name: &str, qualified_name: &str); fn end_element(&mut self, uri: Option<String>, local_name: &str, qualified_name: &str);
fn characters(&mut self, chars: &[char]); fn characters(&mut self, chars: &[char]);
fn error(&mut self, error: &str); fn error(&mut self, error: &str);
} }

View file

@ -10,6 +10,7 @@ struct SAXParser<'a> {
handler: Box<&'a mut dyn SaxHandler>, handler: Box<&'a mut dyn SaxHandler>,
position: usize, position: usize,
current: char, current: char,
namespace_stack: Vec<(String, isize)>,
} }
impl<'a> SAXParser<'a> { impl<'a> SAXParser<'a> {
@ -19,6 +20,7 @@ impl<'a> SAXParser<'a> {
handler, handler,
position: 0, position: 0,
current: '\0', current: '\0',
namespace_stack: Vec::new(),
} }
} }
@ -74,19 +76,31 @@ impl<'a> SAXParser<'a> {
} }
fn parse_start_element(&mut self) -> anyhow::Result<()> { fn parse_start_element(&mut self) -> anyhow::Result<()> {
let name = self.read_until(" />")?; let name = self.read_until(" \t\n/>")?;
let mut atts = vec![]; let mut atts = vec![];
let mut c = self.current; let mut c = self.current;
while c == ' ' {
while c.is_whitespace() {
self.skip_whitespace()?; self.skip_whitespace()?;
atts.push(self.parse_attribute()?); atts.push(self.parse_attribute()?);
c = self.advance()?; c = self.advance()?;
} }
self.handler.start_element("", name.as_str(), "", atts); let namespace = if !self.namespace_stack.is_empty() {
let (name, count) = self.namespace_stack.pop().unwrap();
self.namespace_stack.push((name.clone(), count + 1));
Some(name.clone())
} else {
None
};
self.handler
.start_element(namespace.clone(), name.as_str(), "", atts);
self.skip_whitespace()?; self.skip_whitespace()?;
if self.current == '/' { if self.current == '/' {
self.advance()?; self.advance()?;
let namespace = self.pop_namespace();
self.handler.end_element(namespace, name.as_str(), "");
} }
self.expect_char('>')?; self.expect_char('>')?;
self.skip_whitespace()?; self.skip_whitespace()?;
@ -100,9 +114,16 @@ impl<'a> SAXParser<'a> {
self.expect("\"", "Expected start of attribute value")?; self.expect("\"", "Expected start of attribute value")?;
let att_value = self.read_until("\"")?; let att_value = self.read_until("\"")?;
let namespace = if att_name == "xmlns" {
self.namespace_stack.push((att_value.clone(), -1));
Some(att_value.clone())
} else {
None
};
Ok(Attribute { Ok(Attribute {
name: att_name.trim().to_string(), name: att_name.trim().to_string(),
_namespace: Some("".to_string()), namespace,
value: att_value, value: att_value,
}) })
} }
@ -110,11 +131,32 @@ impl<'a> SAXParser<'a> {
/// Handles a closing tag: reads the element name up to `>`, reports it to the handler through
/// `end_element`, and then consumes the `>`.
///
/// Returns an error if any of the underlying read/expect steps fails.
fn parse_end_element(&mut self) -> anyhow::Result<()> { fn parse_end_element(&mut self) -> anyhow::Result<()> {
// NOTE(review): presumably steps over the `/` that follows `<` — confirm against the caller.
self.advance()?; self.advance()?;
let name = self.read_until(">")?; let name = self.read_until(">")?;
// Left column only: the URI was a fixed empty string.
self.handler.end_element("", name.as_str(), "");
// Right column only: the URI is taken from the namespace stack.
let namespace = self.pop_namespace();
self.handler.end_element(namespace, name.as_str(), "");
self.expect(">", "Expect end of element")?; self.expect(">", "Expect end of element")?;
// Right column only: skip whitespace that follows the closing tag.
self.skip_whitespace()?;
Ok(()) Ok(())
} }
/// Closes one element level of the innermost namespace scope and returns that scope's URI.
///
/// Every stack entry is `(uri, depth)`. An `xmlns` attribute pushes the entry with depth `-1`
/// and each opened element bumps the top entry by one, so the declaring element sits at depth
/// `0` and its descendants at positive depths.
///
/// * Stack empty: no namespace is in scope, returns `None`.
/// * Depth greater than zero: a descendant is closing; the entry is kept with its depth
///   decremented.
/// * Otherwise the declaring element itself is closing; the entry is dropped for good.
///
/// In both non-empty cases the URI is returned, so the element that declared the namespace
/// reports the same URI on `end_element` as it did on `start_element`.
fn pop_namespace(&mut self) -> Option<String> {
    let (uri, depth) = self.namespace_stack.pop()?;
    if depth > 0 {
        // Still inside the declaring element: keep the scope alive, one level shallower.
        self.namespace_stack.push((uri.clone(), depth - 1));
    }
    Some(uri)
}
fn read_until(&mut self, until: &str) -> anyhow::Result<String> { fn read_until(&mut self, until: &str) -> anyhow::Result<String> {
let start = self.position; let start = self.position;
let mut c = self.current; let mut c = self.current;

View file

@ -4,25 +4,16 @@ use crate::maven::xml::{Attribute, SaxHandler};
mod tests { mod tests {
use crate::maven::xml::sax_parser::parse_string; use crate::maven::xml::sax_parser::parse_string;
use crate::maven::xml::sax_parser_test::TestHandler; use crate::maven::xml::sax_parser_test::TestHandler;
use std::sync::Once;
static INIT: Once = Once::new();
pub fn initialize() {
INIT.call_once(|| {
env_logger::init();
});
}
// Parses a fixture (test/header.xml) and checks the document-level callbacks.
// The left column only asserted that start_document fired; the right column asserts that
// start_document and end_document each fired exactly once.
#[test] #[test]
fn test_xml_header() { fn test_xml_header() {
initialize();
let test_xml = include_str!("test/header.xml"); let test_xml = include_str!("test/header.xml");
let mut testhandler = TestHandler::new(); let mut testhandler = TestHandler::new();
parse_string(test_xml.to_string(), Box::new(&mut testhandler)) parse_string(test_xml.to_string(), Box::new(&mut testhandler))
.expect("Failed to parse test xml"); .expect("Failed to parse test xml");
println!("{:?}", testhandler); println!("{:?}", testhandler);
assert!(testhandler.start_document_called); assert_eq!(testhandler.start_document_called, 1);
assert_eq!(testhandler.end_document_called, 1);
} }
#[test] #[test]
@ -31,10 +22,11 @@ mod tests {
let mut testhandler = TestHandler::new(); let mut testhandler = TestHandler::new();
parse_string(test_xml.to_string(), Box::new(&mut testhandler)) parse_string(test_xml.to_string(), Box::new(&mut testhandler))
.expect("Failed to parse test xml"); .expect("Failed to parse test xml");
assert!(testhandler.start_document_called); assert_eq!(testhandler.start_document_called, 1);
assert!(testhandler.start_element_called); assert_eq!(testhandler.start_element_called, 1);
assert!(!testhandler.elements.is_empty()); assert!(!testhandler.elements.is_empty());
assert_eq!(testhandler.elements[0], "<xml>"); assert_eq!(testhandler.elements[0], "<xml>");
assert_eq!(testhandler.end_document_called, 1);
} }
#[test] #[test]
@ -43,10 +35,11 @@ mod tests {
let mut testhandler = TestHandler::new(); let mut testhandler = TestHandler::new();
parse_string(test_xml.to_string(), Box::new(&mut testhandler)) parse_string(test_xml.to_string(), Box::new(&mut testhandler))
.expect("Failed to parse test xml"); .expect("Failed to parse test xml");
assert!(testhandler.start_document_called); assert_eq!(testhandler.start_document_called, 1);
assert!(testhandler.start_element_called); assert_eq!(testhandler.start_element_called, 1);
assert!(!testhandler.elements.is_empty()); assert!(!testhandler.elements.is_empty());
assert_eq!(testhandler.elements[0], "<element>"); assert_eq!(testhandler.elements[0], "<element>");
assert_eq!(testhandler.end_document_called, 1);
} }
#[test] #[test]
@ -55,12 +48,12 @@ mod tests {
let mut testhandler = TestHandler::new(); let mut testhandler = TestHandler::new();
parse_string(test_xml.to_string(), Box::new(&mut testhandler)) parse_string(test_xml.to_string(), Box::new(&mut testhandler))
.expect("Failed to parse test xml"); .expect("Failed to parse test xml");
assert!(testhandler.start_document_called); assert_eq!(testhandler.start_document_called, 1);
assert!(testhandler.start_element_called); assert_eq!(testhandler.start_element_called, 1);
assert!(!testhandler.elements.is_empty()); assert!(!testhandler.elements.is_empty());
assert_eq!(testhandler.elements[0], r#"<element a="1">"#); assert_eq!(testhandler.elements[0], r#"<element a="1">"#);
assert!(testhandler.end_element_called); assert_eq!(testhandler.end_element_called, 1);
assert!(testhandler.end_document_called); assert_eq!(testhandler.end_document_called, 1);
} }
#[test] #[test]
@ -69,31 +62,63 @@ mod tests {
let mut testhandler = TestHandler::new(); let mut testhandler = TestHandler::new();
parse_string(test_xml.to_string(), Box::new(&mut testhandler)) parse_string(test_xml.to_string(), Box::new(&mut testhandler))
.expect("Failed to parse test xml"); .expect("Failed to parse test xml");
assert!(testhandler.start_document_called); assert_eq!(testhandler.start_document_called, 1);
assert!(testhandler.start_element_called); assert_eq!(testhandler.start_element_called, 1);
assert!(!testhandler.elements.is_empty()); assert!(!testhandler.elements.is_empty());
assert_eq!(testhandler.elements[0], r#"<bookstore xmlns="http://example.com/books">"#); assert_eq!(
assert!(testhandler.end_element_called); testhandler.elements[0],
assert!(testhandler.end_document_called); r#"<http://example.com/books:bookstore xmlns="http://example.com/books">"#
);
assert_eq!(testhandler.end_element_called, 1);
assert_eq!(testhandler.end_document_called, 1);
}
// Parses test/namespaces.xml, where only <book> declares a default namespace, and checks
// how far that namespace reaches: <book> and its child <page> are reported with the URI
// prefix, while the enclosing <bookstore> and the later sibling <publisher> are not.
#[test]
fn test_namespaces() {
let test_xml = include_str!("test/namespaces.xml");
let mut testhandler = TestHandler::new();
parse_string(test_xml.to_string(), Box::new(&mut testhandler))
.expect("Failed to parse test xml");
assert_eq!(testhandler.start_document_called, 1);
assert_eq!(testhandler.start_element_called, 4);
assert!(!testhandler.elements.is_empty());
// Root element: no namespace declared yet.
assert_eq!(
testhandler.elements[0],
r#"<bookstore>"#
);
// Declaring element: rendered with the namespace URI and all of its attributes.
assert_eq!(
testhandler.elements[1],
r#"<http://example.com/books:book xmlns="http://example.com/books" id="1" category="fiction">"#
);
// Child of the declaring element inherits the namespace.
assert_eq!(
testhandler.elements[2],
r#"<http://example.com/books:page>"#
);
// Sibling after </book>: the namespace is no longer in scope.
assert_eq!(
testhandler.elements[3],
r#"<publisher>"#
);
assert_eq!(testhandler.end_element_called, 4);
assert_eq!(testhandler.end_document_called, 1);
} }
} }
/// Recording `SaxHandler` used by the tests: it tracks which callbacks fired and keeps a
/// textual rendering of every opening tag it was given.
/// The left column stores "was called" flags; the right column stores call counts.
#[derive(Debug)] #[derive(Debug)]
struct TestHandler { struct TestHandler {
/// Set / incremented by `start_document`.
start_document_called: bool, start_document_called: usize,
/// Set / incremented by `end_document`.
end_document_called: bool, end_document_called: usize,
/// Set / incremented by `start_element`.
start_element_called: bool, start_element_called: usize,
/// Set / incremented by `end_element`.
end_element_called: bool, end_element_called: usize,
/// One rendered `<name attrs>` string per `start_element` call, in call order.
elements: Vec<String>, elements: Vec<String>,
} }
impl TestHandler { impl TestHandler {
/// Creates a handler with nothing recorded yet: all flags `false` (left column) or all
/// counters `0` (right column), and an empty element list.
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
start_document_called: false, start_document_called: 0,
end_document_called: false, end_document_called: 0,
start_element_called: false, start_element_called: 0,
end_element_called: false, end_element_called: 0,
elements: vec![], elements: vec![],
} }
} }
@ -101,11 +126,11 @@ impl TestHandler {
impl SaxHandler for TestHandler { impl SaxHandler for TestHandler {
/// Records that the start of the document was reported (flag on the left, counter on the right).
fn start_document(&mut self) {
self.start_document_called = true; self.start_document_called += 1;
}
/// Records that the end of the document was reported (flag on the left, counter on the right).
fn end_document(&mut self) {
self.end_document_called = true; self.end_document_called += 1;
}
fn start_prefix_mapping(&mut self, _prefix: &str, _uri: &str) { fn start_prefix_mapping(&mut self, _prefix: &str, _uri: &str) {
@ -118,25 +143,31 @@ impl SaxHandler for TestHandler {
/// Records an opening tag and stores a textual rendering of it in `elements`.
///
/// The rendering is `<name>` or `<name a="1" b="2">`; in the right-hand (post-commit) column
/// the name is additionally prefixed with `uri:` when a namespace URI was supplied.
fn start_element( fn start_element(
&mut self, &mut self,
_uri: &str, uri: Option<String>,
local_name: &str, local_name: &str,
_qualified_name: &str, _qualified_name: &str,
attributes: Vec<Attribute>, attributes: Vec<Attribute>,
) { ) {
self.start_element_called = true; self.start_element_called += 1;
// Render every attribute as name="value" and join them with single spaces.
let atts = attributes let atts = attributes
.iter() .iter()
.map(|att| format!(r#"{}="{}""#, att.name, att.value)) .map(|att| format!(r#"{}="{}""#, att.name, att.value))
.collect::<Vec<String>>() .collect::<Vec<String>>()
.join(" "); .join(" ");
// Right column only: namespace prefix, empty when no URI was given.
let uri = if let Some(uri) = uri {
format!("{}:", uri)
} else {
"".to_string()
};
// Separate name and attributes with a space only when there are attributes.
let divider = if atts.is_empty() { "" } else { " " }; let divider = if atts.is_empty() { "" } else { " " };
self.elements self.elements
.push(format!("<{}{}{}>", local_name, divider, atts)); .push(format!("<{}{}{}{}>", uri, local_name, divider, atts));
} }
/// Records that an element end was reported (flag on the left, counter on the right);
/// all arguments are ignored.
fn end_element(&mut self, _uri: &str, _local_name: &str, _qualified_name: &str) { fn end_element(&mut self, _uri: Option<String>, _local_name: &str, _qualified_name: &str) {
self.end_element_called = true; self.end_element_called += 1;
}
fn characters(&mut self, _chars: &[char]) { fn characters(&mut self, _chars: &[char]) {

View file

@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Test XML file for SAX parser -->
<bookstore xmlns="http://example.com/books">
<book id="1" category="fiction">
<title lang="en">The Great Gatsby</title>
<author>F. Scott Fitzgerald</author>
<price currency="USD">12.99</price>
<description>A classic American novel about the Jazz Age</description>
</book>
<book id="2" category="science">
<title lang="en">A Brief History of Time</title>
<author>Stephen Hawking</author>
<price currency="USD">15.50</price>
<description>An exploration of space &amp; time</description>
<chapters>
<chapter number="1">Our Picture of the Universe</chapter>
<chapter number="2">Space and Time</chapter>
</chapters>
</book>
<metadata>
<created>2024-01-15</created>
<lastModified>2024-01-20</lastModified>
</metadata>
</bookstore>

View file

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Test XML file for SAX parser -->
<bookstore>
<book xmlns="http://example.com/books" id="1" category="fiction">
<page/>
</book>
<publisher/>
</bookstore>