From 3b55dd3536c3fefd1a8f711d8979adbabb61ab94 Mon Sep 17 00:00:00 2001 From: Shautvast Date: Fri, 25 Jul 2025 12:54:22 +0200 Subject: [PATCH] added comments and rustdoc --- Dockerfile | 11 +++++++++ README.md | 15 +++++++---- TODO.md | 10 ++++++++ src/main.rs | 9 +++++-- src/maven/metadata.rs | 3 +++ src/maven/mod.rs | 1 - src/maven/pom.rs | 4 +-- src/maven/pom_parser.rs | 3 ++- src/maven/pom_view.rs | 26 ------------------- src/maven/project.rs | 55 ++++++++++++++++++++++++++++++++--------- src/xml/debug.rs | 35 -------------------------- src/xml/dom_parser.rs | 27 +++++++++++--------- src/xml/mod.rs | 12 ++------- src/xml/sax_parser.rs | 18 ++++++++++++-- 14 files changed, 121 insertions(+), 108 deletions(-) create mode 100644 Dockerfile create mode 100644 TODO.md delete mode 100644 src/maven/pom_view.rs delete mode 100644 src/xml/debug.rs diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7d4e1c2 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM rust:1.88-alpine as builder +WORKDIR /usr/src/undeepend +COPY . . +RUN cargo install --path . 
+ +FROM debian:bullseye-slim +RUN apt-get update && rm -rf /var/lib/apt/lists/* +COPY --from=builder /usr/local/cargo/bin/undeepend /usr/local/bin/undeepend +CMD ["undeepend"] + +#&& apt-get install -y extra-runtime-dependencies \ No newline at end of file diff --git a/README.md b/README.md index fd811b6..ac4bfb6 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,15 @@ currently implementing in rust: -* a sax parser to read xml files (and existing xml binding in rust has trouble reading maven properties) -* a dom parser to get a generic xml representation -* a pom reader to get a maven specific representation -* to find out what dependencies you have +* V a sax parser to read xml files (an existing xml binding in rust has trouble reading maven properties) +* V a dom parser to get a generic xml representation +* V a pom reader to get a maven specific representation +* V to find out what dependencies you have +* try default localRepository ~/.m2/repository +* load settings.xml +* search dependency in localRepository +* download dependency from remote repos Why rust and not a maven plugin? * faster * more challenges -* run it in docker as a separate step \ No newline at end of file +* run it in docker as a separate step + diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..2682852 --- /dev/null +++ b/TODO.md @@ -0,0 +1,10 @@ +```sh +docker build -t ghcr.io/shautvast/undeepend:latest . 
+docker push ghcr.io/shautvast/undeepend:latest +``` + +```sh +#!/bin/bash +# undeepend.sh +docker run --rm -v $(pwd):/project ghcr.io/shautvast/undeepend "$@" +``` \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 16d402f..71e23f2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,12 @@ -use std::path::Path; +use std::env; +use std::path::PathBuf; use undeepend::maven::project::parse_project; fn main() { - let project = parse_project(Path::new("tests/maven/resources/sample_project")).unwrap(); + let args = std::env::args().collect::>(); + let dir = if args.len() ==1 { + env::current_dir().expect("Could not access current directory") + } else { PathBuf::from(&args[1]) }; + let project = parse_project(&dir).unwrap(); println!("{:?}", project.get_dependencies(&project.root)); } diff --git a/src/maven/metadata.rs b/src/maven/metadata.rs index 1dd66e2..7d51901 100644 --- a/src/maven/metadata.rs +++ b/src/maven/metadata.rs @@ -1,3 +1,6 @@ +// part of maven model, I may throw it away + + /// The Maven variant to parse poms /// These structs is directly modelled after the XML because that is what strong-xml plugin requires #[derive(PartialEq, Debug)] diff --git a/src/maven/mod.rs b/src/maven/mod.rs index 382bbec..3c16aea 100644 --- a/src/maven/mod.rs +++ b/src/maven/mod.rs @@ -1,5 +1,4 @@ pub mod metadata; pub mod pom; -pub mod pom_view; pub mod pom_parser; pub mod project; diff --git a/src/maven/pom.rs b/src/maven/pom.rs index 3636ee5..6b623a2 100644 --- a/src/maven/pom.rs +++ b/src/maven/pom.rs @@ -1,8 +1,8 @@ use std::collections::HashMap; use std::path::{Path, PathBuf}; -/// The Maven variant to parse poms -/// These structs is directly modelled after the XML because that is what strong-xml plugin requires +/// the maven object model + #[derive(PartialEq, Debug)] pub struct Pom { pub parent: Option, diff --git a/src/maven/pom_parser.rs b/src/maven/pom_parser.rs index e897e2b..bcd39f4 100644 --- a/src/maven/pom_parser.rs +++ 
b/src/maven/pom_parser.rs @@ -4,6 +4,7 @@ use crate::xml::dom_parser::{Node, get_document}; use std::collections::HashMap; use std::path::PathBuf; +/// parse the pom.xml into a Pom object (struct) pub fn get_pom(xml: impl Into) -> Result { let mut group_id = None; let mut artefact_id = None; @@ -14,7 +15,7 @@ pub fn get_pom(xml: impl Into) -> Result { let mut url = None; let mut dependencies = vec![]; let mut dependency_management = vec![]; - let mut properties = HashMap::new(); // useless assignment... + let mut properties = HashMap::new(); // useless assignments... let mut module_names = vec![]; // not useless assignment... for child in get_document(xml.into().as_str())?.root.children { diff --git a/src/maven/pom_view.rs b/src/maven/pom_view.rs deleted file mode 100644 index f69d048..0000000 --- a/src/maven/pom_view.rs +++ /dev/null @@ -1,26 +0,0 @@ -/// offers a (non-mutable) view on the pom-as-xml-representation -/// the main use of this is that it resolves the parent information when needed -/// - -#[derive(Debug)] -pub struct Artifact { - pub group: String, - pub name: String, - pub version: String, - pub path: String, -} - -impl Artifact { - pub fn new(group: &str, name: &str, version: &str) -> Self { - Self { - group: group.into(), - name: name.into(), - version: version.into(), - path: format!("{}/{}/{}", group.replace(".", "/"), name, version), - } - } - - pub fn is_snapshot(&self) -> bool { - self.version.ends_with("-SNAPSHOT") - } -} diff --git a/src/maven/project.rs b/src/maven/project.rs index d5148e1..e344767 100644 --- a/src/maven/project.rs +++ b/src/maven/project.rs @@ -7,6 +7,10 @@ use std::sync::LazyLock; static PROPERTY_EXPR: LazyLock = LazyLock::new(|| Regex::new(r"\$\{(.+)}").unwrap()); +/// Loads all poms from a given project directory. +/// A POM (project object model) is a description of the project to build written in XML. 
+/// It has modules which are also a pom.xml in a subdirectory of the project root +/// (nesting is in theory infinite, but in practice you'll have 2 or maybe 3 levels) pub fn parse_project(project_dir: &Path) -> Result { if !project_dir.is_dir() { return Err(format!("{:?} is not a directory", project_dir)); @@ -14,7 +18,10 @@ pub fn parse_project(project_dir: &Path) -> Result { let mut pom_file = project_dir.to_path_buf(); pom_file.push(Path::new("pom.xml")); - + if !pom_file.exists(){ + return Err(format!("Directory {} does not contain pom.xml", project_dir.to_str().unwrap())); + } + let pom_file = fs::read_to_string(pom_file).map_err(|e| e.to_string())?; let mut root = get_pom(pom_file).map_err(|e| e.to_string())?; @@ -22,6 +29,7 @@ pub fn parse_project(project_dir: &Path) -> Result { Ok(Project { root }) } +// examines modules in pom and loads them fn resolve_modules(project_dir: &Path, pom: &mut Pom) { let mut modules = pom .module_names @@ -34,6 +42,7 @@ fn resolve_modules(project_dir: &Path, pom: &mut Pom) { pom.modules.append(&mut modules); } +// loads module pom fn read_module_pom(project_dir: &Path, module: &String) -> Pom { let mut module_dir = project_dir.to_path_buf(); module_dir.push(Path::new(module)); @@ -48,12 +57,15 @@ fn read_module_pom(project_dir: &Path, module: &String) -> Pom { pom } +//main entry to project +//the (root) pom holds the child references to modules #[derive(Debug)] pub struct Project { pub root: Pom, } impl Project { + /// get a list of dependencies for a pom in the project pub fn get_dependencies(&self, pom: &Pom) -> Vec { pom.dependencies .iter() @@ -68,20 +80,31 @@ impl Project { .collect() } + // determining a version of a dependency can be done in different ways + // 1. version element below dependency, containing the version + // 2. version element below dependency, containing a property name that is declared in the pom, or a parent which contains the version + // 3. there is no version. 
In that case in the pom hierarchy there must be a dependencyManagement element in which the version is set + // 4. combination of 2 and 3. This is what I typically see in enterprise software. The root pom contains a list of version properties, so all versions are kept in the same place. Takes some diligence to maintain though. fn get_version(&self, pom: &Pom, group_id: &str, artifact_id: &str) -> Option { pom.dependencies .iter() + // find the dependency .find(|d| d.group_id == group_id && d.artifact_id == artifact_id) + // extract the version .and_then(|d| d.version.clone()) + // is it a property? .and_then(|version| { if PROPERTY_EXPR.is_match(&version) { let property_name = &PROPERTY_EXPR.captures(&version).unwrap()[1]; + // search property in project hierarchy self.get_property(pom, property_name) } else { Some(version) } }) .or_else(|| { + // version not set, try dependencyManagement + // TODO also search super poms pom.dependency_management .iter() .find(|d| d.group_id == group_id && d.artifact_id == artifact_id) @@ -97,6 +120,7 @@ impl Project { }) } + // recursively searches a property going up the chain towards parents fn get_property(&self, pom: &Pom, name: &str) -> Option { if pom.properties.contains_key(name) { pom.properties.get(name).cloned() @@ -111,7 +135,24 @@ impl Project { } } + // look up a pom in the project fn get_pom<'a>(&'a self, group_id: &str, artifact_id: &str) -> Option<&'a Pom> { + + // inner function to match poms (by artifactId and groupId) + // (extract if needed elsewhere) + fn is_same(pom: &Pom, group_id: &str, artifact_id: &str) -> bool { + if pom.artifact_id == artifact_id { + if let Some(pom_group_id) = &pom.group_id { + pom_group_id == group_id + } else { + false + } + } else { + false + } + } + + // inner function for recursion fn get_project_pom<'a>(pom: &'a Pom, group_id: &str, artifact_id: &str) -> Option<&'a Pom> { if is_same(pom, group_id, artifact_id) { return Some(pom); @@ -122,18 +163,8 @@ impl Project { } None } + 
get_project_pom(&self.root, group_id, artifact_id) } } -fn is_same(pom: &Pom, group_id: &str, artifact_id: &str) -> bool { - if pom.artifact_id == artifact_id { - if let Some(pom_group_id) = &pom.group_id { - pom_group_id == group_id - } else { - false - } - } else { - false - } -} diff --git a/src/xml/debug.rs b/src/xml/debug.rs deleted file mode 100644 index e0591e7..0000000 --- a/src/xml/debug.rs +++ /dev/null @@ -1,35 +0,0 @@ -use log::debug; -use crate::xml::SaxHandler; - -pub struct DebugHandler {} - -impl SaxHandler for DebugHandler { - fn start_document(&mut self) { - debug!("start_document"); - } - fn end_document(&mut self) { - debug!("end_document"); - } - fn start_prefix_mapping(&mut self, prefix: &str, _uri: &str) { - debug!("start_prefix_mapping for {}", prefix); - } - - fn start_element( - &mut self, - _uri: Option, - local_name: &str, - _qualified_name: &str, - attributes: Vec, - ) { - debug!("start_element {}, {:?}", local_name, attributes); - } - fn end_element(&mut self, _uri: Option, local_name: &str, _qualified_name: &str) { - debug!("end_element {} ", local_name); - } - fn characters(&mut self, chars: &[char]) { - debug!("characters {:?}", chars.iter().collect::()); - } - fn error(&mut self, _error: &str) { - debug!("error"); - } -} \ No newline at end of file diff --git a/src/xml/dom_parser.rs b/src/xml/dom_parser.rs index c408950..644d9a5 100644 --- a/src/xml/dom_parser.rs +++ b/src/xml/dom_parser.rs @@ -1,6 +1,7 @@ use crate::xml::sax_parser::parse_string; use crate::xml::{Attribute, SaxError, SaxHandler}; +/// get a generic XML object (Document) from the xml contents. This is called DOM parsing pub fn get_document(xml: &str) -> Result { let mut dom_hax_handler = DomSaxHandler::new(); parse_string(xml, Box::new(&mut dom_hax_handler))?; @@ -13,6 +14,9 @@ pub struct Document { pub root: Node, } +// used internally to hold usize references to children. +// needed to ward off the borrow checker +// don't ask about the name. 
#[derive(Debug, Clone, PartialEq)] struct BNode { name: String, @@ -22,6 +26,7 @@ struct BNode { text: Option, } +// in the end the usize references are translated to other Nodes #[derive(Debug, Clone, PartialEq)] pub struct Node { pub name: String, @@ -32,13 +37,13 @@ pub struct Node { } impl From<&BNode> for Node { - fn from(bnode: &BNode) -> Self { + fn from(b_node: &BNode) -> Self { Self { - name: bnode.name.clone(), - namespace: bnode.namespace.clone(), + name: b_node.name.clone(), + namespace: b_node.namespace.clone(), children: vec![], - attributes: bnode.attributes.to_vec(), - text: bnode.text.clone(), + attributes: b_node.attributes.to_vec(), + text: b_node.text.clone(), } } } @@ -58,7 +63,6 @@ impl BNode { struct DomSaxHandler { node_stack: Vec, nodes: Vec, - name: String, } impl DomSaxHandler { @@ -66,19 +70,18 @@ impl DomSaxHandler { Self { node_stack: vec![], nodes: vec![], - name: "koe".to_string(), } } fn into_doc(self) -> Document { - let bnode = &self.nodes[self.node_stack[0]]; - let node = self.to_node(bnode); + let b_node = &self.nodes[self.node_stack[0]]; + let node = self.to_node(b_node); Document { root: node } } - fn to_node(&self, bnode: &BNode) -> Node { - let mut node: Node = bnode.into(); - for child_index in &bnode.children { + fn to_node(&self, b_node: &BNode) -> Node { + let mut node: Node = b_node.into(); + for child_index in &b_node.children { let child = self.nodes.get(*child_index).unwrap(); node.children.push(self.to_node(child)); } diff --git a/src/xml/mod.rs b/src/xml/mod.rs index 96820de..4397012 100644 --- a/src/xml/mod.rs +++ b/src/xml/mod.rs @@ -1,5 +1,4 @@ pub mod sax_parser; -mod debug; pub mod dom_parser; #[derive(Debug,Clone,PartialEq)] @@ -9,15 +8,6 @@ pub struct Attribute { pub value: String, } -enum SaxEvent{ - StartDocument, - EndDocument, - StartElement(Option, String, String, Vec), - EndElement(Option, String), - Characters(String), - Error(String) -} - pub trait SaxHandler { fn start_document(&mut self); fn 
end_document(&mut self); @@ -36,6 +26,8 @@ pub trait SaxHandler { use std::fmt; +/// Custom error for XML situations +// likely incomplete #[derive(Debug, PartialEq)] pub enum SaxError { BadCharacter, diff --git a/src/xml/sax_parser.rs b/src/xml/sax_parser.rs index 1b78208..6295e8e 100644 --- a/src/xml/sax_parser.rs +++ b/src/xml/sax_parser.rs @@ -1,11 +1,24 @@ use crate::xml::{Attribute, SaxError, SaxHandler}; use std::collections::HashMap; +// So I decided to model it after java SAX api, which was a bad choice +// it defines a trait, like the java SAXHandler interface +// The rusty way to do it would be using a SAXEvent enum with different variants where there are now trait methods. +// That would also imply that you go from push (current) to a pull parser, which gives you more control. But hey, who needs that? + +/// Parses an xml as string and calls the SAXHandler functions accordingly +// no streaming? Nah. I could do that, but I didn't feel the need, because maven pom.xmls should not be of gigabyte size +// It's not a lot of code, but it handles my pom files well, so far. I have seen implementations (xml_oxide) that are way bigger. +// It's a basic recursive descent parser. +// It handles namespaces (and prefixes) correctly AFAIK +// No validation (DTD/XSD) +// I probably missed some other XML functionality that I don't think I need. pub fn parse_string(xml: &str, handler: Box<&mut dyn SaxHandler>) -> Result<(), SaxError> { SAXParser::new(xml, handler).parse() } -pub struct SAXParser<'a> { +// struct containing the parser state +struct SAXParser<'a> { xml: Vec<char>, handler: Box<&'a mut dyn SaxHandler>, position: usize, @@ -16,7 +29,8 @@ pub struct SAXParser<'a> { } impl<'a> SAXParser<'a> { - pub fn new(xml: &str, handler: Box<&'a mut dyn SaxHandler>) -> Self { + /// + fn new(xml: &str, handler: Box<&'a mut dyn SaxHandler>) -> Self { Self { xml: xml.chars().collect(), handler,