added comments and rustdoc

This commit is contained in:
Shautvast 2025-07-25 12:54:22 +02:00
parent 04b94f35b2
commit 3b55dd3536
14 changed files with 121 additions and 108 deletions

11
Dockerfile Normal file
View file

@ -0,0 +1,11 @@
# Build stage: compile and install the undeepend binary with cargo.
FROM rust:1.88-alpine AS builder
WORKDIR /usr/src/undeepend
COPY . .
RUN cargo install --path .

# Runtime stage: only the compiled binary is copied over.
FROM debian:bullseye-slim
# If the binary ever needs extra runtime packages, install them here, e.g.:
#   RUN apt-get update && apt-get install -y extra-runtime-dependencies \
#       && rm -rf /var/lib/apt/lists/*
COPY --from=builder /usr/local/cargo/bin/undeepend /usr/local/bin/undeepend
# The project to analyse is expected to be bind-mounted at /project.
# Making it the working directory lets undeepend find it without a path argument.
WORKDIR /project
# ENTRYPOINT (not CMD) so that arguments given to `docker run <image> ...`
# are passed to undeepend instead of replacing the command.
ENTRYPOINT ["undeepend"]

View file

@ -1,10 +1,15 @@
currently implementing in rust:
* a sax parser to read xml files (an existing xml binding in rust has trouble reading maven properties)
* a dom parser to get a generic xml representation
* a pom reader to get a maven specific representation
* to find out what dependencies you have
* V a sax parser to read xml files (an existing xml binding in rust has trouble reading maven properties)
* V a dom parser to get a generic xml representation
* V a pom reader to get a maven specific representation
* V to find out what dependencies you have
* try default localRepository ~/.m2/repository
* load settings.xml
* search dependency in localRepository
* download dependency from remote repos
Why rust and not a maven plugin?
* faster
* more challenges
* run it in docker as a separate step
* run it in docker as a separate step

10
TODO.md Normal file
View file

@ -0,0 +1,10 @@
```sh
docker build -t ghcr.io/shautvast/undeepend:latest .
docker push ghcr.io/shautvast/undeepend:latest
```
```sh
#!/bin/bash
# undeepend.sh
# Runs undeepend in a container against the current directory: the directory is
# mounted at /project and used as the container's working directory.
# "$(pwd)" is quoted so that paths containing spaces are mounted correctly.
docker run --rm -v "$(pwd)":/project -w /project ghcr.io/shautvast/undeepend "$@"
```

View file

@ -1,7 +1,12 @@
use std::path::Path;
use std::env;
use std::path::PathBuf;
use undeepend::maven::project::parse_project;
fn main() {
let project = parse_project(Path::new("tests/maven/resources/sample_project")).unwrap();
let args = std::env::args().collect::<Vec<String>>();
let dir = if args.len() ==1 {
env::current_dir().expect("Could not access current directory")
} else { PathBuf::from(&args[1]) };
let project = parse_project(&dir).unwrap();
println!("{:?}", project.get_dependencies(&project.root));
}

View file

@ -1,3 +1,6 @@
// part of maven model, I may throw it away
/// The Maven variant to parse poms
/// These structs are directly modelled after the XML because that is what the strong-xml plugin requires
#[derive(PartialEq, Debug)]

View file

@ -1,5 +1,4 @@
pub mod metadata;
pub mod pom;
pub mod pom_view;
pub mod pom_parser;
pub mod project;

View file

@ -1,8 +1,8 @@
use std::collections::HashMap;
use std::path::{Path, PathBuf};
/// The Maven variant to parse poms
/// These structs are directly modelled after the XML because that is what the strong-xml plugin requires
/// the maven object model
#[derive(PartialEq, Debug)]
pub struct Pom {
pub parent: Option<Parent>,

View file

@ -4,6 +4,7 @@ use crate::xml::dom_parser::{Node, get_document};
use std::collections::HashMap;
use std::path::PathBuf;
/// parse the pom.xml into a Pom object (struct)
pub fn get_pom(xml: impl Into<String>) -> Result<Pom, SaxError> {
let mut group_id = None;
let mut artefact_id = None;
@ -14,7 +15,7 @@ pub fn get_pom(xml: impl Into<String>) -> Result<Pom, SaxError> {
let mut url = None;
let mut dependencies = vec![];
let mut dependency_management = vec![];
let mut properties = HashMap::new(); // useless assignment...
let mut properties = HashMap::new(); // useless assignments...
let mut module_names = vec![]; // not useless assignment...
for child in get_document(xml.into().as_str())?.root.children {

View file

@ -1,26 +0,0 @@
/// An artifact identified by group, name and version, plus a relative path
/// derived from those three parts.
#[derive(Debug)]
pub struct Artifact {
    /// Group identifier exactly as passed to [`Artifact::new`] (dot-separated).
    pub group: String,
    /// Artifact name.
    pub name: String,
    /// Version string.
    pub version: String,
    /// `<group with every '.' replaced by '/'>/<name>/<version>`.
    pub path: String,
}

impl Artifact {
    /// Creates an artifact and derives its `path` from the three parts.
    pub fn new(group: &str, name: &str, version: &str) -> Self {
        let group_dir = group.replace('.', "/");
        let path = [group_dir.as_str(), name, version].join("/");
        Artifact {
            group: group.to_owned(),
            name: name.to_owned(),
            version: version.to_owned(),
            path,
        }
    }

    /// Returns `true` when the version ends with `-SNAPSHOT`.
    pub fn is_snapshot(&self) -> bool {
        self.version.strip_suffix("-SNAPSHOT").is_some()
    }
}

View file

@ -7,6 +7,10 @@ use std::sync::LazyLock;
static PROPERTY_EXPR: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"\$\{(.+)}").unwrap());
/// Loads all poms from a given project directory.
/// A POM (project object model) is a description of the project to build written in XML.
/// It has modules which are also a pom.xml in a subdirectory of the project root
/// (nesting is in theory infinite, but in practice you'll have 2 or maybe 3 levels)
pub fn parse_project(project_dir: &Path) -> Result<Project, String> {
if !project_dir.is_dir() {
return Err(format!("{:?} is not a directory", project_dir));
@ -14,7 +18,10 @@ pub fn parse_project(project_dir: &Path) -> Result<Project, String> {
let mut pom_file = project_dir.to_path_buf();
pom_file.push(Path::new("pom.xml"));
if !pom_file.exists(){
return Err(format!("Directory {} does not contain pom.xml", project_dir.to_str().unwrap()));
}
let pom_file = fs::read_to_string(pom_file).map_err(|e| e.to_string())?;
let mut root = get_pom(pom_file).map_err(|e| e.to_string())?;
@ -22,6 +29,7 @@ pub fn parse_project(project_dir: &Path) -> Result<Project, String> {
Ok(Project { root })
}
// examines modules in pom and loads them
fn resolve_modules(project_dir: &Path, pom: &mut Pom) {
let mut modules = pom
.module_names
@ -34,6 +42,7 @@ fn resolve_modules(project_dir: &Path, pom: &mut Pom) {
pom.modules.append(&mut modules);
}
// loads module pom
fn read_module_pom(project_dir: &Path, module: &String) -> Pom {
let mut module_dir = project_dir.to_path_buf();
module_dir.push(Path::new(module));
@ -48,12 +57,15 @@ fn read_module_pom(project_dir: &Path, module: &String) -> Pom {
pom
}
/// Main entry point to a parsed project.
///
/// The (root) pom holds the child references to its modules.
#[derive(Debug)]
pub struct Project {
/// The root pom of the project.
pub root: Pom,
}
impl Project {
/// get a list of dependencies for a pom in the project
pub fn get_dependencies(&self, pom: &Pom) -> Vec<Dependency> {
pom.dependencies
.iter()
@ -68,20 +80,31 @@ impl Project {
.collect()
}
// determining a version of a dependency can be done in different ways
// 1. version element below dependency, containing the version
// 2. version element below dependency, containing a property name that is declared in the pom, or a parent which contains the version
// 3. there is no version. In that case in the pom hierarchy there must be a dependencyManagement element in which the version is set
// 4. combination of 2 and 3. This is what I typically see in enterprise software. The root pom contains a list of version properties, so all versions are kept in the same place. Takes some diligence to maintain though.
fn get_version(&self, pom: &Pom, group_id: &str, artifact_id: &str) -> Option<String> {
pom.dependencies
.iter()
// find the dependency
.find(|d| d.group_id == group_id && d.artifact_id == artifact_id)
// extract the version
.and_then(|d| d.version.clone())
// is it a property?
.and_then(|version| {
if PROPERTY_EXPR.is_match(&version) {
let property_name = &PROPERTY_EXPR.captures(&version).unwrap()[1];
// search property in project hierarchy
self.get_property(pom, property_name)
} else {
Some(version)
}
})
.or_else(|| {
// version not set, try dependencyManagement
// TODO also search super poms
pom.dependency_management
.iter()
.find(|d| d.group_id == group_id && d.artifact_id == artifact_id)
@ -97,6 +120,7 @@ impl Project {
})
}
// recursively searches a property going up the chain towards parents
fn get_property(&self, pom: &Pom, name: &str) -> Option<String> {
if pom.properties.contains_key(name) {
pom.properties.get(name).cloned()
@ -111,7 +135,24 @@ impl Project {
}
}
// look up a pom in the project
fn get_pom<'a>(&'a self, group_id: &str, artifact_id: &str) -> Option<&'a Pom> {
// inner helper: a pom matches when both its artifactId and its groupId are equal
// to the requested ones; a pom without a groupId never matches
// (extract if needed elsewhere)
fn is_same(pom: &Pom, group_id: &str, artifact_id: &str) -> bool {
    let artifact_matches = pom.artifact_id == artifact_id;
    let group_matches = pom
        .group_id
        .as_ref()
        .is_some_and(|pom_group_id| pom_group_id == group_id);
    artifact_matches && group_matches
}
// inner function for recursion
fn get_project_pom<'a>(pom: &'a Pom, group_id: &str, artifact_id: &str) -> Option<&'a Pom> {
if is_same(pom, group_id, artifact_id) {
return Some(pom);
@ -122,18 +163,8 @@ impl Project {
}
None
}
get_project_pom(&self.root, group_id, artifact_id)
}
}
// A pom matches when its artifactId equals `artifact_id` and it declares a
// groupId equal to `group_id`; a pom without a groupId never matches.
fn is_same(pom: &Pom, group_id: &str, artifact_id: &str) -> bool {
    match &pom.group_id {
        Some(pom_group_id) if pom.artifact_id == artifact_id => pom_group_id == group_id,
        _ => false,
    }
}

View file

@ -1,35 +0,0 @@
use log::debug;
use crate::xml::SaxHandler;
/// A [`SaxHandler`] that does nothing but write every event it receives to the
/// debug log.
pub struct DebugHandler {}

impl SaxHandler for DebugHandler {
    /// Logs the start of the document.
    fn start_document(&mut self) {
        debug!("start_document");
    }

    /// Logs the end of the document.
    fn end_document(&mut self) {
        debug!("end_document");
    }

    /// Logs the prefix of a namespace mapping; the uri is ignored.
    fn start_prefix_mapping(&mut self, prefix: &str, _uri: &str) {
        debug!("start_prefix_mapping for {prefix}");
    }

    /// Logs the local name and attributes of an opening element.
    fn start_element(
        &mut self,
        _uri: Option<String>,
        local_name: &str,
        _qualified_name: &str,
        attributes: Vec<crate::xml::Attribute>,
    ) {
        debug!("start_element {local_name}, {attributes:?}");
    }

    /// Logs the local name of a closing element.
    fn end_element(&mut self, _uri: Option<String>, local_name: &str, _qualified_name: &str) {
        debug!("end_element {local_name} ");
    }

    /// Logs character data as a single (debug-quoted) string.
    fn characters(&mut self, chars: &[char]) {
        let text: String = chars.iter().collect();
        debug!("characters {text:?}");
    }

    /// Logs that an error occurred; the error message itself is not logged.
    fn error(&mut self, _error: &str) {
        debug!("error");
    }
}

View file

@ -1,6 +1,7 @@
use crate::xml::sax_parser::parse_string;
use crate::xml::{Attribute, SaxError, SaxHandler};
/// get a generic XML object (Document) from the xml contents. This is called DOM parsing
pub fn get_document(xml: &str) -> Result<Document, SaxError> {
let mut dom_hax_handler = DomSaxHandler::new();
parse_string(xml, Box::new(&mut dom_hax_handler))?;
@ -13,6 +14,9 @@ pub struct Document {
pub root: Node,
}
// used internally to hold usize references to children.
// needed to ward off the borrow checker
// don't ask about the name.
#[derive(Debug, Clone, PartialEq)]
struct BNode {
name: String,
@ -22,6 +26,7 @@ struct BNode {
text: Option<String>,
}
// in the end the usize references are translated to other Nodes
#[derive(Debug, Clone, PartialEq)]
pub struct Node {
pub name: String,
@ -32,13 +37,13 @@ pub struct Node {
}
impl From<&BNode> for Node {
fn from(bnode: &BNode) -> Self {
fn from(b_node: &BNode) -> Self {
Self {
name: bnode.name.clone(),
namespace: bnode.namespace.clone(),
name: b_node.name.clone(),
namespace: b_node.namespace.clone(),
children: vec![],
attributes: bnode.attributes.to_vec(),
text: bnode.text.clone(),
attributes: b_node.attributes.to_vec(),
text: b_node.text.clone(),
}
}
}
@ -58,7 +63,6 @@ impl BNode {
struct DomSaxHandler {
node_stack: Vec<usize>,
nodes: Vec<BNode>,
name: String,
}
impl DomSaxHandler {
@ -66,19 +70,18 @@ impl DomSaxHandler {
Self {
node_stack: vec![],
nodes: vec![],
name: "koe".to_string(),
}
}
fn into_doc(self) -> Document {
let bnode = &self.nodes[self.node_stack[0]];
let node = self.to_node(bnode);
let b_node = &self.nodes[self.node_stack[0]];
let node = self.to_node(b_node);
Document { root: node }
}
fn to_node(&self, bnode: &BNode) -> Node {
let mut node: Node = bnode.into();
for child_index in &bnode.children {
fn to_node(&self, b_node: &BNode) -> Node {
let mut node: Node = b_node.into();
for child_index in &b_node.children {
let child = self.nodes.get(*child_index).unwrap();
node.children.push(self.to_node(child));
}

View file

@ -1,5 +1,4 @@
pub mod sax_parser;
mod debug;
pub mod dom_parser;
#[derive(Debug,Clone,PartialEq)]
@ -9,15 +8,6 @@ pub struct Attribute {
pub value: String,
}
// Enum representation of XML parse events, one variant per kind of event.
// NOTE(review): no use of this enum is visible in this section — confirm whether it is still needed.
enum SaxEvent{
// the document starts
StartDocument,
// the document ends
EndDocument,
// an element opens
// presumably (uri, local_name, qualified_name, attributes), mirroring SaxHandler::start_element — TODO confirm
StartElement(Option<String>, String, String, Vec<Attribute>),
// an element closes
// presumably (uri, local_name) — TODO confirm
EndElement(Option<String>, String),
// character data
Characters(String),
// an error, carried as a String
Error(String)
}
pub trait SaxHandler {
fn start_document(&mut self);
fn end_document(&mut self);
@ -36,6 +26,8 @@ pub trait SaxHandler {
use std::fmt;
/// Custom error for XML situations
// likely incomplete
#[derive(Debug, PartialEq)]
pub enum SaxError {
BadCharacter,

View file

@ -1,11 +1,24 @@
use crate::xml::{Attribute, SaxError, SaxHandler};
use std::collections::HashMap;
// So I decided to model it after java SAX api, which was a bad choice
// it defines a trait, like the java SAXHandler interface
// The rusty way to do it would be using a SAXEvent enum with different variants where there are now trait methods.
// That would also imply that you go from push (current) to a pull parser, which gives you more control. But hey, who needs that?
/// Parses an XML document given as a string and calls the [`SaxHandler`] functions accordingly.
///
/// # Errors
/// Returns the [`SaxError`] reported by the underlying parser when parsing fails.
// no streaming? Nah. I could do that, but I didn't feel the need, because maven pom.xmls should not be of gigabyte size
// It's not a lot of code, but it handles my pom files well, so far. I have seen implementations (xml_oxide) that are way bigger.
// It's a basic recursive descent parser.
// It handles namespaces (and prefixes) correctly AFAIK
// No validation (DTD/XSD)
// I probably missed some other XML functionality that I don't think I need.
pub fn parse_string(xml: &str, handler: Box<&mut dyn SaxHandler>) -> Result<(), SaxError> {
// all parser state lives in a SAXParser that is created for this single call
SAXParser::new(xml, handler).parse()
}
pub struct SAXParser<'a> {
// struct containing the parser state
struct SAXParser<'a> {
xml: Vec<char>,
handler: Box<&'a mut dyn SaxHandler>,
position: usize,
@ -16,7 +29,8 @@ pub struct SAXParser<'a> {
}
impl<'a> SAXParser<'a> {
pub fn new(xml: &str, handler: Box<&'a mut dyn SaxHandler>) -> Self {
///
fn new(xml: &str, handler: Box<&'a mut dyn SaxHandler>) -> Self {
Self {
xml: xml.chars().collect(),
handler,