From 011075d5cee3dfc4fe0209ed51f6c9b24515fcca Mon Sep 17 00:00:00 2001 From: Shautvast Date: Fri, 13 Mar 2026 10:52:43 +0100 Subject: [PATCH] draft --- .gitignore | 36 ++ .../github/shautvast/xmldiff/DiffEngine.java | 47 +++ .../github/shautvast/xmldiff/DiffResult.java | 3 + .../github/shautvast/xmldiff/HtmlBuilder.java | 59 +++ .../shautvast/xmldiff/HtmlRenderer.java | 348 ++++++++++++++++++ .../com/github/shautvast/xmldiff/XmlDiff.java | 61 +++ .../shautvast/xmldiff/XmlDiffException.java | 7 + .../github/shautvast/xmldiff/XmlDiffTest.java | 169 +++++++++ xmldiff.md | 143 +++++++ 9 files changed, 873 insertions(+) create mode 100644 .gitignore create mode 100644 src/main/java/com/github/shautvast/xmldiff/DiffEngine.java create mode 100644 src/main/java/com/github/shautvast/xmldiff/DiffResult.java create mode 100644 src/main/java/com/github/shautvast/xmldiff/HtmlBuilder.java create mode 100644 src/main/java/com/github/shautvast/xmldiff/HtmlRenderer.java create mode 100644 src/main/java/com/github/shautvast/xmldiff/XmlDiff.java create mode 100644 src/main/java/com/github/shautvast/xmldiff/XmlDiffException.java create mode 100644 src/test/java/com/github/shautvast/xmldiff/XmlDiffTest.java create mode 100644 xmldiff.md diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c7ef23f --- /dev/null +++ b/.gitignore @@ -0,0 +1,36 @@ +target/ +!.mvn/wrapper/maven-wrapper.jar +!**/src/main/**/target/ +!**/src/test/**/target/ +.kotlin + +### IntelliJ IDEA ### +.idea/ +*.iws +*.iml +*.ipr + +### Eclipse ### +.apt_generated +.classpath +.factorypath +.project +.settings +.springBeans +.sts4-cache + +### NetBeans ### +/nbproject/private/ +/nbbuild/ +/dist/ +/nbdist/ +/.nb-gradle/ +build/ +!**/src/main/**/build/ +!**/src/test/**/build/ + +### VS Code ### +.vscode/ + +### Mac OS ### +.DS_Store \ No newline at end of file diff --git a/src/main/java/com/github/shautvast/xmldiff/DiffEngine.java b/src/main/java/com/github/shautvast/xmldiff/DiffEngine.java new file 
mode 100644 index 0000000..8c620a2 --- /dev/null +++ b/src/main/java/com/github/shautvast/xmldiff/DiffEngine.java @@ -0,0 +1,47 @@ +package com.github.shautvast.xmldiff; + +import org.xmlunit.builder.DiffBuilder; +import org.xmlunit.diff.*; + +import java.util.HashMap; +import java.util.Map; + +/** + * Runs XMLUnit's {@link DiffBuilder} and indexes the resulting differences into two + * XPath → {@link ComparisonType} maps (one per side). + * + *

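Children are matched by element name ({@link ElementSelectors#byName}), so a child that has no same-named counterpart on the other side is reported as a {@code CHILD_LOOKUP} difference. + *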

+ */ +class DiffEngine { + + private DiffEngine() {} + + record DiffMaps(Map left, Map right) {} + + static DiffMaps compute(String leftXml, String rightXml) { + Diff diff = DiffBuilder.compare(leftXml) + .withTest(rightXml) + .withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byName)) + .ignoreWhitespace() + .build(); + + Map left = new HashMap<>(); + Map right = new HashMap<>(); + + for (Difference d : diff.getDifferences()) { + Comparison c = d.getComparison(); + String lxp = c.getControlDetails().getXPath(); + String rxp = c.getTestDetails().getXPath(); + if (lxp != null) left.put(lxp, c.getType()); + if (rxp != null) right.put(rxp, c.getType()); + } + + return new DiffMaps(left, right); + } +} diff --git a/src/main/java/com/github/shautvast/xmldiff/DiffResult.java b/src/main/java/com/github/shautvast/xmldiff/DiffResult.java new file mode 100644 index 0000000..8bb2ca2 --- /dev/null +++ b/src/main/java/com/github/shautvast/xmldiff/DiffResult.java @@ -0,0 +1,3 @@ +package com.github.shautvast.xmldiff; + +public record DiffResult(String leftHtml, String rightHtml) {} diff --git a/src/main/java/com/github/shautvast/xmldiff/HtmlBuilder.java b/src/main/java/com/github/shautvast/xmldiff/HtmlBuilder.java new file mode 100644 index 0000000..d8d99ff --- /dev/null +++ b/src/main/java/com/github/shautvast/xmldiff/HtmlBuilder.java @@ -0,0 +1,59 @@ +package com.github.shautvast.xmldiff; + +/** + * Builds an HTML string of nested spans. + * Consecutive spans with the same CSS class are merged for cleaner output. 
+ */ +class HtmlBuilder { + + private final StringBuilder sb = new StringBuilder(); + private String currentClass = null; + private final StringBuilder currentContent = new StringBuilder(); + + void span(String cssClass, String content) { + if (cssClass.equals(currentClass)) { + currentContent.append(escape(content)); + } else { + flush(); + currentClass = cssClass; + currentContent.append(escape(content)); + } + } + + /** Emits a newline + indent, always in a neutral span. */ + void newline(int indent) { + if (!"neutral".equals(currentClass)) { + flush(); + currentClass = "neutral"; + } + currentContent.append("\n").append(" ".repeat(indent)); + } + + /** Emits a bare empty span (placeholder for a node absent on this side). */ + void emptySpan() { + flush(); + sb.append(""); + } + + String build() { + flush(); + return sb.toString(); + } + + private void flush() { + if (currentClass != null && !currentContent.isEmpty()) { + sb.append(""); + sb.append(currentContent); + sb.append(""); + } + currentContent.setLength(0); + currentClass = null; + } + + static String escape(String s) { + return s.replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace("\"", """); + } +} diff --git a/src/main/java/com/github/shautvast/xmldiff/HtmlRenderer.java b/src/main/java/com/github/shautvast/xmldiff/HtmlRenderer.java new file mode 100644 index 0000000..456fd31 --- /dev/null +++ b/src/main/java/com/github/shautvast/xmldiff/HtmlRenderer.java @@ -0,0 +1,348 @@ +package com.github.shautvast.xmldiff; + +import org.w3c.dom.*; +import org.xmlunit.diff.ComparisonType; + +import java.util.*; + +/** + * Walks two DOM trees in parallel and writes annotated HTML to two {@link HtmlBuilder}s, + * guided by XMLUnit diff maps. + * + *

What XMLUnit drives

+ *
    + *
  • {@code CHILD_LOOKUP} — identifies which children have no name-match on the other side.
  • + *
  • {@code TEXT_VALUE} — drives correct/wrong on paired text nodes.
  • + *
  • {@code ATTR_VALUE} — drives correct/wrong on attribute values when names match.
  • + *
+ * + *

Child matching

+ *
    + *
  1. XMLUnit {@code CHILD_LOOKUP} identifies structurally unmatched children.
  2. + *
Those are paired positionally (so {@code <a>} vs {@code <b>} renders as a + * tag-name diff with {@code skipped} children, not as two independent missing nodes).
  4. + *
  5. Remaining children are paired by element name, mirroring XMLUnit's own strategy.
  6. + *
+ * + *

Attributes

+ * Matched by name (order is not significant). Values are compared via the XMLUnit diff maps. + */ +class HtmlRenderer { + + private final HtmlBuilder left; + private final HtmlBuilder right; + private final Map leftDiffs; + private final Map rightDiffs; + + HtmlRenderer(HtmlBuilder left, HtmlBuilder right, + Map leftDiffs, + Map rightDiffs) { + this.left = left; + this.right = right; + this.leftDiffs = leftDiffs; + this.rightDiffs = rightDiffs; + } + + void render(Element leftEl, Element rightEl) { + String lxp = "/" + leftEl.getTagName() + "[1]"; + String rxp = "/" + rightEl.getTagName() + "[1]"; + renderElement(leftEl, rightEl, lxp, rxp, 0); + } + + // ------------------------------------------------------------------------- + // Element + // ------------------------------------------------------------------------- + + private void renderElement(Element leftEl, Element rightEl, + String lxp, String rxp, int indent) { + if (!leftEl.getTagName().equals(rightEl.getTagName())) { + renderTagNameDiff(leftEl, rightEl, indent); + return; + } + + String tag = leftEl.getTagName(); + left.span("neutral", "<" + tag); + right.span("neutral", "<" + tag); + + compareAttributes(leftEl, rightEl, lxp, rxp); + + List leftChildren = significantChildren(leftEl); + List rightChildren = significantChildren(rightEl); + + if (leftChildren.isEmpty() && rightChildren.isEmpty()) { + left.span("neutral", "/>"); + right.span("neutral", "/>"); + return; + } + + left.span("neutral", ">"); + right.span("neutral", ">"); + + compareChildren(leftChildren, rightChildren, lxp, rxp, indent + 1); + + left.newline(indent); + right.newline(indent); + left.span("neutral", ""); + right.span("neutral", ""); + } + + /** Tag names differ: name → correct/wrong, all content → skipped. 
*/ + private void renderTagNameDiff(Element leftEl, Element rightEl, int indent) { + left.span("correct", "<" + leftEl.getTagName()); + renderAttrsAsClass(left, leftEl, "skipped"); + renderBodyAsSkipped(left, leftEl, indent); + + right.span("wrong", "<" + rightEl.getTagName()); + renderAttrsAsClass(right, rightEl, "skipped"); + renderBodyAsSkipped(right, rightEl, indent); + } + + private void renderBodyAsSkipped(HtmlBuilder builder, Element el, int indent) { + List children = significantChildren(el); + if (children.isEmpty()) { + builder.span("skipped", "/>"); + } else { + builder.span("skipped", ">"); + for (Node child : children) { + builder.newline(indent + 1); + renderSubtree(builder, child, "skipped", indent + 1); + } + builder.newline(indent); + builder.span("skipped", ""); + } + } + + // ------------------------------------------------------------------------- + // Attributes — matched by name; values compared via XMLUnit diff maps + // ------------------------------------------------------------------------- + + private void compareAttributes(Element leftEl, Element rightEl, String lxp, String rxp) { + Map leftAttrs = attrMap(leftEl); + Map rightAttrs = attrMap(rightEl); + + for (Map.Entry e : leftAttrs.entrySet()) { + String name = e.getKey(); + String leftVal = e.getValue(); + String rightVal = rightAttrs.get(name); + + if (rightVal == null) { + left.span("correct", " " + name + "=\"" + leftVal + "\""); + right.emptySpan(); + } else { + boolean valueDiff = leftDiffs.get(lxp + "/@" + name) == ComparisonType.ATTR_VALUE + || rightDiffs.get(rxp + "/@" + name) == ComparisonType.ATTR_VALUE; + if (valueDiff) { + left.span("neutral", " " + name + "=\""); + left.span("correct", leftVal); + left.span("neutral", "\""); + right.span("neutral", " " + name + "=\""); + right.span("wrong", rightVal); + right.span("neutral", "\""); + } else { + left.span("neutral", " " + name + "=\"" + leftVal + "\""); + right.span("neutral", " " + name + "=\"" + rightVal + "\""); + } + } 
+ } + + for (Map.Entry e : rightAttrs.entrySet()) { + if (!leftAttrs.containsKey(e.getKey())) { + left.emptySpan(); + right.span("wrong", " " + e.getKey() + "=\"" + e.getValue() + "\""); + } + } + } + + // ------------------------------------------------------------------------- + // Children — paired using XMLUnit CHILD_LOOKUP + positional fallback + // ------------------------------------------------------------------------- + + private void compareChildren(List leftChildren, List rightChildren, + String lParentXPath, String rParentXPath, int indent) { + List lxps = childXPaths(leftChildren, lParentXPath); + List rxps = childXPaths(rightChildren, rParentXPath); + + // Step 1: identify structurally unmatched children per XMLUnit + List leftUnmatched = new ArrayList<>(); + List rightUnmatched = new ArrayList<>(); + for (int i = 0; i < leftChildren.size(); i++) { + if (leftDiffs.get(lxps.get(i)) == ComparisonType.CHILD_LOOKUP) + leftUnmatched.add(i); + } + for (int i = 0; i < rightChildren.size(); i++) { + if (rightDiffs.get(rxps.get(i)) == ComparisonType.CHILD_LOOKUP) + rightUnmatched.add(i); + } + + // Step 2: pair unmatched children positionally so differently-named siblings + // render as a tag-name diff (with skipped children) rather than independent missing nodes + Map leftToRight = new LinkedHashMap<>(); + Set matchedRight = new LinkedHashSet<>(); + int positional = Math.min(leftUnmatched.size(), rightUnmatched.size()); + for (int i = 0; i < positional; i++) { + leftToRight.put(leftUnmatched.get(i), rightUnmatched.get(i)); + matchedRight.add(rightUnmatched.get(i)); + } + + // Step 3: name-based matching for non-CHILD_LOOKUP element children + Set leftUnmatchedSet = new HashSet<>(leftUnmatched); + Set rightUnmatchedSet = new HashSet<>(rightUnmatched); + for (int li = 0; li < leftChildren.size(); li++) { + if (leftUnmatchedSet.contains(li) || leftToRight.containsKey(li)) continue; + if (!(leftChildren.get(li) instanceof Element le)) continue; + for (int ri = 0; 
ri < rightChildren.size(); ri++) { + if (rightUnmatchedSet.contains(ri) || matchedRight.contains(ri)) continue; + if (rightChildren.get(ri) instanceof Element re + && re.getTagName().equals(le.getTagName())) { + leftToRight.put(li, ri); + matchedRight.add(ri); + break; + } + } + } + + // Step 4: positional fallback for any remaining unmatched nodes (e.g. text) + List stillLeft = new ArrayList<>(), stillRight = new ArrayList<>(); + for (int li = 0; li < leftChildren.size(); li++) + if (!leftToRight.containsKey(li)) stillLeft.add(li); + for (int ri = 0; ri < rightChildren.size(); ri++) + if (!matchedRight.contains(ri)) stillRight.add(ri); + int extra = Math.min(stillLeft.size(), stillRight.size()); + for (int i = 0; i < extra; i++) { + leftToRight.put(stillLeft.get(i), stillRight.get(i)); + matchedRight.add(stillRight.get(i)); + } + + // Step 5: render left children in document order + Set renderedRight = new LinkedHashSet<>(); + for (int li = 0; li < leftChildren.size(); li++) { + left.newline(indent); + right.newline(indent); + Integer ri = leftToRight.get(li); + if (ri != null) { + renderNode(leftChildren.get(li), rightChildren.get(ri), + lxps.get(li), rxps.get(ri), indent); + renderedRight.add(ri); + } else { + renderSubtree(left, leftChildren.get(li), "correct", indent); + right.emptySpan(); + } + } + + // Step 6: render truly right-only children + for (int ri = 0; ri < rightChildren.size(); ri++) { + if (!renderedRight.contains(ri)) { + left.newline(indent); + right.newline(indent); + left.emptySpan(); + renderSubtree(right, rightChildren.get(ri), "wrong", indent); + } + } + } + + private void renderNode(Node ln, Node rn, String lxp, String rxp, int indent) { + if (ln instanceof Element le && rn instanceof Element re) { + renderElement(le, re, lxp, rxp, indent); + } else if (ln instanceof Text lt && rn instanceof Text rt) { + renderText(lt, rt, lxp, rxp); + } else { + renderSubtree(left, ln, "correct", indent); + renderSubtree(right, rn, "wrong", indent); + } + 
} + + // ------------------------------------------------------------------------- + // Text — driven by XMLUnit TEXT_VALUE + // ------------------------------------------------------------------------- + + private void renderText(Text lt, Text rt, String lxp, String rxp) { + boolean differs = leftDiffs.get(lxp) == ComparisonType.TEXT_VALUE + || rightDiffs.get(rxp) == ComparisonType.TEXT_VALUE; + String lc = lt.getTextContent().strip(); + String rc = rt.getTextContent().strip(); + if (differs) { + left.span("correct", lc); + right.span("wrong", rc); + } else { + left.span("neutral", lc); + right.span("neutral", rc); + } + } + + // ------------------------------------------------------------------------- + // Single-side subtree (one CSS class for all tokens) + // ------------------------------------------------------------------------- + + private void renderSubtree(HtmlBuilder builder, Node node, String cssClass, int indent) { + if (node instanceof Element el) { + builder.span(cssClass, "<" + el.getTagName()); + renderAttrsAsClass(builder, el, cssClass); + List children = significantChildren(el); + if (children.isEmpty()) { + builder.span(cssClass, "/>"); + } else { + builder.span(cssClass, ">"); + for (Node child : children) { + builder.newline(indent + 1); + renderSubtree(builder, child, cssClass, indent + 1); + } + builder.newline(indent); + builder.span(cssClass, ""); + } + } else if (node instanceof Text text) { + String content = text.getTextContent().strip(); + if (!content.isEmpty()) builder.span(cssClass, content); + } + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + private void renderAttrsAsClass(HtmlBuilder builder, Element el, String cssClass) { + attrMap(el).forEach((name, value) -> + builder.span(cssClass, " " + name + "=\"" + value + "\"")); + } + + private Map attrMap(Element el) { + NamedNodeMap nnm = 
el.getAttributes(); + Map map = new LinkedHashMap<>(); + for (int i = 0; i < nnm.getLength(); i++) { + Attr a = (Attr) nnm.item(i); + map.put(a.getName(), a.getValue()); + } + return map; + } + + private List significantChildren(Element el) { + NodeList nl = el.getChildNodes(); + List result = new ArrayList<>(); + for (int i = 0; i < nl.getLength(); i++) { + Node n = nl.item(i); + if (n.getNodeType() == Node.ELEMENT_NODE) { + result.add(n); + } else if (n.getNodeType() == Node.TEXT_NODE + && !n.getTextContent().strip().isEmpty()) { + result.add(n); + } + } + return result; + } + + /** Generates XMLUnit-compatible XPaths for a list of child nodes. */ + private List childXPaths(List children, String parentXPath) { + Map elementCounts = new LinkedHashMap<>(); + int textCount = 0; + List result = new ArrayList<>(children.size()); + for (Node child : children) { + if (child.getNodeType() == Node.ELEMENT_NODE) { + String tag = ((Element) child).getTagName(); + int n = elementCounts.merge(tag, 1, Integer::sum); + result.add(parentXPath + "/" + tag + "[" + n + "]"); + } else { + result.add(parentXPath + "/text()[" + (++textCount) + "]"); + } + } + return result; + } +} diff --git a/src/main/java/com/github/shautvast/xmldiff/XmlDiff.java b/src/main/java/com/github/shautvast/xmldiff/XmlDiff.java new file mode 100644 index 0000000..7a382a5 --- /dev/null +++ b/src/main/java/com/github/shautvast/xmldiff/XmlDiff.java @@ -0,0 +1,61 @@ +package com.github.shautvast.xmldiff; + +import org.w3c.dom.Document; +import org.xml.sax.InputSource; + +import javax.xml.parsers.DocumentBuilderFactory; +import java.io.StringReader; + +/** + * Public API for XML diffing. + * + *

Takes two XML strings (left = expected, right = actual) and returns two HTML strings + * representing a side-by-side diff. Each output is a tree of {@code <span>} elements + * annotated with CSS classes: + *

    + *
  • {@code neutral} — token is identical on both sides
  • + *
  • {@code correct} — token is on the left (expected) side and differs
  • + *
  • {@code wrong} — token is on the right (actual) side and differs
  • + *
  • {@code skipped} — child content of an element whose tag name differs
  • + *
+ * + *

Structural diffing is performed by XMLUnit 2 + * using name-based child matching. Attribute order is not significant. + */ +public class XmlDiff { + + private XmlDiff() {} + + /** + * Compares two XML strings and returns annotated HTML for both sides. + * + * @param leftXml the expected XML + * @param rightXml the actual XML + * @return a {@link DiffResult} containing the left and right HTML strings + * @throws XmlDiffException if either string cannot be parsed as XML + */ + public static DiffResult compare(String leftXml, String rightXml) { + DiffEngine.DiffMaps maps = DiffEngine.compute(leftXml, rightXml); + + Document leftDoc = parse(leftXml); + Document rightDoc = parse(rightXml); + + HtmlBuilder leftBuilder = new HtmlBuilder(); + HtmlBuilder rightBuilder = new HtmlBuilder(); + + new HtmlRenderer(leftBuilder, rightBuilder, maps.left(), maps.right()) + .render(leftDoc.getDocumentElement(), rightDoc.getDocumentElement()); + + return new DiffResult(leftBuilder.build(), rightBuilder.build()); + } + + private static Document parse(String xml) { + try { + return DocumentBuilderFactory.newInstance() + .newDocumentBuilder() + .parse(new InputSource(new StringReader(xml))); + } catch (Exception e) { + throw new XmlDiffException("Failed to parse XML: " + e.getMessage(), e); + } + } +} diff --git a/src/main/java/com/github/shautvast/xmldiff/XmlDiffException.java b/src/main/java/com/github/shautvast/xmldiff/XmlDiffException.java new file mode 100644 index 0000000..db8d536 --- /dev/null +++ b/src/main/java/com/github/shautvast/xmldiff/XmlDiffException.java @@ -0,0 +1,7 @@ +package com.github.shautvast.xmldiff; + +public class XmlDiffException extends RuntimeException { + public XmlDiffException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/src/test/java/com/github/shautvast/xmldiff/XmlDiffTest.java b/src/test/java/com/github/shautvast/xmldiff/XmlDiffTest.java new file mode 100644 index 0000000..b065488 --- /dev/null +++ 
b/src/test/java/com/github/shautvast/xmldiff/XmlDiffTest.java @@ -0,0 +1,169 @@ +package com.github.shautvast.xmldiff; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +class XmlDiffTest { + + // ------------------------------------------------------------------------- + // 1. Identical elements — everything neutral + // ------------------------------------------------------------------------- + @Test + void identicalSimple_allNeutral() { + assertNoDiff(XmlDiff.compare("", "")); + } + + @Test + void identicalWithText_allNeutral() { + assertNoDiff(XmlDiff.compare("hello", "hello")); + } + + @Test + void identicalNested_allNeutral() { + String xml = "text"; + assertNoDiff(XmlDiff.compare(xml, xml)); + } + + // ------------------------------------------------------------------------- + // 2. Differing text content + // ------------------------------------------------------------------------- + @Test + void differingText() { + DiffResult r = XmlDiff.compare("expected", "actual"); + assertContains(r.leftHtml(), "correct", "expected"); + assertContains(r.rightHtml(), "wrong", "actual"); + assertAbsent(r.leftHtml(), "wrong"); + assertAbsent(r.rightHtml(), "correct"); + } + + // ------------------------------------------------------------------------- + // 3. Differing attribute value — name neutral, value correct/wrong + // ------------------------------------------------------------------------- + @Test + void differingAttributeValue() { + DiffResult r = XmlDiff.compare( + "", + ""); + + assertContains(r.leftHtml(), "correct", "expected"); + assertContains(r.rightHtml(), "wrong", "actual"); + // The attribute name itself must not be marked correct/wrong + assertFalse(r.leftHtml().contains("attr"), + "attr name should not be marked correct"); + assertFalse(r.rightHtml().contains("attr"), + "attr name should not be marked wrong"); + } + + // ------------------------------------------------------------------------- + // 4. 
Attribute only on one side + // ------------------------------------------------------------------------- + @Test + void attributeOnlyOnLeft() { + DiffResult r = XmlDiff.compare("", ""); + assertContains(r.leftHtml(), "correct", "x"); + assertTrue(r.rightHtml().contains(""), "right should have placeholder"); + } + + @Test + void attributeOnlyOnRight() { + DiffResult r = XmlDiff.compare("", ""); + assertTrue(r.leftHtml().contains(""), "left should have placeholder"); + assertContains(r.rightHtml(), "wrong", "x"); + } + + // ------------------------------------------------------------------------- + // 5. Differing element name — tag correct/wrong, children skipped + // ------------------------------------------------------------------------- + @Test + void differingElementName() { + DiffResult r = XmlDiff.compare( + "text", + "text"); + + assertContains(r.leftHtml(), "correct", "expected"); + assertContains(r.rightHtml(), "wrong", "actual"); + assertTrue(r.leftHtml().contains("class=\"skipped\""), "left children should be skipped"); + assertTrue(r.rightHtml().contains("class=\"skipped\""), "right children should be skipped"); + } + + // ------------------------------------------------------------------------- + // 6. Extra child on left + // ------------------------------------------------------------------------- + @Test + void extraChildOnLeft() { + DiffResult r = XmlDiff.compare("", ""); + assertContains(r.leftHtml(), "correct", "child"); + assertTrue(r.rightHtml().contains(""), "right should have placeholder"); + assertAbsent(r.rightHtml(), "correct"); + } + + // ------------------------------------------------------------------------- + // 7. 
Extra child on right + // ------------------------------------------------------------------------- + @Test + void extraChildOnRight() { + DiffResult r = XmlDiff.compare("", ""); + assertTrue(r.leftHtml().contains(""), "left should have placeholder"); + assertContains(r.rightHtml(), "wrong", "child"); + assertAbsent(r.leftHtml(), "wrong"); + } + + // ------------------------------------------------------------------------- + // 8. Attribute order does not matter + // ------------------------------------------------------------------------- + @Test + void attributeOrderDoesNotMatter() { + DiffResult r = XmlDiff.compare( + "", + ""); + assertNoDiff(r); + } + + // ------------------------------------------------------------------------- + // 9. Nested elements — only the differing subtree is marked + // ------------------------------------------------------------------------- + @Test + void nestedPartialDiff() { + DiffResult r = XmlDiff.compare( + "expected", + "actual"); + + assertTrue(r.leftHtml().contains("class=\"neutral\""), "unchanged parts should be neutral"); + assertContains(r.leftHtml(), "correct", "expected"); + assertContains(r.rightHtml(), "wrong", "actual"); + } + + // ------------------------------------------------------------------------- + // 10. 
Self-closing element, no diff + // ------------------------------------------------------------------------- + @Test + void selfClosingNoDiff() { + assertNoDiff(XmlDiff.compare("", "")); + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + private void assertNoDiff(DiffResult r) { + assertAbsent(r.leftHtml(), "correct"); + assertAbsent(r.leftHtml(), "wrong"); + assertAbsent(r.leftHtml(), "skipped"); + assertAbsent(r.rightHtml(), "correct"); + assertAbsent(r.rightHtml(), "wrong"); + assertAbsent(r.rightHtml(), "skipped"); + } + + private void assertContains(String html, String cssClass, String text) { + assertTrue(html.contains("class=\"" + cssClass + "\""), + "expected class=" + cssClass + " in: " + html); + assertTrue(html.contains(text), + "expected text '" + text + "' in: " + html); + } + + private void assertAbsent(String html, String cssClass) { + assertFalse(html.contains("class=\"" + cssClass + "\""), + "unexpected class=" + cssClass + " in: " + html); + } +} diff --git a/xmldiff.md b/xmldiff.md new file mode 100644 index 0000000..b3944d8 --- /dev/null +++ b/xmldiff.md @@ -0,0 +1,143 @@ +# xmldiff — Implementation Plan + +## Goal + +A Java library that takes two XML strings (left = expected, right = actual) and produces two HTML strings suitable for rendering a side-by-side diff. Each output is a `` tree with inner spans annotated with CSS classes. 
+ +## CSS Classes + +| Class | Meaning | +|------------|-----------------------------------------------------------| +| `neutral` | This token is identical on both sides | +| `correct` | This token is on the **left** side and differs from right | +| `wrong` | This token is on the **right** side and differs from left | +| `skipped` | Child content of an element whose **tag name** differs | + +## Diff Granularity Rules + +| Token | If equal | If different | +|-------------------------------|------------|--------------------------------------------------------------------------------------| +| Element name | `neutral` | Left → `correct`, right → `wrong`; all content (attrs, children, text) → `skipped` | +| Attribute name | `neutral` | Left attr name → `correct`, right attr name → `wrong` | +| Attribute value | `neutral` | Left attr name neutral, left value → `correct`; same on right → `wrong` | +| Text content | `neutral` | Left text → `correct`, right text → `wrong` | +| Element present only on left | — | Left subtree → `correct`, right → empty `<span></span>` | +| Element present only on right | — | Right subtree → `wrong`, left → empty `<span></span>` | + +Attribute **order is not significant**: attributes are matched by name, so `<a x="1" y="2"/>` and `<a y="2" x="1"/>` produce no diff. + +## Output Format + +Each output string is pretty-printed HTML. XML special characters (`<`, `>`, `&`, `"`) inside span text are HTML-escaped. Indentation uses 2 spaces per level. Output does **not** include an XML declaration. + +Example shape: + +```html +<root> + <child attr="value"> + text here + </child> +</root> +``` + +## Dependencies + +```xml +<!-- XML diffing --> +<dependency> + <groupId>org.xmlunit</groupId> + <artifactId>xmlunit-core</artifactId> + <version>2.10.0</version> +</dependency> + +<!-- Testing --> +<dependency> + <groupId>org.junit.jupiter</groupId> + <artifactId>junit-jupiter</artifactId> + <version>5.11.0</version> + <scope>test</scope> +</dependency> +``` + +**XMLUnit 2.x** is the diffing engine. It produces a list of `Comparison` objects, each with: +- `getType()` — `ComparisonType` enum: `ELEMENT_TAG_NAME`, `ATTR_VALUE`, `ATTR_NAME_LOOKUP`, `TEXT_VALUE`, `CHILD_NODELIST_LENGTH`, `HAS_CHILD_NODES`, etc. 
+- `getControlDetails().getXPath()` — XPath of the affected node on the left side +- `getTestDetails().getXPath()` — XPath of the affected node on the right side + +## Algorithm + +### Step 1 — Diff (DiffEngine) + +``` +Diff diff = DiffBuilder + .compare(leftXml) + .withTest(rightXml) + .withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byName)) + .ignoreWhitespace() + .build(); + +For each Comparison c in diff.getDifferences(): + record (c.getControlDetails().getXPath(), c.getTestDetails().getXPath(), c.getType()) + into two maps: leftDiffs: XPath → ComparisonType + rightDiffs: XPath → ComparisonType +``` + +### Step 2 — Render (HtmlRenderer) + +Walk the two DOM trees in parallel, pretty-printing each side to HTML. At each node, look up its XPath in the relevant diff map to determine its CSS class. + +**Element node:** +``` +xp = xpathOf(node) +if leftDiffs contains xp with type ELEMENT_TAG_NAME: + emit tag name as correct/wrong + emit all attributes + children recursively as skipped +else: + emit tag name as neutral + for each attribute (in document order): + emit based on attr-level diff lookup + recurse into children +``` + +**Text node:** +``` +xp = xpathOf(node) +if leftDiffs/rightDiffs contains xp with type TEXT_VALUE: + emit as correct / wrong +else: + emit as neutral +``` + +**Missing child (`CHILD_LOOKUP`):** +``` +emit present side as correct/wrong +emit absent side as empty <span></span> +``` + +XPaths are computed from the DOM tree as each node is visited, matching the XPaths that XMLUnit generates (e.g. `/root[1]/child[1]`). + +### Step 3 — Output + +`XmlDiff.compare()` calls `DiffEngine`, then runs a single `HtmlRenderer` pass that walks both trees in parallel and writes to one `HtmlBuilder` per side, returning a `DiffResult`. 
+ +## Test Cases + +| # | Scenario | Left class | Right class | +|---|---------------------------------------|------------|-------------| +| 1 | Identical simple elements | all `neutral` | all `neutral` | +| 2 | Differing text content | text `correct` | text `wrong` | +| 3 | Differing attribute value | value `correct` | value `wrong` (name neutral) | +| 4 | Differing attribute name | name `correct` | name `wrong` | +| 5 | Differing element name | name `correct`, children `skipped` | name `wrong`, children `skipped` | +| 6 | Extra child on left only | child `correct` | empty span | +| 7 | Extra child on right only | empty span | child `wrong` | +| 8 | Attribute order differs | all `neutral` | all `neutral` | +| 9 | Nested elements, partial diff | only differing subtree marked | same | +| 10| Self-closing element, no diff | all `neutral` | all `neutral` | + +## Assumptions + +- Comments, processing instructions, and CDATA sections are ignored. +- Whitespace-only text nodes between elements are ignored (XMLUnit `ignoreWhitespace()`). +- Namespace prefixes are treated as plain text; no namespace-aware comparison. +- The library is stateless; `XmlDiff.compare()` is safe to call concurrently. \ No newline at end of file