This commit is contained in:
Shautvast 2026-03-13 10:52:43 +01:00
commit 011075d5ce
9 changed files with 873 additions and 0 deletions

36
.gitignore vendored Normal file
View file

@ -0,0 +1,36 @@
target/
!.mvn/wrapper/maven-wrapper.jar
!**/src/main/**/target/
!**/src/test/**/target/
.kotlin
### IntelliJ IDEA ###
.idea/
*.iws
*.iml
*.ipr
### Eclipse ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
build/
!**/src/main/**/build/
!**/src/test/**/build/
### VS Code ###
.vscode/
### Mac OS ###
.DS_Store

View file

@ -0,0 +1,47 @@
package com.github.shautvast.xmldiff;
import org.xmlunit.builder.DiffBuilder;
import org.xmlunit.diff.*;
import java.util.HashMap;
import java.util.Map;
/**
 * Runs XMLUnit's {@link DiffBuilder} over two XML strings and indexes the reported
 * differences by XPath, producing one {@code XPath -> ComparisonType} map per side.
 *
 * <p>Children are matched by element name ({@link ElementSelectors#byName}), so the
 * difference types of interest are:
 * <ul>
 *   <li>{@link ComparisonType#CHILD_LOOKUP} &mdash; a child has no name-match on the other side</li>
 *   <li>{@link ComparisonType#TEXT_VALUE} &mdash; paired text nodes differ</li>
 *   <li>{@link ComparisonType#ATTR_VALUE} &mdash; a named attribute's value differs</li>
 *   <li>{@link ComparisonType#ATTR_NAME_LOOKUP} &mdash; an attribute exists only on one side</li>
 * </ul>
 *
 * <p>Each map keeps a single type per XPath: when several differences are reported for
 * the same XPath, the one reported last replaces the earlier ones.
 */
class DiffEngine {

    /** Static utility; not instantiable. */
    private DiffEngine() {
    }

    /**
     * The two difference indexes produced by {@link #compute(String, String)}.
     *
     * @param left  differences keyed by the XPath XMLUnit reported for the control (left) document
     * @param right differences keyed by the XPath XMLUnit reported for the test (right) document
     */
    record DiffMaps(Map<String, ComparisonType> left, Map<String, ComparisonType> right) {}

    /**
     * Diffs the two documents (whitespace ignored, children matched by element name)
     * and indexes every reported difference on the side(s) where it has an XPath.
     *
     * @param leftXml  the control (expected) XML
     * @param rightXml the test (actual) XML
     * @return the per-side difference maps
     */
    static DiffMaps compute(String leftXml, String rightXml) {
        DefaultNodeMatcher byElementName = new DefaultNodeMatcher(ElementSelectors.byName);
        Diff diff = DiffBuilder.compare(leftXml)
                .withTest(rightXml)
                .withNodeMatcher(byElementName)
                .ignoreWhitespace()
                .build();

        Map<String, ComparisonType> controlSide = new HashMap<>();
        Map<String, ComparisonType> testSide = new HashMap<>();
        diff.getDifferences().forEach(difference -> {
            Comparison comparison = difference.getComparison();
            ComparisonType type = comparison.getType();
            index(controlSide, comparison.getControlDetails().getXPath(), type);
            index(testSide, comparison.getTestDetails().getXPath(), type);
        });
        return new DiffMaps(controlSide, testSide);
    }

    /** Records {@code type} under {@code xpath}; a {@code null} XPath is skipped. */
    private static void index(Map<String, ComparisonType> target, String xpath, ComparisonType type) {
        if (xpath != null) {
            target.put(xpath, type);
        }
    }
}

View file

@ -0,0 +1,3 @@
package com.github.shautvast.xmldiff;
/**
 * The two halves of a side-by-side XML diff.
 *
 * @param leftHtml  HTML for the left side of the diff
 * @param rightHtml HTML for the right side of the diff
 */
public record DiffResult(
        String leftHtml,
        String rightHtml) {
}

View file

@ -0,0 +1,59 @@
package com.github.shautvast.xmldiff;
/**
 * Accumulates HTML as a flat sequence of {@code <span class="...">} elements.
 *
 * <p>Text written with the same CSS class as the preceding write is appended to the
 * span that is still open, so consecutive same-class tokens end up in a single span.
 * All text is HTML-escaped on the way in.
 */
class HtmlBuilder {
    /** Finished spans. */
    private final StringBuilder out = new StringBuilder();
    /** Escaped text of the span currently being assembled. */
    private final StringBuilder pending = new StringBuilder();
    /** CSS class of the span currently being assembled, or {@code null} if none is open. */
    private String pendingClass;

    /**
     * Appends {@code content} (escaped) under {@code cssClass}, extending the open span
     * when its class is the same and starting a new span otherwise.
     */
    void span(String cssClass, String content) {
        if (!cssClass.equals(pendingClass)) {
            flush();
            pendingClass = cssClass;
        }
        pending.append(escape(content));
    }

    /** Emits a line break followed by {@code indent} indent units, always in a neutral span. */
    void newline(int indent) {
        if (!"neutral".equals(pendingClass)) {
            flush();
            pendingClass = "neutral";
        }
        pending.append("\n").append(" ".repeat(indent));
    }

    /** Closes the open span and emits a bare, class-less empty span as a placeholder. */
    void emptySpan() {
        flush();
        out.append("<span></span>");
    }

    /** Closes the open span and returns everything written so far. */
    String build() {
        flush();
        return out.toString();
    }

    /** Writes the open span to the output (only if it has content) and resets the pending state. */
    private void flush() {
        if (pendingClass != null && pending.length() > 0) {
            out.append("<span class=\"")
                    .append(pendingClass)
                    .append("\">")
                    .append(pending)
                    .append("</span>");
        }
        pendingClass = null;
        pending.setLength(0);
    }

    /** Replaces {@code &}, {@code <}, {@code >} and {@code "} with their HTML entities. */
    static String escape(String s) {
        StringBuilder escaped = new StringBuilder(s.length());
        for (int i = 0; i < s.length(); i++) {
            char ch = s.charAt(i);
            switch (ch) {
                case '&' -> escaped.append("&amp;");
                case '<' -> escaped.append("&lt;");
                case '>' -> escaped.append("&gt;");
                case '"' -> escaped.append("&quot;");
                default -> escaped.append(ch);
            }
        }
        return escaped.toString();
    }
}

View file

@ -0,0 +1,348 @@
package com.github.shautvast.xmldiff;
import org.w3c.dom.*;
import org.xmlunit.diff.ComparisonType;
import java.util.*;
/**
 * Walks two DOM trees in parallel and writes annotated HTML to two {@link HtmlBuilder}s,
 * guided by XMLUnit diff maps.
 *
 * <h3>What XMLUnit drives</h3>
 * <ul>
 * <li>{@code CHILD_LOOKUP} &mdash; identifies which children have no name-match on the other side.</li>
 * <li>{@code TEXT_VALUE} &mdash; drives correct/wrong on paired text nodes.</li>
 * <li>{@code ATTR_VALUE} &mdash; drives correct/wrong on attribute values when names match.</li>
 * </ul>
 *
 * <h3>Child matching</h3>
 * <ol>
 * <li>XMLUnit {@code CHILD_LOOKUP} identifies structurally unmatched children.</li>
 * <li>Those are paired positionally (so {@code <expected>} vs {@code <actual>} renders as a
 * tag-name diff with {@code skipped} children, not as two independent missing nodes).</li>
 * <li>Remaining children are paired by element name, mirroring XMLUnit's own strategy.</li>
 * <li>Whatever is still unpaired (e.g. text nodes) is paired positionally.</li>
 * </ol>
 *
 * <h3>Attributes</h3>
 * Matched by name (order is not significant). Values are compared via the XMLUnit diff maps.
 *
 * <h3>CSS classes written</h3>
 * {@code neutral} for tokens treated as equal, {@code correct} for differing left-side tokens,
 * {@code wrong} for differing right-side tokens, {@code skipped} for the content of an element
 * whose tag name differs. A node or attribute absent on one side is represented there by
 * {@link HtmlBuilder#emptySpan()}.
 */
class HtmlRenderer {
// Output for the left (expected) and right (actual) document.
private final HtmlBuilder left;
private final HtmlBuilder right;
// XPath -> difference type, as reported for the left and right document respectively.
private final Map<String, ComparisonType> leftDiffs;
private final Map<String, ComparisonType> rightDiffs;
/**
 * @param left       receives the HTML for the left document
 * @param right      receives the HTML for the right document
 * @param leftDiffs  differences keyed by left-document XPath
 * @param rightDiffs differences keyed by right-document XPath
 */
HtmlRenderer(HtmlBuilder left, HtmlBuilder right,
Map<String, ComparisonType> leftDiffs,
Map<String, ComparisonType> rightDiffs) {
this.left = left;
this.right = right;
this.leftDiffs = leftDiffs;
this.rightDiffs = rightDiffs;
}
/**
 * Renders both trees starting from their root elements. The root XPaths are built as
 * {@code /<tagName>[1]} so that the XPaths derived for descendants can be looked up in
 * the diff maps.
 */
void render(Element leftEl, Element rightEl) {
String lxp = "/" + leftEl.getTagName() + "[1]";
String rxp = "/" + rightEl.getTagName() + "[1]";
renderElement(leftEl, rightEl, lxp, rxp, 0);
}
// -------------------------------------------------------------------------
// Element
// -------------------------------------------------------------------------
/**
 * Renders a pair of elements. Differently named elements are delegated to
 * {@link #renderTagNameDiff}; otherwise the tag is neutral, attributes and children are
 * compared, and an element with no significant children on either side is written
 * self-closed.
 *
 * @param lxp    XPath of {@code leftEl}, used as the prefix for attribute/child lookups
 * @param rxp    XPath of {@code rightEl}
 * @param indent nesting depth of this element, used for the closing-tag line
 */
private void renderElement(Element leftEl, Element rightEl,
String lxp, String rxp, int indent) {
if (!leftEl.getTagName().equals(rightEl.getTagName())) {
renderTagNameDiff(leftEl, rightEl, indent);
return;
}
String tag = leftEl.getTagName();
left.span("neutral", "<" + tag);
right.span("neutral", "<" + tag);
compareAttributes(leftEl, rightEl, lxp, rxp);
List<Node> leftChildren = significantChildren(leftEl);
List<Node> rightChildren = significantChildren(rightEl);
// Self-close only when BOTH sides are empty; if just one side has children the
// open/close tag pair is written on both sides so the two outputs stay line-aligned.
if (leftChildren.isEmpty() && rightChildren.isEmpty()) {
left.span("neutral", "/>");
right.span("neutral", "/>");
return;
}
left.span("neutral", ">");
right.span("neutral", ">");
compareChildren(leftChildren, rightChildren, lxp, rxp, indent + 1);
left.newline(indent);
right.newline(indent);
left.span("neutral", "</" + tag + ">");
right.span("neutral", "</" + tag + ">");
}
/** Tag names differ: name → correct/wrong, all content → skipped. */
private void renderTagNameDiff(Element leftEl, Element rightEl, int indent) {
left.span("correct", "<" + leftEl.getTagName());
renderAttrsAsClass(left, leftEl, "skipped");
renderBodyAsSkipped(left, leftEl, indent);
right.span("wrong", "<" + rightEl.getTagName());
renderAttrsAsClass(right, rightEl, "skipped");
renderBodyAsSkipped(right, rightEl, indent);
}
/**
 * Writes the remainder of an element after its opening tag name and attributes &mdash;
 * either {@code />} or {@code >}, the children and the closing tag &mdash; all as
 * {@code skipped}. No diff lookups are performed for the content.
 */
private void renderBodyAsSkipped(HtmlBuilder builder, Element el, int indent) {
List<Node> children = significantChildren(el);
if (children.isEmpty()) {
builder.span("skipped", "/>");
} else {
builder.span("skipped", ">");
for (Node child : children) {
builder.newline(indent + 1);
renderSubtree(builder, child, "skipped", indent + 1);
}
builder.newline(indent);
builder.span("skipped", "</" + el.getTagName() + ">");
}
}
// -------------------------------------------------------------------------
// Attributes: matched by name; values compared via XMLUnit diff maps
// -------------------------------------------------------------------------
/**
 * Renders the attributes of two same-named elements. Left attributes are written in
 * left document order; attributes that exist only on the right are appended afterwards.
 * A value counts as different only if one of the diff maps holds {@code ATTR_VALUE}
 * under {@code <elementXPath>/@<name>}; the values themselves are not compared here.
 */
private void compareAttributes(Element leftEl, Element rightEl, String lxp, String rxp) {
Map<String, String> leftAttrs = attrMap(leftEl);
Map<String, String> rightAttrs = attrMap(rightEl);
for (Map.Entry<String, String> e : leftAttrs.entrySet()) {
String name = e.getKey();
String leftVal = e.getValue();
String rightVal = rightAttrs.get(name);
if (rightVal == null) {
// Attribute exists only on the left: whole attribute is "correct", right gets a placeholder.
left.span("correct", " " + name + "=\"" + leftVal + "\"");
right.emptySpan();
} else {
boolean valueDiff = leftDiffs.get(lxp + "/@" + name) == ComparisonType.ATTR_VALUE
|| rightDiffs.get(rxp + "/@" + name) == ComparisonType.ATTR_VALUE;
if (valueDiff) {
// Name and quotes stay neutral; only the value itself is highlighted.
left.span("neutral", " " + name + "=\"");
left.span("correct", leftVal);
left.span("neutral", "\"");
right.span("neutral", " " + name + "=\"");
right.span("wrong", rightVal);
right.span("neutral", "\"");
} else {
left.span("neutral", " " + name + "=\"" + leftVal + "\"");
right.span("neutral", " " + name + "=\"" + rightVal + "\"");
}
}
}
// Attributes that exist only on the right.
for (Map.Entry<String, String> e : rightAttrs.entrySet()) {
if (!leftAttrs.containsKey(e.getKey())) {
left.emptySpan();
right.span("wrong", " " + e.getKey() + "=\"" + e.getValue() + "\"");
}
}
}
// -------------------------------------------------------------------------
// Children: paired using XMLUnit CHILD_LOOKUP + positional fallback
// -------------------------------------------------------------------------
/**
 * Pairs the children of two matched elements and renders them. Output follows left
 * document order; right children that end up without a partner are rendered last.
 * Every rendered child is preceded by a newline on BOTH sides, so the two outputs have
 * the same number of child lines.
 *
 * @param lParentXPath XPath of the left parent, prefix for the left children's XPaths
 * @param rParentXPath XPath of the right parent
 * @param indent       nesting depth of the children
 */
private void compareChildren(List<Node> leftChildren, List<Node> rightChildren,
String lParentXPath, String rParentXPath, int indent) {
List<String> lxps = childXPaths(leftChildren, lParentXPath);
List<String> rxps = childXPaths(rightChildren, rParentXPath);
// Step 1: identify structurally unmatched children per XMLUnit
List<Integer> leftUnmatched = new ArrayList<>();
List<Integer> rightUnmatched = new ArrayList<>();
for (int i = 0; i < leftChildren.size(); i++) {
if (leftDiffs.get(lxps.get(i)) == ComparisonType.CHILD_LOOKUP)
leftUnmatched.add(i);
}
for (int i = 0; i < rightChildren.size(); i++) {
if (rightDiffs.get(rxps.get(i)) == ComparisonType.CHILD_LOOKUP)
rightUnmatched.add(i);
}
// Step 2: pair unmatched children positionally so differently-named siblings
// render as a tag-name diff (with skipped children) rather than independent missing nodes
// leftToRight: left child index -> index of its right partner.
// matchedRight: right child indices that already have a left partner.
Map<Integer, Integer> leftToRight = new LinkedHashMap<>();
Set<Integer> matchedRight = new LinkedHashSet<>();
int positional = Math.min(leftUnmatched.size(), rightUnmatched.size());
for (int i = 0; i < positional; i++) {
leftToRight.put(leftUnmatched.get(i), rightUnmatched.get(i));
matchedRight.add(rightUnmatched.get(i));
}
// Step 3: name-based matching for non-CHILD_LOOKUP element children
// Each left element takes the first not-yet-paired right element with the same tag name.
Set<Integer> leftUnmatchedSet = new HashSet<>(leftUnmatched);
Set<Integer> rightUnmatchedSet = new HashSet<>(rightUnmatched);
for (int li = 0; li < leftChildren.size(); li++) {
if (leftUnmatchedSet.contains(li) || leftToRight.containsKey(li)) continue;
if (!(leftChildren.get(li) instanceof Element le)) continue;
for (int ri = 0; ri < rightChildren.size(); ri++) {
if (rightUnmatchedSet.contains(ri) || matchedRight.contains(ri)) continue;
if (rightChildren.get(ri) instanceof Element re
&& re.getTagName().equals(le.getTagName())) {
leftToRight.put(li, ri);
matchedRight.add(ri);
break;
}
}
}
// Step 4: positional fallback for any remaining unmatched nodes (e.g. text)
// This also picks up CHILD_LOOKUP children left over from step 2 when the two
// unmatched lists had different lengths.
List<Integer> stillLeft = new ArrayList<>(), stillRight = new ArrayList<>();
for (int li = 0; li < leftChildren.size(); li++)
if (!leftToRight.containsKey(li)) stillLeft.add(li);
for (int ri = 0; ri < rightChildren.size(); ri++)
if (!matchedRight.contains(ri)) stillRight.add(ri);
int extra = Math.min(stillLeft.size(), stillRight.size());
for (int i = 0; i < extra; i++) {
leftToRight.put(stillLeft.get(i), stillRight.get(i));
matchedRight.add(stillRight.get(i));
}
// Step 5: render left children in document order
Set<Integer> renderedRight = new LinkedHashSet<>();
for (int li = 0; li < leftChildren.size(); li++) {
left.newline(indent);
right.newline(indent);
Integer ri = leftToRight.get(li);
if (ri != null) {
renderNode(leftChildren.get(li), rightChildren.get(ri),
lxps.get(li), rxps.get(ri), indent);
renderedRight.add(ri);
} else {
// Left-only child: whole subtree is "correct", right gets a placeholder.
renderSubtree(left, leftChildren.get(li), "correct", indent);
right.emptySpan();
}
}
// Step 6: render truly right-only children
for (int ri = 0; ri < rightChildren.size(); ri++) {
if (!renderedRight.contains(ri)) {
left.newline(indent);
right.newline(indent);
left.emptySpan();
renderSubtree(right, rightChildren.get(ri), "wrong", indent);
}
}
}
/**
 * Renders one paired child: element/element and text/text pairs are compared in detail;
 * any other combination is written whole as {@code correct} (left) and {@code wrong} (right).
 */
private void renderNode(Node ln, Node rn, String lxp, String rxp, int indent) {
if (ln instanceof Element le && rn instanceof Element re) {
renderElement(le, re, lxp, rxp, indent);
} else if (ln instanceof Text lt && rn instanceof Text rt) {
renderText(lt, rt, lxp, rxp);
} else {
renderSubtree(left, ln, "correct", indent);
renderSubtree(right, rn, "wrong", indent);
}
}
// -------------------------------------------------------------------------
// Text: driven by XMLUnit TEXT_VALUE
// -------------------------------------------------------------------------
/**
 * Renders a pair of text nodes, stripped of surrounding whitespace. They are marked
 * correct/wrong only if a diff map holds {@code TEXT_VALUE} for the node's XPath; the
 * strings themselves are not compared here.
 */
private void renderText(Text lt, Text rt, String lxp, String rxp) {
boolean differs = leftDiffs.get(lxp) == ComparisonType.TEXT_VALUE
|| rightDiffs.get(rxp) == ComparisonType.TEXT_VALUE;
String lc = lt.getTextContent().strip();
String rc = rt.getTextContent().strip();
if (differs) {
left.span("correct", lc);
right.span("wrong", rc);
} else {
left.span("neutral", lc);
right.span("neutral", rc);
}
}
// -------------------------------------------------------------------------
// Single-side subtree (one CSS class for all tokens)
// -------------------------------------------------------------------------
/**
 * Writes {@code node} and everything below it to a single builder using one CSS class
 * for every token. Elements and non-blank text are written; other node types produce
 * no output.
 *
 * @param indent nesting depth of {@code node}; children are written one level deeper
 */
private void renderSubtree(HtmlBuilder builder, Node node, String cssClass, int indent) {
if (node instanceof Element el) {
builder.span(cssClass, "<" + el.getTagName());
renderAttrsAsClass(builder, el, cssClass);
List<Node> children = significantChildren(el);
if (children.isEmpty()) {
builder.span(cssClass, "/>");
} else {
builder.span(cssClass, ">");
for (Node child : children) {
builder.newline(indent + 1);
renderSubtree(builder, child, cssClass, indent + 1);
}
builder.newline(indent);
builder.span(cssClass, "</" + el.getTagName() + ">");
}
} else if (node instanceof Text text) {
String content = text.getTextContent().strip();
if (!content.isEmpty()) builder.span(cssClass, content);
}
}
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
/** Writes every attribute of {@code el} as {@code  name="value"} using the given CSS class. */
private void renderAttrsAsClass(HtmlBuilder builder, Element el, String cssClass) {
attrMap(el).forEach((name, value) ->
builder.span(cssClass, " " + name + "=\"" + value + "\""));
}
/** Returns the attributes of {@code el} as name → value, in the order the DOM reports them. */
private Map<String, String> attrMap(Element el) {
NamedNodeMap nnm = el.getAttributes();
Map<String, String> map = new LinkedHashMap<>();
for (int i = 0; i < nnm.getLength(); i++) {
Attr a = (Attr) nnm.item(i);
map.put(a.getName(), a.getValue());
}
return map;
}
/**
 * Returns the children of {@code el} that take part in rendering: all element nodes and
 * those {@code TEXT_NODE} children that are not blank. Every other node type is dropped.
 */
private List<Node> significantChildren(Element el) {
NodeList nl = el.getChildNodes();
List<Node> result = new ArrayList<>();
for (int i = 0; i < nl.getLength(); i++) {
Node n = nl.item(i);
if (n.getNodeType() == Node.ELEMENT_NODE) {
result.add(n);
} else if (n.getNodeType() == Node.TEXT_NODE
&& !n.getTextContent().strip().isEmpty()) {
result.add(n);
}
}
return result;
}
/**
 * Generates XMLUnit-compatible XPaths for a list of child nodes: elements become
 * {@code parent/tag[n]} with {@code n} counted per tag name, and every other node becomes
 * {@code parent/text()[n]} with one shared counter. Both counters are 1-based.
 */
private List<String> childXPaths(List<Node> children, String parentXPath) {
Map<String, Integer> elementCounts = new LinkedHashMap<>();
int textCount = 0;
List<String> result = new ArrayList<>(children.size());
for (Node child : children) {
if (child.getNodeType() == Node.ELEMENT_NODE) {
String tag = ((Element) child).getTagName();
int n = elementCounts.merge(tag, 1, Integer::sum);
result.add(parentXPath + "/" + tag + "[" + n + "]");
} else {
result.add(parentXPath + "/text()[" + (++textCount) + "]");
}
}
return result;
}
}

View file

@ -0,0 +1,61 @@
package com.github.shautvast.xmldiff;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.StringReader;
/**
 * Public API for XML diffing.
 *
 * <p>Takes two XML strings (left = expected, right = actual) and returns two HTML strings
 * representing a side-by-side diff. Each output is a sequence of {@code <span>} elements
 * annotated with CSS classes:
 * <ul>
 *   <li>{@code neutral} &mdash; token is identical on both sides</li>
 *   <li>{@code correct} &mdash; token is on the left (expected) side and differs</li>
 *   <li>{@code wrong} &mdash; token is on the right (actual) side and differs</li>
 *   <li>{@code skipped} &mdash; child content of an element whose tag name differs</li>
 * </ul>
 *
 * <p>Structural diffing is performed by <a href="https://www.xmlunit.org/">XMLUnit 2</a>
 * using name-based child matching. Attribute order is not significant.
 */
public class XmlDiff {

    /** Static utility; not instantiable. */
    private XmlDiff() {
    }

    /**
     * Compares two XML strings and returns annotated HTML for both sides.
     *
     * @param leftXml  the expected XML
     * @param rightXml the actual XML
     * @return a {@link DiffResult} containing the left and right HTML strings
     * @throws XmlDiffException if either string cannot be parsed as XML
     */
    public static DiffResult compare(String leftXml, String rightXml) {
        // Parse first: this is the step that turns bad input into XmlDiffException.
        // Running the diff engine before it would let malformed XML fail inside the
        // engine with an exception type this method does not document.
        Document leftDoc = parse(leftXml);
        Document rightDoc = parse(rightXml);

        DiffEngine.DiffMaps maps = DiffEngine.compute(leftXml, rightXml);

        HtmlBuilder leftBuilder = new HtmlBuilder();
        HtmlBuilder rightBuilder = new HtmlBuilder();
        new HtmlRenderer(leftBuilder, rightBuilder, maps.left(), maps.right())
                .render(leftDoc.getDocumentElement(), rightDoc.getDocumentElement());
        return new DiffResult(leftBuilder.build(), rightBuilder.build());
    }

    /**
     * Parses {@code xml} into a DOM document with a parser that does not resolve external
     * entities or external DTDs, since the input is caller-supplied.
     *
     * @throws XmlDiffException wrapping any failure to configure the parser or parse the input
     */
    private static Document parse(String xml) {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            // XXE hardening: never fetch anything the document points at.
            factory.setFeature(javax.xml.XMLConstants.FEATURE_SECURE_PROCESSING, true);
            factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            return factory.newDocumentBuilder()
                    .parse(new InputSource(new StringReader(xml)));
        } catch (Exception e) {
            throw new XmlDiffException("Failed to parse XML: " + e.getMessage(), e);
        }
    }
}

View file

@ -0,0 +1,7 @@
package com.github.shautvast.xmldiff;
/**
 * Unchecked exception thrown by the XML diff API; always carries the underlying cause.
 */
public class XmlDiffException extends RuntimeException {

    /**
     * @param message description of what failed
     * @param cause   the underlying exception
     */
    public XmlDiffException(final String message, final Throwable cause) {
        super(message, cause);
    }
}

View file

@ -0,0 +1,169 @@
package com.github.shautvast.xmldiff;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/**
 * End-to-end tests for {@link XmlDiff#compare(String, String)}: each test diffs two small
 * documents and checks which CSS classes end up around which tokens.
 */
class XmlDiffTest {

    // -------------------------------------------------------------------------
    // 1. Identical elements -> everything neutral
    // -------------------------------------------------------------------------

    @Test
    void identicalSimple_allNeutral() {
        assertNoDiff(XmlDiff.compare("<root/>", "<root/>"));
    }

    @Test
    void identicalWithText_allNeutral() {
        assertNoDiff(XmlDiff.compare("<root>hello</root>", "<root>hello</root>"));
    }

    @Test
    void identicalNested_allNeutral() {
        String xml = "<root><child attr=\"val\">text</child></root>";
        assertNoDiff(XmlDiff.compare(xml, xml));
    }

    // -------------------------------------------------------------------------
    // 2. Differing text content
    // -------------------------------------------------------------------------

    @Test
    void differingText() {
        DiffResult r = XmlDiff.compare("<root>expected</root>", "<root>actual</root>");
        assertContains(r.leftHtml(), "correct", "expected");
        assertContains(r.rightHtml(), "wrong", "actual");
        assertAbsent(r.leftHtml(), "wrong");
        assertAbsent(r.rightHtml(), "correct");
    }

    // -------------------------------------------------------------------------
    // 3. Differing attribute value -> name neutral, value correct/wrong
    // -------------------------------------------------------------------------

    @Test
    void differingAttributeValue() {
        DiffResult r = XmlDiff.compare(
                "<root attr=\"expected\"/>",
                "<root attr=\"actual\"/>");
        assertContains(r.leftHtml(), "correct", "expected");
        assertContains(r.rightHtml(), "wrong", "actual");
        // The highlighted span must hold the value and nothing else...
        assertTrue(r.leftHtml().contains("<span class=\"correct\">expected</span>"),
                "only the attribute value should be marked correct in: " + r.leftHtml());
        assertTrue(r.rightHtml().contains("<span class=\"wrong\">actual</span>"),
                "only the attribute value should be marked wrong in: " + r.rightHtml());
        // ...and the attribute name must sit in a neutral span. The renderer writes a
        // leading space before the name, so the negative check has to include it.
        assertContains(r.leftHtml(), "neutral", "attr");
        assertContains(r.rightHtml(), "neutral", "attr");
        assertFalse(r.leftHtml().contains("<span class=\"correct\"> attr"),
                "attr name should not be marked correct");
        assertFalse(r.rightHtml().contains("<span class=\"wrong\"> attr"),
                "attr name should not be marked wrong");
    }

    // -------------------------------------------------------------------------
    // 4. Attribute only on one side
    // -------------------------------------------------------------------------

    @Test
    void attributeOnlyOnLeft() {
        DiffResult r = XmlDiff.compare("<root x=\"1\"/>", "<root/>");
        assertContains(r.leftHtml(), "correct", "x");
        assertTrue(r.rightHtml().contains("<span></span>"), "right should have placeholder");
    }

    @Test
    void attributeOnlyOnRight() {
        DiffResult r = XmlDiff.compare("<root/>", "<root x=\"1\"/>");
        assertTrue(r.leftHtml().contains("<span></span>"), "left should have placeholder");
        assertContains(r.rightHtml(), "wrong", "x");
    }

    // -------------------------------------------------------------------------
    // 5. Differing element name -> tag correct/wrong, children skipped
    // -------------------------------------------------------------------------

    @Test
    void differingElementName() {
        DiffResult r = XmlDiff.compare(
                "<root><expected>text</expected></root>",
                "<root><actual>text</actual></root>");
        assertContains(r.leftHtml(), "correct", "expected");
        assertContains(r.rightHtml(), "wrong", "actual");
        assertTrue(r.leftHtml().contains("class=\"skipped\""), "left children should be skipped");
        assertTrue(r.rightHtml().contains("class=\"skipped\""), "right children should be skipped");
    }

    // -------------------------------------------------------------------------
    // 6. Extra child on left
    // -------------------------------------------------------------------------

    @Test
    void extraChildOnLeft() {
        DiffResult r = XmlDiff.compare("<root><child/></root>", "<root/>");
        assertContains(r.leftHtml(), "correct", "child");
        assertTrue(r.rightHtml().contains("<span></span>"), "right should have placeholder");
        assertAbsent(r.rightHtml(), "correct");
    }

    // -------------------------------------------------------------------------
    // 7. Extra child on right
    // -------------------------------------------------------------------------

    @Test
    void extraChildOnRight() {
        DiffResult r = XmlDiff.compare("<root/>", "<root><child/></root>");
        assertTrue(r.leftHtml().contains("<span></span>"), "left should have placeholder");
        assertContains(r.rightHtml(), "wrong", "child");
        assertAbsent(r.leftHtml(), "wrong");
    }

    // -------------------------------------------------------------------------
    // 8. Attribute order does not matter
    // -------------------------------------------------------------------------

    @Test
    void attributeOrderDoesNotMatter() {
        DiffResult r = XmlDiff.compare(
                "<root x=\"1\" y=\"2\"/>",
                "<root y=\"2\" x=\"1\"/>");
        assertNoDiff(r);
    }

    // -------------------------------------------------------------------------
    // 9. Nested elements -> only the differing subtree is marked
    // -------------------------------------------------------------------------

    @Test
    void nestedPartialDiff() {
        DiffResult r = XmlDiff.compare(
                "<root><same/><diff>expected</diff></root>",
                "<root><same/><diff>actual</diff></root>");
        assertTrue(r.leftHtml().contains("class=\"neutral\""), "unchanged parts should be neutral");
        assertContains(r.leftHtml(), "correct", "expected");
        assertContains(r.rightHtml(), "wrong", "actual");
    }

    // -------------------------------------------------------------------------
    // 10. Self-closing element, no diff
    // -------------------------------------------------------------------------

    @Test
    void selfClosingNoDiff() {
        assertNoDiff(XmlDiff.compare("<root><item id=\"1\"/></root>", "<root><item id=\"1\"/></root>"));
    }

    // -------------------------------------------------------------------------
    // Helpers
    // -------------------------------------------------------------------------

    /** Asserts that neither side contains a correct, wrong or skipped span. */
    private void assertNoDiff(DiffResult r) {
        assertAbsent(r.leftHtml(), "correct");
        assertAbsent(r.leftHtml(), "wrong");
        assertAbsent(r.leftHtml(), "skipped");
        assertAbsent(r.rightHtml(), "correct");
        assertAbsent(r.rightHtml(), "wrong");
        assertAbsent(r.rightHtml(), "skipped");
    }

    /**
     * Asserts that {@code text} appears INSIDE a span carrying {@code cssClass}. Checking
     * the class and the text independently would also pass when the text sits in some
     * other span. {@code text} is matched against the escaped HTML, so it must not
     * contain characters that get HTML-escaped.
     */
    private void assertContains(String html, String cssClass, String text) {
        assertTrue(spanContains(html, cssClass, text),
                "expected '" + text + "' inside a class=" + cssClass + " span in: " + html);
    }

    /** Asserts that no span carries {@code cssClass}. */
    private void assertAbsent(String html, String cssClass) {
        assertFalse(html.contains("class=\"" + cssClass + "\""),
                "unexpected class=" + cssClass + " in: " + html);
    }

    /**
     * Scans every {@code <span class="cssClass">...</span>} in {@code html} and reports
     * whether any of them contains {@code text}. Relies on span content being escaped,
     * i.e. on {@code </span>} never occurring inside a span's text.
     */
    private static boolean spanContains(String html, String cssClass, String text) {
        String open = "<span class=\"" + cssClass + "\">";
        int from = html.indexOf(open);
        while (from >= 0) {
            int start = from + open.length();
            int end = html.indexOf("</span>", start);
            if (end < 0) {
                return false;
            }
            if (html.substring(start, end).contains(text)) {
                return true;
            }
            from = html.indexOf(open, end);
        }
        return false;
    }
}

143
xmldiff.md Normal file
View file

@ -0,0 +1,143 @@
# xmldiff — Implementation Plan
## Goal
A Java library that takes two XML strings (left = expected, right = actual) and produces two HTML strings suitable for rendering a side-by-side diff. Each output is a `<span>` tree with inner spans annotated with CSS classes.
## CSS Classes
| Class | Meaning |
|------------|-----------------------------------------------------------|
| `neutral` | This token is identical in both sides |
| `correct` | This token is on the **left** side and differs from right |
| `wrong` | This token is on the **right** side and differs from left |
| `skipped` | Child content of an element whose **tag name** differs |
## Diff Granularity Rules
| Token | If equal | If different |
|-------------------------------|------------|--------------------------------------------------------------------------------------|
| Element name | `neutral` | Left → `correct`, right → `wrong`; all content (attrs, children, text) → `skipped` |
| Attribute name | `neutral` | Left attr name → `correct`, right attr name → `wrong` |
| Attribute value | `neutral` | Left attr name neutral, left value → `correct`; same on right → `wrong` |
| Text content | `neutral` | Left text → `correct`, right text → `wrong` |
| Element present only on left | — | Left subtree → `correct`, right → empty `<span></span>` |
| Element present only on right | — | Right subtree → `wrong`, left → empty `<span></span>` |
Attribute **order is not significant**: attributes are matched by name, so `<a x="1" y="2"/>` and `<a y="2" x="1"/>` compare as identical.
## Output Format
Each output string is pretty-printed HTML. XML special characters (`<`, `>`, `&`, `"`) inside span text are HTML-escaped. Indentation uses 2 spaces per level. Output does **not** include an XML declaration.
Example shape:
```html
<span class="neutral">&lt;root&gt;
&lt;child </span><span class="correct">attr</span><span class="neutral">="</span><span class="correct">value</span><span class="neutral">"&gt;
</span><span class="correct">text here</span><span class="neutral">
&lt;/child&gt;
&lt;/root&gt;</span>
```
## Dependencies
```xml
<!-- XML diffing -->
<dependency>
<groupId>org.xmlunit</groupId>
<artifactId>xmlunit-core</artifactId>
<version>2.10.0</version>
</dependency>
<!-- Testing -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter</artifactId>
<version>5.11.0</version>
<scope>test</scope>
</dependency>
```
**XMLUnit 2.x** is the diffing engine. It produces a list of `Comparison` objects, each with:
- `getType()` — `ComparisonType` enum: `ELEMENT_TAG_NAME`, `ATTR_VALUE`, `ATTR_NAME_LOOKUP`, `TEXT_VALUE`, `CHILD_NODELIST_LENGTH`, `HAS_CHILD_NODES`, etc.
- `getControlDetails().getXPath()` — XPath of the affected node on the left side
- `getTestDetails().getXPath()` — XPath of the affected node on the right side
## Algorithm
### Step 1 — Diff (DiffEngine)
```
Diff diff = DiffBuilder
.compare(leftXml)
.withTest(rightXml)
.withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byName))
.ignoreWhitespace()
.build();
For each Comparison c in diff.getDifferences():
record (c.getControlDetails().getXPath(), c.getTestDetails().getXPath(), c.getType())
into two maps: leftDiffs: XPath → ComparisonType
rightDiffs: XPath → ComparisonType
```
### Step 2 — Render (HtmlRenderer)
Walk the two DOM trees in parallel, pretty-printing each side to its own HTML output. At each node, look up its XPath in the relevant diff map to determine its CSS class.
**Element node:**
```
xp = xpathOf(node)
if leftDiffs contains xp with type ELEMENT_TAG_NAME:
emit tag name as correct/wrong
emit all attributes + children recursively as skipped
else:
emit tag name as neutral
for each attribute (in document order):
emit based on attr-level diff lookup
recurse into children
```
**Text node:**
```
xp = xpathOf(node)
if leftDiffs/rightDiffs contains xp with type TEXT_VALUE:
emit as correct / wrong
else:
emit as neutral
```
**Missing child (`CHILD_LOOKUP` on the side where the child exists):**
```
emit present side as correct/wrong
emit absent side as empty <span></span>
```
XPaths are computed from the DOM tree as each node is visited, matching the XPaths that XMLUnit generates (e.g. `/root[1]/child[1]`).
### Step 3 — Output
`XmlDiff.compare()` calls `DiffEngine`, then runs a single `HtmlRenderer` pass that walks the left and right trees together and writes to one `HtmlBuilder` per side, returning a `DiffResult`.
## Test Cases
| # | Scenario | Left class | Right class |
|---|---------------------------------------|------------|-------------|
| 1 | Identical simple elements | all `neutral` | all `neutral` |
| 2 | Differing text content | text `correct` | text `wrong` |
| 3 | Differing attribute value | value `correct` | value `wrong` (name neutral) |
| 4 | Differing attribute name | name `correct` | name `wrong` |
| 5 | Differing element name | name `correct`, children `skipped` | name `wrong`, children `skipped` |
| 6 | Extra child on left only | child `correct` | empty span |
| 7 | Extra child on right only | empty span | child `wrong` |
| 8 | Attribute order differs | all `neutral` | all `neutral` |
| 9 | Nested elements, partial diff | only differing subtree marked | same |
| 10| Self-closing element, no diff | all `neutral` | all `neutral` |
## Assumptions
- Comments, processing instructions, and CDATA sections are ignored.
- Whitespace-only text nodes between elements are ignored (XMLUnit `ignoreWhitespace()`).
- Namespace prefixes are treated as plain text; no namespace-aware comparison.
- The library is stateless; `XmlDiff.compare()` is safe to call concurrently.