From f9906e3e59db77098e75a3b8b1efcd572874ded7 Mon Sep 17 00:00:00 2001 From: Niels Bertram Date: Sat, 7 Mar 2026 02:34:26 +1000 Subject: [PATCH 1/3] remove unused imports --- .../plugins/design/builder/scanner/CatalogFileScanner.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/scanner/CatalogFileScanner.java b/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/scanner/CatalogFileScanner.java index ac17feb..ba1dd08 100644 --- a/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/scanner/CatalogFileScanner.java +++ b/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/scanner/CatalogFileScanner.java @@ -24,7 +24,6 @@ import io.fares.maven.plugins.utils.CollectionUtils; import io.github.classgraph.ClassGraph; import io.github.classgraph.ScanResult; -import org.apache.maven.artifact.DependencyResolutionRequiredException; import org.apache.maven.model.Resource; import org.eclipse.aether.RepositorySystemSession; import org.eclipse.aether.repository.RemoteRepository; @@ -34,15 +33,12 @@ import java.io.File; import java.io.IOException; -import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.*; import java.util.regex.Pattern; -import static java.text.MessageFormat.format; - public final class CatalogFileScanner { private static final Logger log = LoggerFactory.getLogger(CatalogFileScanner.class); From 48a824d6f754069d41225b18a163b4856c5ec737 Mon Sep 17 00:00:00 2001 From: Niels Bertram Date: Sat, 7 Mar 2026 12:58:40 +1000 Subject: [PATCH 2/3] replace xml crawler with more efficient and accurate flattening approach --- core/pom.xml | 5 - .../design/builder/MavenCatalogResolver.java | 45 ++-- .../builder/flattener/CatalogUriResolver.java | 50 ++-- .../flattener/FlattenImportPathMojo.java | 36 +-- .../flattener/SimpleNameCrawlerListener.java | 43 +-- 
.../flattener/StreamingXmlFlattener.java | 246 ++++++++++++++++++ .../builder/flattener/XmlLinkRules.java | 200 ++++++++++++++ .../flattener/StreamingXmlFlattenerTest.java | 141 ++++++++++ parent/pom.xml | 7 - 9 files changed, 665 insertions(+), 108 deletions(-) rename core/src/main/java/io/fares/design/builder/JlibsResolverBridge.java => maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/CatalogUriResolver.java (68%) create mode 100644 maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/StreamingXmlFlattener.java create mode 100644 maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/XmlLinkRules.java create mode 100644 maven-plugin/src/test/java/io/fares/maven/plugins/design/builder/flattener/StreamingXmlFlattenerTest.java diff --git a/core/pom.xml b/core/pom.xml index 3f1ae8c..254104f 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -48,11 +48,6 @@ xerces xercesImpl - - in.jlibs - jlibs-xml-crawler - - io.github.classgraph classgraph diff --git a/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/MavenCatalogResolver.java b/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/MavenCatalogResolver.java index ec87e35..c8a878a 100644 --- a/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/MavenCatalogResolver.java +++ b/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/MavenCatalogResolver.java @@ -19,18 +19,8 @@ package io.fares.maven.plugins.design.builder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.File; -import java.net.URI; -import java.net.URL; -import java.net.MalformedURLException; -import java.net.URISyntaxException; -import java.util.Objects; - -import static java.text.MessageFormat.format; - +import io.fares.maven.plugins.design.builder.flattener.DependencyResource; +import io.fares.maven.plugins.design.builder.flattener.ResourceEntry; import org.apache.maven.model.Dependency; import 
org.apache.maven.plugin.MojoExecutionException; import org.apache.xml.resolver.CatalogManager; @@ -40,11 +30,20 @@ import org.eclipse.aether.artifact.DefaultArtifact; import org.eclipse.aether.impl.ArtifactResolver; import org.eclipse.aether.resolution.ArtifactRequest; -import org.eclipse.aether.resolution.ArtifactResult; import org.eclipse.aether.resolution.ArtifactResolutionException; +import org.eclipse.aether.resolution.ArtifactResult; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import io.fares.maven.plugins.design.builder.flattener.DependencyResource; -import io.fares.maven.plugins.design.builder.flattener.ResourceEntry; +import java.io.File; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.Objects; +import java.util.Optional; + +import static java.text.MessageFormat.format; /** * A {@link CatalogResolver} that is capable to convert a maven: @@ -54,6 +53,7 @@ public final class MavenCatalogResolver extends CatalogResolver { public static final String URI_SCHEME_MAVEN = "maven"; + public static final String URI_SCHEME_CLASSPATH = "classpath"; Logger log = LoggerFactory.getLogger(MavenCatalogResolver.class); @@ -68,13 +68,12 @@ public final class MavenCatalogResolver extends CatalogResolver { */ private ClassLoader classLoader; - // FIXME move classloader construction to here public MavenCatalogResolver() { - + this.classLoader = Thread.currentThread().getContextClassLoader(); } /** - * Construct a JlibsResolverBridge that is capable of resolving resources inside + * Construct a resolver that is capable of resolving resources inside * Maven a list of {@link Dependency}. 
* * @param catalogManager The {@link CatalogManager} to be used to resolve any entity @@ -89,7 +88,7 @@ public MavenCatalogResolver(CatalogManager catalogManager, } /** - * Construct a JlibsResolverBridge that is capable of resolving resources inside + * Construct a resolver that is capable of resolving resources inside * Maven a list of {@link Dependency}. * * @param catalogManager The {@link CatalogManager} to be used to resolve any entity @@ -102,18 +101,12 @@ public MavenCatalogResolver(CatalogManager catalogManager, RepositorySystemSession repositorySystemSession, ArtifactResolver artifactResolver, ClassLoader classloader) { - // catalog gets initialized super(Objects.requireNonNull(catalogManager, "The catalog manager must not be null.")); + this.classLoader = Optional.ofNullable(classloader).orElseGet(() -> Thread.currentThread().getContextClassLoader()); this.artifactResolver = Objects.requireNonNull(artifactResolver, "Artifact resolver must not be null."); this.repositorySystemSession = Objects.requireNonNull(repositorySystemSession, "Repository system session must not be null."); - if (classloader != null) { - this.classLoader = classloader; - } else { - this.classLoader = Thread.currentThread().getContextClassLoader(); - } - } @Override diff --git a/core/src/main/java/io/fares/design/builder/JlibsResolverBridge.java b/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/CatalogUriResolver.java similarity index 68% rename from core/src/main/java/io/fares/design/builder/JlibsResolverBridge.java rename to maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/CatalogUriResolver.java index ea30d79..e6ed731 100644 --- a/core/src/main/java/io/fares/design/builder/JlibsResolverBridge.java +++ b/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/CatalogUriResolver.java @@ -17,47 +17,40 @@ * under the License. 
*/ -package io.fares.design.builder; +package io.fares.maven.plugins.design.builder.flattener; +import org.apache.xml.resolver.Catalog; +import org.apache.xml.resolver.tools.CatalogResolver; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.xml.sax.InputSource; import java.io.File; import java.net.URI; import java.net.URL; import java.util.Objects; -import org.apache.xml.resolver.Catalog; -import org.apache.xml.resolver.tools.CatalogResolver; - -import org.xml.sax.EntityResolver; -import org.xml.sax.InputSource; - -import jlibs.xml.sax.crawl.XMLCrawler; - /** - * Implements the "non-standard" JLibs resolver interface delegating through to a configured SAX @{@link EntityResolver}. + * Resolves import/include links through the configured catalog resolver. */ -public class JlibsResolverBridge implements XMLCrawler.Resolver { +public class CatalogUriResolver { - private static final Logger log = LoggerFactory.getLogger(JlibsResolverBridge.class); + private static final Logger log = LoggerFactory.getLogger(CatalogUriResolver.class); - private CatalogResolver resolver; + private final CatalogResolver resolver; - public JlibsResolverBridge(CatalogResolver resolver) { - this.resolver = Objects.requireNonNull(resolver); + public CatalogUriResolver(CatalogResolver resolver) { + this.resolver = Objects.requireNonNull(resolver, "catalog resolver cannot be null"); } - @Override public String resolve(String namespace, String base, String location) { - try { InputSource source = resolver.resolveEntity(namespace, location); if (source != null) { return new URL(source.getSystemId()).toExternalForm(); } - } catch (Exception ignore) { - log.warn("Failed to resolve entity systemId " + location, ignore); + } catch (Exception e) { + log.warn("Failed to resolve entity systemId {}", location, e); } Catalog catalog = resolver.getCatalog(); @@ -67,23 +60,21 @@ public String resolve(String namespace, String base, String location) { if (result != null) { return result; } - 
} catch (Exception ignore) { - log.warn("Failed to resolve uri " + location, ignore); + } catch (Exception e) { + log.warn("Failed to resolve uri {}", location, e); } try { - // check if we have both, if not the publicId will whatever is in location String publicId = namespace != null ? namespace : location; String systemId = namespace != null ? location : null; String result = catalog.resolvePublic(publicId, systemId); if (result != null) { return result; } - } catch (Exception ignore) { - log.debug("Failed to resolve uri " + location, ignore); + } catch (Exception e) { + log.debug("Failed to resolve uri {}", location, e); } - // last change file based import relative to the parent if (base != null && location != null) { String relativeResource; if (location.lastIndexOf('/') > -1 && location.lastIndexOf('/') < location.length()) { @@ -98,20 +89,15 @@ public String resolve(String namespace, String base, String location) { return f.getAbsolutePath(); } } else { - // blindly trust, could be a related resource on the web, no way to check return uri.toString(); } } - if (log.isWarnEnabled()) + if (log.isWarnEnabled()) { log.warn("Unable to resolve publicId={} systemId={}", namespace, location); + } return null; - - } - - public EntityResolver getResolver() { - return resolver; } } diff --git a/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/FlattenImportPathMojo.java b/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/FlattenImportPathMojo.java index bdafe7d..6197222 100644 --- a/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/FlattenImportPathMojo.java +++ b/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/FlattenImportPathMojo.java @@ -20,13 +20,11 @@ package io.fares.maven.plugins.design.builder.flattener; -import io.fares.design.builder.JlibsResolverBridge; import io.fares.maven.plugins.design.builder.MavenCatalogResolver; import 
io.fares.maven.plugins.design.builder.MavenCatalogResolverFactory; import io.fares.maven.plugins.design.builder.scanner.CatalogFileScanner; import io.fares.maven.plugins.design.builder.scanner.CatalogFileScannerFactory; import io.fares.maven.plugins.design.builder.scanner.SimpleSourceInclusionScanner; -import jlibs.xml.sax.crawl.XMLCrawler; import org.apache.maven.artifact.repository.ArtifactRepository; import org.apache.maven.model.Resource; import org.apache.maven.plugin.AbstractMojo; @@ -38,7 +36,6 @@ import org.eclipse.aether.RepositorySystem; import org.eclipse.aether.RepositorySystemSession; import org.eclipse.aether.repository.RemoteRepository; -import org.xml.sax.InputSource; import java.io.File; import java.io.IOException; @@ -81,6 +78,7 @@ public class FlattenImportPathMojo extends AbstractMojo { @Component private CatalogFileScannerFactory catalogFileScannerFactory; + /** * The entry point to Aether, i.e. the component doing all the work. */ @@ -160,6 +158,7 @@ public class FlattenImportPathMojo extends AbstractMojo { @Parameter(property = "error.halt", alias = "haltOnError", defaultValue = "false") private boolean haltOnError; + /** * This flag will cause the content of existing imports to be overridden if * it was already flattened. 
Please make sure you always use -U clean @@ -236,7 +235,7 @@ public void execute() throws MojoExecutionException { } // the crawler is our friend as he can flatten path references of - // all sorts of XML documents including xsd and wsdl imports + // all sorts of XML documents, including xsd and wsdl imports if (getLog().isDebugEnabled() || verbose) { getLog().info("Following catalogs have been provided: "); for (URL catalogFile : catalogFiles) { @@ -247,9 +246,12 @@ public void execute() throws MojoExecutionException { MavenCatalogResolver resolver = mavenCatalogResolverFactory.newInstance(repositorySystemSession, catalogFiles); // r = createXercesResolver(catalogURLs); - JlibsResolverBridge resolverBridge = new JlibsResolverBridge(resolver); + CatalogUriResolver resolverBridge = new CatalogUriResolver(resolver); + StreamingXmlFlattener crawler = new StreamingXmlFlattener( + resolverBridge, + new SimpleNameCrawlerListener(outputDirectory, overrideExistingReference)); - // either flatten file or sources + // either flatten a file or sources Set artifacts = new HashSet<>(10); if (sourceDirectory == null && flattenTarget == null) { @@ -294,19 +296,12 @@ public void execute() throws MojoExecutionException { try { if (getLog().isDebugEnabled() || verbose) getLog().info("Flatten file: " + targetFile.toExternalForm()); - // FIXME do a proper URI check - InputSource source = new InputSource(targetFile.toExternalForm()); - XMLCrawler crawler = new XMLCrawler(); - crawler.setResolver(resolverBridge); - crawler.crawl(source, new SimpleNameCrawlerListener( - outputDirectory, overrideExistingReference), null); + crawler.crawl(targetFile); } catch (Throwable e) { errorEncountered.add(e); getLog().error("Failed processing " + targetFile, e); if (haltOnError) throw e; - } finally { - // FIXME no longer needed? 
crawler.reset(); } } @@ -329,9 +324,9 @@ public void execute() throws MojoExecutionException { @SuppressWarnings("unused") private XMLCatalogResolver createXercesResolver(List catalogsArg) - throws IOException, URISyntaxException { + throws URISyntaxException { - if (catalogsArg.size() > 0 && (getLog().isDebugEnabled() || verbose)) { + if (!catalogsArg.isEmpty() && ( getLog().isDebugEnabled() || verbose )) { getLog().debug("Adding catalogs to resolver: "); } @@ -380,12 +375,7 @@ protected SimpleSourceInclusionScanner getSourceInclusionScanner() { } - - /* - * - * Getters and Setters - * - */ + // region Getters and Setters public MavenProject getProject() { return project; @@ -419,4 +409,6 @@ public void setMavenCatalogResolverFactory(MavenCatalogResolverFactory mavenCata this.mavenCatalogResolverFactory = mavenCatalogResolverFactory; } + // endregion + } diff --git a/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/SimpleNameCrawlerListener.java b/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/SimpleNameCrawlerListener.java index 8564194..ad0a4e9 100644 --- a/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/SimpleNameCrawlerListener.java +++ b/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/SimpleNameCrawlerListener.java @@ -19,17 +19,14 @@ package io.fares.maven.plugins.design.builder.flattener; -import jlibs.core.io.FileUtil; -import jlibs.core.lang.StringUtil; -import jlibs.xml.sax.crawl.CrawlerListener; -import jlibs.xml.sax.crawl.DefaultCrawlerListener; +import io.fares.maven.plugins.utils.StringUtils; import java.io.File; import java.net.URI; import java.net.URL; import java.util.StringTokenizer; -public class SimpleNameCrawlerListener implements CrawlerListener { +public class SimpleNameCrawlerListener { /** * Setting this to false will result in every file @@ -47,12 +44,10 @@ public SimpleNameCrawlerListener(File dir, boolean overrideExistingFile) { 
this.overrideExistingFile = overrideExistingFile; } - @Override public boolean doCrawl(URL url) { return true; } - @Override public File toFile(URL url, String extension) { /** * FIXME this will blow up when a reference url from an app server that @@ -66,7 +61,7 @@ public File toFile(URL url, String extension) { if (overrideExistingFile) return new File(dir, fileName); else - return FileUtil.findFreeFile(new File(dir, fileName)); + return findFreeFile(new File(dir, fileName)); } @@ -83,8 +78,7 @@ private String suggestFile(URI uri, String extension) { } /** - * Works out the file system path without stuffing the name up like the - * {@link DefaultCrawlerListener} does. + * Works out the file system path without stuffing the name up. * * @param path the parent path to use for the file base * @param extension the extension to give to the file @@ -93,7 +87,7 @@ private String suggestFile(URI uri, String extension) { */ private String suggestDirFile(String path, String extension) { - String tokens[] = StringUtil.getTokens(path, "/", true); + String tokens[] = StringUtils.split(path, '/', true); String file = tokens[tokens.length - 1]; int dot = file.lastIndexOf("."); @@ -113,15 +107,13 @@ private String suggestDirFile(String path, String extension) { * @return a new file handle */ String suggestGeneratedUri(String path, String extension) { - String tokens[] = StringUtil.getTokens(path, "/", true); + String tokens[] = StringUtils.split(path, '/', true); - String parts[] = StringUtil.getTokens(tokens[tokens.length - 1], "?", - true); + String parts[] = StringUtils.split(tokens[tokens.length - 1], '?', true); // String wsdlName = parts[0]; - String attrib[] = StringUtil.getTokens(parts[parts.length - 1], "&", - true); + String attrib[] = StringUtils.split(parts[parts.length - 1], '&', true); if (attrib.length == 3) { String typeAttrib = attrib[1]; @@ -164,4 +156,23 @@ private String[] splitMe(String token) { } + private File findFreeFile(File file) { + if (!file.exists()) 
{ + return file; + } + + final String name = file.getName(); + final int dot = name.lastIndexOf('.'); + final String base = dot == -1 ? name : name.substring(0, dot); + final String extension = dot == -1 ? "" : name.substring(dot); + + int i = 1; + File candidate = new File(file.getParentFile(), base + i + extension); + while (candidate.exists()) { + i++; + candidate = new File(file.getParentFile(), base + i + extension); + } + return candidate; + } + } diff --git a/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/StreamingXmlFlattener.java b/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/StreamingXmlFlattener.java new file mode 100644 index 0000000..a3e5b29 --- /dev/null +++ b/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/StreamingXmlFlattener.java @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.fares.maven.plugins.design.builder.flattener; + +import javax.xml.namespace.QName; +import javax.xml.stream.*; +import javax.xml.stream.events.Attribute; +import javax.xml.stream.events.StartElement; +import javax.xml.stream.events.XMLEvent; +import java.io.*; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Path; +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Deque; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * Flattens XML imports/includes while preserving comments and whitespace by streaming events. + */ +public class StreamingXmlFlattener { + + private final CatalogUriResolver resolver; + + private final SimpleNameCrawlerListener fileMapper; + + private final XmlLinkRules linkRules = new XmlLinkRules(); + + private final XMLInputFactory inputFactory; + + private final XMLOutputFactory outputFactory; + + private final XMLEventFactory eventFactory; + + private final Map crawled = new HashMap<>(); + + public StreamingXmlFlattener(CatalogUriResolver resolver, + SimpleNameCrawlerListener fileMapper) { + this.resolver = resolver; + this.fileMapper = fileMapper; + this.inputFactory = XMLInputFactory.newFactory(); + this.outputFactory = XMLOutputFactory.newFactory(); + this.eventFactory = XMLEventFactory.newFactory(); + + this.inputFactory.setProperty(XMLInputFactory.IS_COALESCING, false); + this.inputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, true); + this.inputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false); + this.inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, true); + } + + public File crawl(URL sourceUrl) throws IOException { + File existing = crawled.get(sourceUrl); + if (existing != null) { + return existing; + } + + try (InputStream raw = openInputStream(sourceUrl); + InputStream in = new 
BufferedInputStream(raw)) { + XMLEventReader reader = inputFactory.createXMLEventReader(sourceUrl.toExternalForm(), in); + List preamble = new ArrayList<>(); + Deque path = new ArrayDeque<>(); + File currentOutputFile = null; + XMLEventWriter writer = null; + + while (reader.hasNext()) { + XMLEvent event = reader.nextEvent(); + + if (currentOutputFile == null) { + preamble.add(event); + if (event.isStartElement()) { + StartElement root = event.asStartElement(); + String extension = linkRules.extensionForRoot(root.getName()); + currentOutputFile = fileMapper.toFile(sourceUrl, extension); + if (currentOutputFile.getParentFile() != null && !currentOutputFile.getParentFile().exists()) { + boolean created = currentOutputFile.getParentFile().mkdirs(); + if (!created) { + throw new IOException("Failed to create output directory " + currentOutputFile.getParentFile().getAbsolutePath()); + } + } + crawled.put(sourceUrl, currentOutputFile); + writer = outputFactory.createXMLEventWriter(new FileOutputStream(currentOutputFile), "UTF-8"); + for (XMLEvent buffered : preamble) { + XMLEvent toWrite = buffered; + if (buffered.isStartElement()) { + toWrite = processStartElement(buffered.asStartElement(), path, sourceUrl, currentOutputFile); + } + writer.add(toWrite); + } + preamble.clear(); + } + continue; + } + + if (event.isStartElement()) { + StartElement startElement = processStartElement(event.asStartElement(), path, sourceUrl, currentOutputFile); + writer.add(startElement); + continue; + } + + if (event.isEndElement() && !path.isEmpty()) { + path.removeLast(); + } + + writer.add(event); + } + + if (writer != null) { + writer.flush(); + writer.close(); + } else { + throw new IOException("Input source does not contain any XML start element: " + sourceUrl.toExternalForm()); + } + + return currentOutputFile; + } catch (XMLStreamException e) { + throw new IOException("Failed to flatten XML " + sourceUrl.toExternalForm(), e); + } + } + + private StartElement 
processStartElement(StartElement startElement, + Deque path, + URL currentDocumentUrl, + File currentOutputFile) throws IOException { + path.addLast(startElement.getName()); + XmlLinkRules.LinkReference linkReference = linkRules.detect(path, startElement); + if (linkReference == null) { + return startElement; + } + + String resolvedLocation = resolveAbsoluteLink( + linkReference.namespace, + currentDocumentUrl, + linkReference.location); + if (resolvedLocation == null || resolvedLocation.trim().isEmpty()) { + return startElement; + } + + URL linkedUrl = toURL(resolvedLocation); + File linkedFile = crawled.get(linkedUrl); + if (linkedFile == null && fileMapper.doCrawl(linkedUrl)) { + linkedFile = crawl(linkedUrl); + } + + String replacement = resolvedLocation; + if (linkedFile != null) { + replacement = relativize(currentOutputFile.getParentFile(), linkedFile); + } + + return replaceAttribute(startElement, linkReference.locationAttribute, replacement); + } + + private StartElement replaceAttribute(StartElement startElement, + QName attributeName, + String replacement) { + List rewritten = new ArrayList<>(); + Iterator attributes = startElement.getAttributes(); + while (attributes.hasNext()) { + Attribute attribute = (Attribute) attributes.next(); + QName name = attribute.getName(); + if (attributeName.getLocalPart().equals(name.getLocalPart()) + && ( attributeName.getNamespaceURI().isEmpty() + || attributeName.getNamespaceURI().equals(name.getNamespaceURI()) )) { + String ns = name.getNamespaceURI() == null ? "" : name.getNamespaceURI(); + String prefix = name.getPrefix() == null ? 
"" : name.getPrefix(); + rewritten.add(eventFactory.createAttribute(prefix, ns, name.getLocalPart(), replacement)); + } else { + rewritten.add(attribute); + } + } + + return eventFactory.createStartElement( + startElement.getName().getPrefix(), + startElement.getName().getNamespaceURI(), + startElement.getName().getLocalPart(), + rewritten.iterator(), + startElement.getNamespaces(), + startElement.getNamespaceContext() + ); + } + + private String resolveAbsoluteLink(String namespace, URL currentDocumentUrl, String location) { + String resolved = resolver.resolve(namespace, currentDocumentUrl.toExternalForm(), location); + if (resolved != null) { + return resolved; + } + try { + URI baseUri = currentDocumentUrl.toURI(); + return baseUri.resolve(location).toString(); + } catch (URISyntaxException e) { + return location; + } + } + + private URL toURL(String value) throws MalformedURLException { + if (value == null || value.trim().isEmpty()) { + return null; + } + try { + return new URL(value.trim()); + } catch (MalformedURLException ignore) { + return new File(value.trim()).toURI().toURL(); + } + } + + private InputStream openInputStream(URL url) throws IOException { + if ("file".equalsIgnoreCase(url.getProtocol())) { + try { + return new FileInputStream(new File(url.toURI())); + } catch (URISyntaxException e) { + return url.openStream(); + } + } + return url.openStream(); + } + + private String relativize(File fromDir, File toFile) { + Path from = fromDir.toPath(); + Path to = toFile.toPath(); + String relative = from.relativize(to).toString(); + return relative.replace(File.separatorChar, '/'); + } + +} diff --git a/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/XmlLinkRules.java b/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/XmlLinkRules.java new file mode 100644 index 0000000..6441531 --- /dev/null +++ b/maven-plugin/src/main/java/io/fares/maven/plugins/design/builder/flattener/XmlLinkRules.java @@ -0,0 
+1,200 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.fares.maven.plugins.design.builder.flattener; + +import javax.xml.namespace.QName; +import javax.xml.stream.events.Attribute; +import javax.xml.stream.events.StartElement; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Deque; +import java.util.Iterator; +import java.util.List; + +/** + * Encapsulates XML import/include link rules for flattening. 
/**
 * Table of the element paths at which XSD, WSDL and XSLT documents reference other
 * documents, together with the attributes that carry those references.
 *
 * <p>Each rule pairs an element path (document root first) with the attribute holding the
 * reference location and, where the vocabulary defines one, the attribute holding the
 * target namespace of the referenced document.</p>
 */
final class XmlLinkRules {

    private static final String INCLUDE = "include";

    private static final String IMPORT = "import";

    private static final String NS_XSD = "http://www.w3.org/2001/XMLSchema";

    private static final String NS_WSDL = "http://schemas.xmlsoap.org/wsdl/";

    private static final String NS_XSL = "http://www.w3.org/1999/XSL/Transform";

    private static final QName QN_SCHEMA_ELEMENT = new QName(NS_XSD, "schema");

    private static final QName QN_SCHEMA_IMPORT = new QName(NS_XSD, IMPORT);

    private static final QName QN_SCHEMA_INCLUDE = new QName(NS_XSD, INCLUDE);

    private static final QName QN_WSDL_ELEMENT = new QName(NS_WSDL, "definitions");

    private static final QName QN_WSDL_TYPES = new QName(NS_WSDL, "types");

    private static final QName QN_WSDL_IMPORT = new QName(NS_WSDL, IMPORT);

    private static final QName QN_WSDL_INCLUDE = new QName(NS_WSDL, INCLUDE);

    private static final QName QN_XSL_ELEMENT = new QName(NS_XSL, "stylesheet");

    private static final QName QN_XSL_IMPORT = new QName(NS_XSL, IMPORT);

    private static final QName QN_XSL_INCLUDE = new QName(NS_XSL, INCLUDE);

    private static final QName ATTR_NAMESPACE = new QName("namespace");

    private static final QName ATTR_SCHEMA_LOCATION = new QName("schemaLocation");

    private static final QName ATTR_LOCATION = new QName("location");

    private static final QName ATTR_HREF = new QName("href");

    /** Known link points; the first rule whose path equals the current element path wins. */
    private final List<LinkRule> rules = Arrays.asList(
        new LinkRule(new QName[]{QN_SCHEMA_ELEMENT, QN_SCHEMA_IMPORT}, ATTR_NAMESPACE, ATTR_SCHEMA_LOCATION),
        new LinkRule(new QName[]{QN_SCHEMA_ELEMENT, QN_SCHEMA_INCLUDE}, null, ATTR_SCHEMA_LOCATION),
        new LinkRule(new QName[]{QN_WSDL_ELEMENT, QN_WSDL_IMPORT}, ATTR_NAMESPACE, ATTR_LOCATION),
        new LinkRule(new QName[]{QN_WSDL_ELEMENT, QN_WSDL_INCLUDE}, null, ATTR_LOCATION),
        new LinkRule(new QName[]{QN_WSDL_ELEMENT, QN_WSDL_TYPES, QN_SCHEMA_ELEMENT, QN_SCHEMA_IMPORT},
            ATTR_NAMESPACE, ATTR_SCHEMA_LOCATION),
        new LinkRule(new QName[]{QN_WSDL_ELEMENT, QN_WSDL_TYPES, QN_SCHEMA_ELEMENT, QN_SCHEMA_INCLUDE},
            null, ATTR_SCHEMA_LOCATION),
        new LinkRule(new QName[]{QN_XSL_ELEMENT, QN_XSL_IMPORT}, null, ATTR_HREF),
        new LinkRule(new QName[]{QN_XSL_ELEMENT, QN_XSL_INCLUDE}, null, ATTR_HREF)
    );

    /**
     * Maps a document root element to a file extension.
     *
     * @param root qualified name of the document's root element
     * @return {@code "xsd"}, {@code "wsdl"} or {@code "xsl"} for the schema, WSDL and
     *         stylesheet roots respectively, otherwise {@code "xml"}
     */
    String extensionForRoot(QName root) {
        if (QN_SCHEMA_ELEMENT.equals(root)) {
            return "xsd";
        }
        if (QN_WSDL_ELEMENT.equals(root)) {
            return "wsdl";
        }
        if (QN_XSL_ELEMENT.equals(root)) {
            return "xsl";
        }
        return "xml";
    }

    /**
     * Checks whether the element at {@code path} is a known link point and extracts its
     * reference.
     *
     * @param path         qualified names from the document root down to and including
     *                     {@code startElement}; see {@link LinkRule#matches(Deque)} for the
     *                     required iteration order
     * @param startElement the start-element event of the element being inspected
     * @return the reference, or {@code null} when no rule matches the path, the location
     *         attribute is absent, or its value is blank; the returned location is the
     *         attribute value as written (it is not trimmed)
     */
    LinkReference detect(Deque<QName> path, StartElement startElement) {
        LinkRule rule = findRule(path);
        if (rule == null) {
            return null;
        }

        Attribute locationAttr = findAttribute(startElement, rule.locationAttribute);
        if (locationAttr == null) {
            return null;
        }

        String location = locationAttr.getValue();
        if (location == null || location.trim().isEmpty()) {
            return null;
        }

        String namespace = null;
        if (rule.namespaceAttribute != null) {
            Attribute namespaceAttr = findAttribute(startElement, rule.namespaceAttribute);
            if (namespaceAttr != null) {
                namespace = namespaceAttr.getValue();
            }
        }

        return new LinkReference(namespace, location, rule.locationAttribute);
    }

    /** Returns the first rule whose path equals {@code path}, or {@code null} if none does. */
    private LinkRule findRule(Deque<QName> path) {
        if (path.isEmpty()) {
            return null;
        }
        for (LinkRule rule : rules) {
            if (rule.matches(path)) {
                return rule;
            }
        }
        return null;
    }

    /**
     * Looks up an attribute of {@code element}.
     *
     * <p>An attribute whose qualified name equals {@code attrName} is always preferred. Only
     * when there is none, and {@code attrName} carries no namespace, is an attribute with the
     * same local name in any namespace accepted. Preferring the exact name keeps the result
     * independent of attribute iteration order when an element carries both an unqualified
     * attribute and a foreign-namespace attribute with the same local name.</p>
     */
    private Attribute findAttribute(StartElement element, QName attrName) {
        Attribute exact = element.getAttributeByName(attrName);
        if (exact != null) {
            return exact;
        }

        String expectedNamespace = attrName.getNamespaceURI();
        if (expectedNamespace != null && !expectedNamespace.isEmpty()) {
            // A namespace-qualified expectation can only be satisfied by the exact name.
            return null;
        }

        // Lenient fallback: match on local name alone.
        Iterator<?> attributes = element.getAttributes();
        while (attributes.hasNext()) {
            Attribute attribute = (Attribute) attributes.next();
            if (attrName.getLocalPart().equals(attribute.getName().getLocalPart())) {
                return attribute;
            }
        }
        return null;
    }

    /** A reference from one document to another, as found on a link element. */
    static final class LinkReference {

        /** Value of the rule's namespace attribute; {@code null} if the rule has none or it is absent. */
        final String namespace;

        /** Value of the location attribute, exactly as written in the document. */
        final String location;

        /** Name of the attribute the rule designates as holding {@link #location}. */
        final QName locationAttribute;

        private LinkReference(String namespace, String location, QName locationAttribute) {
            this.namespace = namespace;
            this.location = location;
            this.locationAttribute = locationAttribute;
        }
    }

    /** One link point: an element path plus the attributes to read from the matching element. */
    private static final class LinkRule {

        private final List<QName> path;

        private final QName namespaceAttribute;

        private final QName locationAttribute;

        private LinkRule(QName[] path, QName namespaceAttribute, QName locationAttribute) {
            this.path = new ArrayList<>(Arrays.asList(path));
            this.namespaceAttribute = namespaceAttribute;
            this.locationAttribute = locationAttribute;
        }

        /**
         * Tests whether {@code candidatePath} has the same length as this rule's path and the
         * same names at every position, compared in the deque's iteration order.
         *
         * <p>NOTE(review): this assumes the deque iterates root-first (for example an
         * {@code ArrayDeque} maintained with {@code addLast}/{@code removeLast}). A deque
         * maintained with {@code push}/{@code pop} iterates leaf-first and would not match
         * any multi-element rule; verify against the caller.</p>
         */
        private boolean matches(Deque<QName> candidatePath) {
            if (candidatePath.size() != path.size()) {
                return false;
            }
            Iterator<QName> expected = path.iterator();
            for (QName actual : candidatePath) {
                // QName equality compares namespace URI and local part; the prefix is ignored.
                if (!expected.next().equals(actual)) {
                    return false;
                }
            }
            return true;
        }
    }
}
You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.fares.maven.plugins.design.builder.flattener; + +import org.apache.xml.resolver.tools.CatalogResolver; +import org.junit.Test; + +import java.io.File; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +public class StreamingXmlFlattenerTest { + + @Test + public void testFlattenPreservesCommentsAndWhitespace() throws Exception { + Path tempDir = Files.createTempDirectory("streaming-xml-flattener"); + Path sourceDir = Files.createDirectories(tempDir.resolve("src")); + Path outputDir = Files.createDirectories(tempDir.resolve("out")); + + Path wsdl = sourceDir.resolve("service.wsdl"); + Path xsd = sourceDir.resolve("schema.xsd"); + + Files.write(wsdl, ( + "\n" + + "\n" + + "\n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + " \n" + + "\n" + ).getBytes(StandardCharsets.UTF_8)); + + Files.write(xsd, ( + "\n" + + "\n" + + " \n" + + " \n" + + "\n" + ).getBytes(StandardCharsets.UTF_8)); + + CatalogUriResolver resolver = new CatalogUriResolver(new CatalogResolver()) { + @Override + public String resolve(String namespace, String base, String location) { + return null; + } + }; + + StreamingXmlFlattener flattener = new StreamingXmlFlattener( + resolver, + new SimpleNameCrawlerListener(outputDir.toFile(), true) + ); + + File rootOutput = flattener.crawl(wsdl.toUri().toURL()); + assertNotNull(rootOutput); + 
assertTrue(rootOutput.exists()); + + File importedOutput = outputDir.resolve("schema.xsd").toFile(); + assertTrue(importedOutput.exists()); + + String flattenedWsdl = new String(Files.readAllBytes(rootOutput.toPath()), StandardCharsets.UTF_8); + assertTrue(flattenedWsdl.contains("")); + assertTrue(flattenedWsdl.contains("")); + assertTrue(flattenedWsdl.contains(" ")); + assertTrue(flattenedWsdl.contains("schemaLocation=\"schema.xsd\"")); + + String flattenedXsd = new String(Files.readAllBytes(importedOutput.toPath()), StandardCharsets.UTF_8); + assertTrue(flattenedXsd.contains("")); + assertTrue(flattenedXsd.contains("\n \n" + + "\n" + + " \n" + + "\n" + ).getBytes(StandardCharsets.UTF_8)); + + Files.write(nestedSchema, ( + "\n" + + "\n" + ).getBytes(StandardCharsets.UTF_8)); + + CatalogUriResolver resolver = new CatalogUriResolver(new CatalogResolver()) { + @Override + public String resolve(String namespace, String base, String location) { + return null; + } + }; + + StreamingXmlFlattener flattener = new StreamingXmlFlattener( + resolver, + new SimpleNameCrawlerListener(outputDir.toFile(), true) + ); + + URL source = rootSchema.toUri().toURL(); + File flattened = flattener.crawl(source); + assertTrue(flattened.exists()); + + String xml = new String(Files.readAllBytes(flattened.toPath()), StandardCharsets.UTF_8); + assertTrue(xml.contains("schemaLocation=\"nested.xsd\"")); + + assertEquals(outputDir.resolve("root.xsd").toFile().getAbsolutePath(), flattened.getAbsolutePath()); + assertTrue(outputDir.resolve("nested.xsd").toFile().exists()); + } +} diff --git a/parent/pom.xml b/parent/pom.xml index 417c2e4..aa80831 100644 --- a/parent/pom.xml +++ b/parent/pom.xml @@ -35,7 +35,6 @@ 3.11.0 3.3.0 1.1.0 - 2.2.3 4.8.165 1.2 2.12.2 @@ -103,12 +102,6 @@ serializer ${xalan.version} - - - in.jlibs - jlibs-xml-crawler - ${jlibs.version} - org.junit From f68e079d21e1cd4ec66eea239ac0799d2817f4c6 Mon Sep 17 00:00:00 2001 From: Niels Bertram Date: Sat, 7 Mar 2026 13:00:03 +1000 
Subject: [PATCH 3/3] added design draft for better flattening support --- docs/flatten-file-collision-design-draft1.md | 111 ++++++++ docs/flatten-file-collision-enhancement.md | 256 +++++++++++++++++++ 2 files changed, 367 insertions(+) create mode 100644 docs/flatten-file-collision-design-draft1.md create mode 100644 docs/flatten-file-collision-enhancement.md diff --git a/docs/flatten-file-collision-design-draft1.md b/docs/flatten-file-collision-design-draft1.md new file mode 100644 index 0000000..0c73c16 --- /dev/null +++ b/docs/flatten-file-collision-design-draft1.md @@ -0,0 +1,111 @@ +# Flatten File Collision Design Draft 1 + +## Context +This document is the implementation-oriented companion to the enhancement problem statement: +- [Flatten File Collision Enhancement](flatten-file-collision-enhancement.md) + +It focuses on component interactions, call flow, and component responsibilities. + +## Sequence Diagram +```mermaid +--- +config: + mirrorActors: false +--- +sequenceDiagram + autonumber + actor U as User + participant M as FlattenImportPathMojo + participant F as StreamingXmlFlattener + participant XR as XmlLinkRules + participant CR as CatalogUriResolver + participant RP as FlattenPlanner + participant FP as FlattenPlan + participant FW as FlattenWriter + participant CO as CollisionRegistry + participant FN as FilenameAssigner + participant OR as ResourceOriginNormalizer + participant HS as ContentHasher + participant RM as ReferenceRewriteMap + participant FS as FileSystem + + U->>M: execute(config: FlattenConfig) -> void + + M->>M: parseCollisionPolicy(config) -> CollisionPolicy + + alt collisionPolicy == auto + M->>RP: buildPlan(roots: List, cfg: FlattenConfig) -> FlattenPlan + + loop for each discovered resource link + RP->>CR: resolve(namespace: String, base: String, location: String) -> ResolvedResource + RP->>OR: normalize(resolved: ResolvedResource) -> ResourceOrigin + RP->>XR: 
detect(path: Deque, start: StartElement) -> LinkReference? + RP->>FN: proposeName(resource: ResolvedResource, root: QName) -> String + RP->>CO: registerCandidate(name: String, identity: IdentityKey, origin: ResourceOrigin) -> CollisionDecision + alt decision requires exact compare + RP->>HS: hashRawBytes(uri: URI) -> ContentHash + RP->>CO: resolveByHash(name: String, hash: ContentHash, identity: IdentityKey) -> CollisionDecision + end + RP->>RM: addRewrite(fromRef: ReferenceRef, toName: String) -> void + end + + RP-->>M: plan: FlattenPlan + M->>M: logPlan(plan: FlattenPlan) -> void + + M->>FW: write(plan: FlattenPlan, cfg: FlattenConfig) -> WriteReport + loop each planned resource + FW->>FS: openRead(uri: URI) -> InputStream + FW->>F: rewriteAndWrite(in: InputStream, rewrites: ReferenceRewriteMap, outName: String) -> WriteResult + F->>XR: detect(path: Deque, start: StartElement) -> LinkReference? + F->>RM: lookup(ref: ReferenceRef) -> String + F->>FS: writeFile(path: Path, bytes/events) -> void + end + FW-->>M: report: WriteReport + + else collisionPolicy in [warn, fail, rename] + M->>F: crawl(roots: List, cfg: FlattenConfig) -> CrawlReport + loop each resolved reference + F->>CR: resolve(namespace: String, base: String, location: String) -> ResolvedResource + F->>OR: normalize(resolved: ResolvedResource) -> ResourceOrigin + F->>FN: proposeName(resource: ResolvedResource, root: QName) -> String + F->>CO: registerOrCheck(name: String, identity: IdentityKey, origin: ResourceOrigin, policy: CollisionPolicy) -> CollisionDecision + alt origin ambiguous or conflict + F->>HS: hashRawBytes(uri: URI) -> ContentHash + F->>CO: compareAndDecide(name: String, hash: ContentHash, policy: CollisionPolicy) -> CollisionDecision + end + F->>FS: writeFile(path: Path, bytes/events) -> void + end + F-->>M: report: CrawlReport + end + + M-->>U: result(report: ExecutionReport) -> void +``` + +## Component Responsibilities +| Component | Type | Responsibility | +|---|---|---| +| 
`FlattenImportPathMojo` | Existing (change) | Parse collision config, select execution mode (`warn/fail/rename/auto`), orchestrate planner/writer or single-pass flattener, report summary. | +| `CatalogUriResolver` | Existing (change) | Resolve references and return structured resolver result (URI + method + trace + origin hints). | +| `ResolvedResource` | New | Resolver output model (`resolvedUri`, resolution method/trace, optional origin metadata). | +| `ResourceOrigin` | New | Canonical source-origin model for Tier-1 equivalence checks. | +| `ResourceOriginNormalizer` | New | Normalize resolver results into canonical `ResourceOrigin` values. | +| `StreamingXmlFlattener` | Existing (change) | Stream parse/rewrite, recurse resources, apply final filename mapping, preserve comments/whitespace events. | +| `XmlLinkRules` | Existing (newly introduced) | Detect import/include/href points and extract link references from XML start elements. | +| `FlattenPlanner` | New | Stage-1 graph discovery and collision planning for `auto` mode. | +| `FlattenPlan` | New | Final immutable plan: resource graph, filename assignment, rewrite mapping, diagnostics. | +| `FlattenWriter` | New | Stage-2 execution for `auto`: materialize files and rewrites exactly per plan. | +| `CollisionRegistry` | New | Track filename-to-identity/hash bindings and detect/enforce collisions by policy. | +| `CollisionPolicy` | New | Enum + behavior wiring for `warn`, `fail`, `rename`, `auto`. | +| `ContentHasher` | New | Compute raw-byte hashes (lazy/on-demand for Tier-2 exact checks). | +| `FilenameAssigner` | New | Provide readable base names and deterministic hash-suffix names for collision cases. | +| `ReferenceRewriteMap` | New | Store source-reference to final-filename rewrite targets. | +| `SimpleNameCrawlerListener` | Existing (change) | Maintain readable base filename proposal logic for flattened outputs. 
| +| `CollisionDiagnosticsFormatter` | New | Format collision and planning diagnostics for logs and errors. | +| `CollisionCatalogFixtures` | New (tests) | Test fixtures for ns1/ns2 same-name-different-content catalog scenarios. | +| `FlattenCollisionPolicyTests` | New (tests) | Verify `warn`, `fail`, `rename`, `auto` behavior and rewrite correctness. | +| `OriginVsHashComparisonTests` | New (tests) | Verify Tier-1 origin short-circuit and Tier-2 hash fallback correctness. | + +## Notes +1. `auto` mode is designed for deterministic, zero-backtracking output decisions. +2. Default mode remains filename-preserving for non-collision resources. +3. Tier-1 origin comparison optimizes performance; Tier-2 hashing remains the correctness guard. diff --git a/docs/flatten-file-collision-enhancement.md b/docs/flatten-file-collision-enhancement.md new file mode 100644 index 0000000..b39c1ea --- /dev/null +++ b/docs/flatten-file-collision-enhancement.md @@ -0,0 +1,256 @@ +# Flatten File Collision Enhancement + +## Status +- Proposed +- Owner: `design-builder-maven-plugin` flatten workflow +- Companion technical draft: [Flatten File Collision Design Draft 1](flatten-file-collision-design-draft1.md) + +## Problem Statement +The flatten goal writes resolved XML resources (XSD/WSDL/XSL) into a single target folder. +Today, collisions can happen when two different resolved resources produce the same target filename (for example `Order.xsd`), especially when XML Catalog rules resolve references differently by namespace/system/public/suffix mappings. + +With overwrite behavior enabled, later writes may silently replace earlier content. This can produce broken flattened outputs and difficult-to-diagnose defects. + +## Design Constraints +1. Default behavior should keep flattened filenames readable and unchanged wherever possible. +2. 
Design tools and downstream consumers should continue to work without unexpected filename changes. +3. Collision detection must use resolved identity and content, not only the requested reference string. +4. Hash-based renamed outputs should be optional. + +## Goals +1. Detect filename collisions deterministically during flattening. +2. Prevent silent data corruption from different content being written to the same output filename. +3. Preserve existing output names by default when no true collision exists. +4. Provide clear diagnostics when catalog resolution causes ambiguity. + +## Non-Goals +1. Preserve byte-for-byte lexical formatting of serialized XML. +2. Replace XML catalog semantics or resolver strategy in this iteration (resolver behavior remains authoritative input). +3. Force global filename renaming in non-collision scenarios. + +## Proposed Behavior + +### Output Identity Tracking +Maintain an in-memory registry during a flatten run: +- Key: output filename (for example `Order.xsd`) +- Value: + - first resolved identity key + - first content hash + - source trace (resolved URI + resolver details) + +Also track: +- resolved identity key to assigned filename mapping + +### Identity Key +Identity key should be resolver-aware: +- canonical resolved URI (required) +- optional resolver trace metadata (if available) + +If resolver trace details are not available, canonical resolved URI remains the base identity. + +### Two-Tier Identity and Comparison Strategy +To improve performance, collision adjudication should use a two-tier strategy: + +1. Tier 1 (cheap): origin equivalence +- Compare normalized resource origin derived from resolver output, for example: + - same local file path + - same jar file path + entry path + - same resolved artifact coordinates/path for Maven-backed resources +- If origins are equivalent, treat as same source candidate without immediate byte hashing. + +2. 
Tier 2 (exact): content hash on demand +- If origin is different or uncertain, compute hash of raw bytes from resolved URI. +- Use hash comparison to determine whether content is actually equal or conflicting. + +This preserves correctness while avoiding unnecessary hashing in common equivalent-origin cases. + +### Content Hash +Use hash of resolved source bytes (not serialized output) as collision guard. +Prefer lazy hash computation: +1. Compute immediately for resources in known collision groups. +2. Defer for unambiguous resources until needed. + +### Collision Decision +When writing resource `R` to target filename `F`: +1. If `F` not seen before: write and register. +2. If `F` seen and hash matches existing: treat as equivalent content; reuse existing mapping. +3. If `F` seen and hash differs: treat as collision and apply configured policy. + +## Collision Policy +Add new configuration: +- `flatten.collisionPolicy`: `warn | fail | rename | auto` + +Default: +- `warn` (safe migration path with visibility) + +Behavior: +1. `warn` +- Keep first file bound to filename. +- Emit warning with both identities/hashes/resolution traces. +- Continue processing. + +2. `fail` +- Throw `MojoExecutionException` on first differing-content collision. +- No silent corruption. + +3. `rename` +- Keep first file as-is. +- Write conflicting resource with deterministic readable hash suffix: + - `Order__a1b2c3.xsd` (3-byte hex suffix = 6 hex chars) +- Rewrite references to the renamed file. + +4. `auto` +- Use a two-stage execution model (plan, then write). +- Detect all collision groups before writing any output file. +- Keep non-collision filenames unchanged. +- For each collision group, deterministically assign readable hash-suffixed names to all conflicting variants in that group. +- Rewrite all affected references consistently in a single write phase. +- Emit a plan summary before writing. 
+ +## Optional Hash-Suffix Naming +Hash suffix mode is opt-in through `collisionPolicy=rename` or `collisionPolicy=auto`. + +Naming rules: +1. Keep base readable filename. +2. Append short hash suffix only for conflicting variants. +3. Deterministic mapping within a run. +4. If rare short-hash collision occurs, extend suffix length for that filename group. + +## Two-Stage Auto-Resolution Option + +### Motivation +Single-pass flattening can discover collisions late, after an earlier conflicting file has already been written with the plain name. +This creates consistency problems if a later collision requires renaming. + +### Memory Model +The two-stage approach does not require retaining full XML documents in memory. + +Design intent: +1. Stream each resource once during planning. +2. Extract link metadata (`import`/`include` references) while streaming. +3. Compute content hash from streamed raw bytes. +4. Retain only compact metadata: +- resolved identity +- hash +- proposed/assigned filename +- reference edges and rewrite targets + +This keeps memory proportional to resource count and graph metadata, not XML payload size. + +### Performance Model +The planner should be optimized for large dependency graphs: +1. Cache resolver-origin fingerprint per resolved URI. +2. Avoid duplicate fetch/hash for already-seen resolved URIs. +3. Use Tier 1 origin comparison first. +4. Use Tier 2 byte hashing only for ambiguous or conflicting filename groups. + +### Approach +`collisionPolicy=auto` performs flattening in two stages: + +1. Stage 1: Analyze and Plan +- Crawl and resolve full dependency graph without writing final output files. +- Collect: + - resolved identities + - canonical source hashes + - proposed default filenames + - all references that must be rewritten +- Detect filename collision groups. +- Build final deterministic filename assignment: + - keep plain filename for non-collision entries + - assign hash-suffixed names for colliding entries + +2. 
Stage 2: Write and Rewrite +- Execute writes using the finalized filename plan. +- Rewrite all references against the finalized mapping. +- No backtracking/retroactive rename during write phase. + +### User Visibility +Before Stage 2, log a concise execution plan: +- total resources discovered +- number of collision groups +- each collision group and chosen output names +- whether any hash suffix expansion was required + +## Resolver Contract Enhancement +Enhance resolver output from plain string URI to a structured result: +- `resolvedUri` +- `resolutionMethod` (for example entity/uri/public/fallback) +- `resolutionTrace` (best-effort diagnostic details) + +The flattener uses this for: +- identity calculation +- collision diagnostics + +## Backward Compatibility +1. No filename changes in default non-collision scenarios. +2. Existing behavior remains for unique files. +3. Collision handling becomes explicit and configurable. +4. `auto` mode is opt-in and does not alter default behavior. + +## Test Plan + +### Required New Collision Test +Add a dedicated integration-style flatten test that reproduces same-name/different-content collision using XML Catalog entries. + +Catalog setup: +```xml +<catalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog"> + <uri name="urn:ns1:Order.xsd" uri="ns1/Order.xsd"/> + <uri name="urn:ns2:Order.xsd" uri="ns2/Order.xsd"/> +</catalog> +``` + +Fixture setup: +1. `ns1/Order.xsd` and `ns2/Order.xsd` must have different structures/content. +2. Two source schemas import: + - `schemaLocation="urn:ns1:Order.xsd"` + - `schemaLocation="urn:ns2:Order.xsd"` +3. Flatten target output directory is a single folder. + +Expected assertions: +1. `collisionPolicy=warn` +- Warning emitted for `Order.xsd` collision. +- Build continues. + +2. `collisionPolicy=fail` +- Build fails with clear collision details. + +3. `collisionPolicy=rename` +- First `Order.xsd` remains. +- Conflicting file written as `Order__<hash>.xsd`. +- Rewritten import points to renamed file. + +4. `collisionPolicy=auto` +- No files written during planning stage. +- Plan reports a collision group for `Order.xsd`. 
+- Final write uses consistent deterministic names for both variants (for example `Order__a1b2c3.xsd` and `Order__d4e5f6.xsd`). +- All rewritten references point to planned names. + +### Additional Tests +1. Same filename + same content from two references should not warn/fail. +2. Deterministic naming across repeated runs. +3. Diagnostic message includes both resolved URIs. + +## Implementation Outline +1. Introduce `CollisionRegistry` in flatten pipeline. +2. Add resolver result model (`ResolvedResource`). +3. Add collision policy config parsing in `FlattenImportPathMojo`. +4. Add planning model: +- `FlattenPlan` (resource graph + filename assignments + rewrite map) +- `FlattenPlanner` (stage 1) +- `FlattenWriter` (stage 2) +5. Add origin fingerprint model: +- `ResourceOrigin` (file/jar/artifact/path details) +- comparison helper for Tier 1 equivalence checks +6. Integrate two-tier comparison (origin first, hash on demand). +7. Integrate single-pass behavior for `warn|fail|rename`; two-stage behavior for `auto`. +8. Add tests for warn/fail/rename/auto policies and Tier 1/Tier 2 comparison paths. + +## Observability +Log collision diagnostics with: +- target filename +- first and second resolved URI +- first and second content hash +- selected collision policy +- final output filename chosen +- planning summary (for `auto`)