diff --git a/0002-fix-XmlStreamReader-can-not-parse-XML-document-with-multi-line.patch b/0002-fix-XmlStreamReader-can-not-parse-XML-document-with-multi-line.patch new file mode 100644 index 0000000000000000000000000000000000000000..fc214e92d6e432619eb0aba3ff962c7204ff9505 --- /dev/null +++ b/0002-fix-XmlStreamReader-can-not-parse-XML-document-with-multi-line.patch @@ -0,0 +1,110 @@ +From de897fa292afdb9e07d2a4a5fc8e7b1eb1898890 Mon Sep 17 00:00:00 2001 +From: wang--ge +Date: Mon, 9 Dec 2024 17:00:12 +0800 +Subject: [PATCH] XmlStreamReader can't parse XML document with multi-line + +--- + src/changes/changes.xml | 3 +++ + .../apache/commons/io/input/XmlStreamReader.java | 16 +++++++++++----- + .../commons/io/input/XmlStreamReaderTest.java | 10 ++++++++++ + 3 files changed, 24 insertions(+), 5 deletions(-) + +diff --git a/src/changes/changes.xml b/src/changes/changes.xml +index 9657c77..2928337 100644 +--- a/src/changes/changes.xml ++++ b/src/changes/changes.xml +@@ -79,6 +79,9 @@ The type attribute can be add,update,fix,remove. + + + ++ ++ XmlStreamReader can't parse XML document with multi-line prolog #550. ++ + + XmlStreamReader encoding match RE is too strict. + +diff --git a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java +index 2b9b379..ff16987 100644 +--- a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java ++++ b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java +@@ -214,6 +214,16 @@ public class XmlStreamReader extends Reader { + *

+ * See also the XML specification. + *

++ *

++ * Note the documented pattern is: ++ *

++ *
++     * EncName   ::=   [A-Za-z] ([A-Za-z0-9._] | '-')*
++     * 
++ *

++ * However, this does not match all the aliases that are supported by Java, ++ * for example '437', 'ISO_8859-1:1987' and 'ebcdic-de-273+euro'. ++ *

+ */ + public static final Pattern ENCODING_PATTERN = Pattern.compile( + // @formatter:off +@@ -223,10 +233,6 @@ public class XmlStreamReader extends Reader { + + "((?:\"[A-Za-z0-9][A-Za-z0-9._+:-]*\")" // double-quoted + + "|(?:'[A-Za-z0-9][A-Za-z0-9._+:-]*'))", // single-quoted + Pattern.MULTILINE); +- // N.B. the documented pattern is +- // EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* +- // However this does not match all the aliases that are supported by Java. +- // e.g. '437', 'ISO_8859-1:1987' and 'ebcdic-de-273+euro' + // @formatter:on + + private static final String RAW_EX_1 = "Illegal encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch"; +@@ -325,7 +331,7 @@ public class XmlStreamReader extends Reader { + inputStream.reset(); + final BufferedReader bReader = new BufferedReader(new StringReader(xmlProlog.substring(0, firstGT + 1))); + final StringBuilder prolog = new StringBuilder(); +- IOConsumer.forEach(bReader.lines(), prolog::append); ++ IOConsumer.forEach(bReader.lines(), l -> prolog.append(l).append(' ')); + final Matcher m = ENCODING_PATTERN.matcher(prolog); + if (m.find()) { + encoding = m.group(1).toUpperCase(Locale.ROOT); +diff --git a/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java b/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java +index 63d587a..de986c9 100644 +--- a/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java ++++ b/src/test/java/org/apache/commons/io/input/XmlStreamReaderTest.java +@@ -60,6 +60,8 @@ public class XmlStreamReaderTest { + private static final String UTF_32LE = "UTF-32LE"; + private static final String UTF_32BE = "UTF-32BE"; + private static final String UTF_8 = StandardCharsets.UTF_8.name(); ++ ++ private static final String XML6 = "xml-prolog-encoding-new-line"; + private static final String XML5 = "xml-prolog-encoding-spaced-single-quotes"; + private static final String XML4 = "xml-prolog-encoding-single-quotes"; + private static final String XML3 = 
"xml-prolog-encoding-double-quotes"; +@@ -102,6 +104,8 @@ public class XmlStreamReaderTest { + + private static final MessageFormat XML_WITH_PROLOG = new MessageFormat( + "\n{2}"); ++ private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_NEW_LINES = new MessageFormat( ++ "\n{2}"); + + private static final MessageFormat XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES = new MessageFormat( + "\n{2}"); +@@ -123,6 +127,7 @@ public class XmlStreamReaderTest { + XMLs.put(XML3, XML_WITH_PROLOG_AND_ENCODING_DOUBLE_QUOTES); + XMLs.put(XML4, XML_WITH_PROLOG_AND_ENCODING_SINGLE_QUOTES); + XMLs.put(XML5, XML_WITH_PROLOG_AND_ENCODING_SPACED_SINGLE_QUOTES); ++ XMLs.put(XML6, XML_WITH_PROLOG_AND_ENCODING_NEW_LINES); + } + + /** +@@ -624,5 +629,10 @@ public class XmlStreamReaderTest { + xmlReader = new XmlStreamReader(is); + assertEquals(xmlReader.getEncoding(), encoding); + xmlReader.close(); ++ ++ is = getXmlInputStream("no-bom", XML6, encoding, encoding); ++ xmlReader = new XmlStreamReader(is); ++ assertEquals(xmlReader.getEncoding(), encoding); ++ xmlReader.close(); + } + } +-- +2.46.0 + diff --git a/apache-commons-io.spec b/apache-commons-io.spec index ed4d31cad8c1aef87e4ee0c7e86ab4e0d986e575..ef218f1290c9aef4f7e3ee45c32fdbcb1361df8a 100644 --- a/apache-commons-io.spec +++ b/apache-commons-io.spec @@ -1,12 +1,13 @@ Name: apache-commons-io Epoch: 1 Version: 2.15.1 -Release: 1 +Release: 2 Summary: A library of utilities for developing IO functionality. 
License: ASL 2.0 URL: http://commons.apache.org/proper/commons-io Source0: http://archive.apache.org/dist/commons/io/source/commons-io-%{version}-src.tar.gz Patch0: 0001-remove-undefined-parameter-from-maven-surefire-plugi.patch +Patch1: 0002-fix-XmlStreamReader-can-not-parse-XML-document-with-multi-line.patch BuildArch: noarch BuildRequires: mvn(org.apache.maven.plugins:maven-antrun-plugin) maven-local BuildRequires: mvn(org.apache.commons:commons-parent:pom:) mvn(junit:junit) @@ -66,6 +67,9 @@ xmvn test --batch-mode --offline verify %doc RELEASE-NOTES.txt %changelog +* Mon Dec 09 2024 Ge Wang - 1:2.15.1-2 +- Fix XmlStreamReader failing to parse XML documents with a multi-line prolog + * Tue Nov 12 2024 Ge Wang - 1:2.15.1-1 - Update to version 2.15.1