diff --git a/0001-Adapt-to-libxml2-2.10.4-and-later.patch b/0001-Adapt-to-libxml2-2.10.4-and-later.patch new file mode 100644 index 0000000000000000000000000000000000000000..5b47d41c6d293be4743fc58a78728b397ae755ba --- /dev/null +++ b/0001-Adapt-to-libxml2-2.10.4-and-later.patch @@ -0,0 +1,251 @@ +From a1669cfdefa1e9762e7a8297f4413ecbf373888d Mon Sep 17 00:00:00 2001 +From: han-guangyu +Date: Mon, 20 May 2024 13:57:13 +0800 +Subject: [PATCH 1/1] Adapt to libxml2 2.10.4 and later + +Change HTML "prefix" handling in ElementPath to let +"element.find('part1:part2')" search for "part1:part2" instead of just +"part2" with an unknown prefix. + +Also adapt the HTML "prefix" parsing test to make it work in libxml2 +2.10.4 and later, where HTML "prefixes" are kept as part of the tag +name by the parser. +--- + CHANGES.txt | 12 ++++++++++++ + src/lxml/_elementpath.py | 21 +++++++++++---------- + src/lxml/apihelpers.pxi | 7 +++++++ + src/lxml/etree.pyx | 8 ++++---- + src/lxml/includes/tree.pxd | 11 +++++++++++ + src/lxml/tests/test_etree.py | 26 ++++++++++++++++++++++---- + 6 files changed, 67 insertions(+), 18 deletions(-) + +diff --git a/CHANGES.txt b/CHANGES.txt +index 4dd1055..0e47581 100644 +--- a/CHANGES.txt ++++ b/CHANGES.txt +@@ -24,6 +24,18 @@ Other changes + + * Built with Cython 0.29.37. + ++Bugs fixed in openEuler ++ ++---------- ++* With libxml2 2.10.4 and later (as provided by the lxml 5.0 binary wheels), ++ parsing HTML tags with "prefixes" no longer builds a namespace dictionary ++ in ``nsmap`` but considers the ``prefix:name`` string the actual tag name. ++ With older libxml2 versions, since 2.9.11, the prefix was removed. Before ++ that, the prefix was parsed as XML prefix. ++ ++ lxml 5.0 does not try to hide this difference but now changes the ElementPath ++ implementation to let ``element.find("part1:part2")`` search for the tag ++ ``part1:part2`` in documents parsed as HTML, instead of looking only for ``part2``. 
+ + 4.9.3 (2023-07-05) + ================== +diff --git a/src/lxml/_elementpath.py b/src/lxml/_elementpath.py +index eabd81c..24b8e2b 100644 +--- a/src/lxml/_elementpath.py ++++ b/src/lxml/_elementpath.py +@@ -71,14 +71,14 @@ xpath_tokenizer_re = re.compile( + r"\s+" + ) + +-def xpath_tokenizer(pattern, namespaces=None): ++def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True): + # ElementTree uses '', lxml used None originally. + default_namespace = (namespaces.get(None) or namespaces.get('')) if namespaces else None + parsing_attribute = False + for token in xpath_tokenizer_re.findall(pattern): + ttype, tag = token + if tag and tag[0] != "{": +- if ":" in tag: ++ if ":" in tag and with_prefixes: + prefix, uri = tag.split(":", 1) + try: + if not namespaces: +@@ -251,7 +251,7 @@ ops = { + _cache = {} + + +-def _build_path_iterator(path, namespaces): ++def _build_path_iterator(path, namespaces, with_prefixes=True): + """compile selector pattern""" + if path[-1:] == "/": + path += "*" # implicit all (FIXME: keep this?) +@@ -279,7 +279,7 @@ def _build_path_iterator(path, namespaces): + + if path[:1] == "/": + raise SyntaxError("cannot use absolute path on element") +- stream = iter(xpath_tokenizer(path, namespaces)) ++ stream = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes)) + try: + _next = stream.next + except AttributeError: +@@ -308,7 +308,7 @@ def _build_path_iterator(path, namespaces): + ## + # Iterate over the matching nodes + +-def iterfind(elem, path, namespaces=None): +- selector = _build_path_iterator(path, namespaces) ++def iterfind(elem, path, namespaces=None, with_prefixes=True): ++ selector = _build_path_iterator(path, namespaces, with_prefixes=with_prefixes) + result = iter((elem,)) + for select in selector: +@@ -319,8 +319,8 @@ def iterfind(elem, path, namespaces=None): + ## + # Find first matching object. 
+ +-def find(elem, path, namespaces=None): +- it = iterfind(elem, path, namespaces) ++def find(elem, path, namespaces=None, with_prefixes=True): ++ it = iterfind(elem, path, namespaces, with_prefixes=with_prefixes) + try: + return next(it) + except StopIteration: +@@ -330,15 +330,15 @@ def find(elem, path, namespaces=None): + ## + # Find all matching objects. + +-def findall(elem, path, namespaces=None): +- return list(iterfind(elem, path, namespaces)) ++def findall(elem, path, namespaces=None, with_prefixes=True): ++ return list(iterfind(elem, path, namespaces, with_prefixes=with_prefixes)) + + + ## + # Find text for first matching object. + +-def findtext(elem, path, default=None, namespaces=None): +- el = find(elem, path, namespaces) ++def findtext(elem, path, default=None, namespaces=None, with_prefixes=True): ++ el = find(elem, path, namespaces, with_prefixes=with_prefixes) + if el is None: + return default + else: +diff --git a/src/lxml/apihelpers.pxi b/src/lxml/apihelpers.pxi +index 9fae9fb..35b3187 100644 +--- a/src/lxml/apihelpers.pxi ++++ b/src/lxml/apihelpers.pxi +@@ -15,6 +15,13 @@ cdef void displayNode(xmlNode* c_node, indent): + finally: + return # swallow any exceptions + ++cdef inline bint _isHtmlDocument(_Element element) except -1: ++ cdef xmlNode* c_node = element._c_node ++ return ( ++ c_node is not NULL and c_node.doc is not NULL and ++ c_node.doc.properties & tree.XML_DOC_HTML != 0 ++ ) ++ + cdef inline int _assertValidNode(_Element element) except -1: + assert element._c_node is not NULL, u"invalid Element proxy at %s" % id(element) + +diff --git a/src/lxml/etree.pyx b/src/lxml/etree.pyx +index ff8ec9f..a2a776c 100644 +--- a/src/lxml/etree.pyx ++++ b/src/lxml/etree.pyx +@@ -1554,7 +1554,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + """ + if isinstance(path, QName): + path = (path).text +- return _elementpath.find(self, path, namespaces) ++ return _elementpath.find(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def findtext(self, path, default=None, 
namespaces=None): + u"""findtext(self, path, default=None, namespaces=None) +@@ -1567,7 +1567,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + """ + if isinstance(path, QName): + path = (path).text +- return _elementpath.findtext(self, path, default, namespaces) ++ return _elementpath.findtext(self, path, default, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def findall(self, path, namespaces=None): + u"""findall(self, path, namespaces=None) +@@ -1580,7 +1580,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + """ + if isinstance(path, QName): + path = (path).text +- return _elementpath.findall(self, path, namespaces) ++ return _elementpath.findall(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def iterfind(self, path, namespaces=None): + u"""iterfind(self, path, namespaces=None) +@@ -1593,7 +1593,7 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + """ + if isinstance(path, QName): + path = (path).text +- return _elementpath.iterfind(self, path, namespaces) ++ return _elementpath.iterfind(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def xpath(self, _path, *, namespaces=None, extensions=None, + smart_strings=True, **_variables): +diff --git a/src/lxml/includes/tree.pxd b/src/lxml/includes/tree.pxd +index 010af80..5f21329 100644 +--- a/src/lxml/includes/tree.pxd ++++ b/src/lxml/includes/tree.pxd +@@ -153,6 +153,16 @@ cdef extern from "libxml/tree.h": + XML_INTERNAL_PARAMETER_ENTITY= 4 + XML_EXTERNAL_PARAMETER_ENTITY= 5 + XML_INTERNAL_PREDEFINED_ENTITY= 6 ++ ctypedef enum xmlDocProperties: ++ XML_DOC_WELLFORMED = 1 # /* document is XML well formed */ ++ XML_DOC_NSVALID = 2 # /* document is Namespace valid */ ++ XML_DOC_OLD10 = 4 # /* parsed with old XML-1.0 parser */ ++ XML_DOC_DTDVALID = 8 # /* DTD validation was successful */ ++ XML_DOC_XINCLUDE = 16 # /* XInclude substitution was done */ ++ XML_DOC_USERBUILT = 32 # /* 
Document was built using the API ++ # and not by parsing an instance */ ++ XML_DOC_INTERNAL = 64 # /* built for internal processing */ ++ XML_DOC_HTML = 128 # /* parsed or built HTML document */ + + ctypedef struct xmlNs: + const_xmlChar* href +@@ -274,6 +284,7 @@ cdef extern from "libxml/tree.h": + void* _private + xmlDtd* intSubset + xmlDtd* extSubset ++ int properties + + ctypedef struct xmlAttr: + void* _private +diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py +index 9eab5bf..2afc07e 100644 +--- a/src/lxml/tests/test_etree.py ++++ b/src/lxml/tests/test_etree.py +@@ -3121,11 +3121,29 @@ class ETreeOnlyTestCase(HelperTestCase): + + def test_html_prefix_nsmap(self): + etree = self.etree +- el = etree.HTML('aa').find('.//page-description') +- if etree.LIBXML_VERSION < (2, 9, 11): +- self.assertEqual({'hha': None}, el.nsmap) ++ el = etree.HTML('aa') ++ pd = el[-1] ++ while len(pd): ++ pd = pd[-1] ++ ++ if etree.LIBXML_VERSION >= (2, 10, 4): ++ # "Prefix" is kept as part of the tag name. ++ self.assertEqual("hha:page-description", pd.tag) ++ self.assertIsNone(el.find('.//page-description')) ++ self.assertIsNotNone(el.find('.//hha:page-description')) # no namespaces! ++ for e in el.iter(): ++ self.assertEqual({}, e.nsmap) ++ elif etree.LIBXML_VERSION >= (2, 9, 11): ++ # "Prefix" is stripped. ++ self.assertEqual("page-description", pd.tag) ++ self.assertIsNotNone(el.find('.//page-description')) ++ for e in el.iter(): ++ self.assertEqual({}, e.nsmap) + else: +- self.assertEqual({}, el.nsmap) ++ # "Prefix" is parsed as XML prefix. 
++ self.assertEqual("page-description", pd.tag) ++ pd = el.find('.//page-description') ++ self.assertEqual({'hha': None}, pd.nsmap) + + def test_getchildren(self): + Element = self.etree.Element +-- +2.43.0 + diff --git a/Skip-failing-test_iterparse_utf16_bom.patch b/Skip-failing-test_iterparse_utf16_bom.patch deleted file mode 100644 index 55d0aab5b932ec27a42e8ea9684b35ef9e91baaa..0000000000000000000000000000000000000000 --- a/Skip-failing-test_iterparse_utf16_bom.patch +++ /dev/null @@ -1,32 +0,0 @@ -From 1e096eeabcb6f3995c8e9da6f544e7f9f5ff5f08 Mon Sep 17 00:00:00 2001 -From: zhuofeng -Date: Wed, 9 Aug 2023 15:22:11 +0800 -Subject: [PATCH] Skip failing test_iterparse_utf16_bom - ---- - src/lxml/tests/test_io.py | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/src/lxml/tests/test_io.py b/src/lxml/tests/test_io.py -index 8fac41d..2b5d0de 100644 ---- a/src/lxml/tests/test_io.py -+++ b/src/lxml/tests/test_io.py -@@ -4,6 +4,7 @@ IO test cases that apply to both etree and ElementTree - - - import unittest -+from unittest import skip - import tempfile, gzip, os, os.path, gc, shutil - - from .common_imports import ( -@@ -305,6 +306,7 @@ class _IOTestCaseBase(HelperTestCase): - os.unlink(f.name) - self.assertEqual(utext, root.text) - -+ @skip - def test_iterparse_utf16_bom(self): - utext = 'Søk på nettet' - uxml = '

%s

' % utext --- -2.33.0 - diff --git a/lxml-5.1.0.tar.gz b/lxml-4.9.4.tar.gz similarity index 33% rename from lxml-5.1.0.tar.gz rename to lxml-4.9.4.tar.gz index d43cdef635556f8ac67d4e7c6fb5adc8c1ba2329..bf045929a9f3435068d4da19bee336cd8aa56239 100644 Binary files a/lxml-5.1.0.tar.gz and b/lxml-4.9.4.tar.gz differ diff --git a/python-lxml.spec b/python-lxml.spec index e2a59437244122f160dba257723cc7d5cf71158b..5185bd8d99619a9d28bb67a69a62d300be10a5d0 100644 --- a/python-lxml.spec +++ b/python-lxml.spec @@ -6,14 +6,14 @@ the simplicity of a native Python API, mostly compatible but superior to the wel The latest release works with all CPython versions from 2.7 to 3.7. Name: python-lxml -Version: 5.1.0 +Version: 4.9.4 Release: 1 Summary: XML processing library combining libxml2/libxslt with the ElementTree API License: BSD URL: https://github.com/lxml/lxml -Source0: https://files.pythonhosted.org/packages/2b/b4/bbccb250adbee490553b6a52712c46c20ea1ba533a643f1424b27ffc6845/lxml-5.1.0.tar.gz +Source0: https://files.pythonhosted.org/packages/84/14/c2070b5e37c650198de8328467dd3d1681e80986f81ba0fea04fc4ec9883/lxml-4.9.4.tar.gz -Patch0: Skip-failing-test_iterparse_utf16_bom.patch +Patch0: 0001-Adapt-to-libxml2-2.10.4-and-later.patch %description %{_description} @@ -67,7 +67,8 @@ mv %{buildroot}/filelist.lst . mv %{buildroot}/doclist.lst . 
%check -make test +cp -a build/lib.%{python3_platform}-*/* src/ +python3 test.py %files -n python3-lxml -f filelist.lst %license doc/licenses/*.txt LICENSES.txt @@ -77,6 +78,12 @@ make test %doc README.rst src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt %changelog +* Mon May 13 2024 Han Guangyu - 4.9.4-1 +- Downgrade to 4.9.4 to adapt Cython 0.29.35 in wallaby project and Python 3.11 +- Bug fix +- Built with Cython 0.29.30 to adapt to changes in Python 3.11 and 3.12 +- Wheels include zlib 1.2.12, libxml2 2.9.14 and libxslt 1.1.35 + * Fri Apr 19 2024 zhengting - 5.1.0-1 - upgrade version to 5.1.0: - use version 5.1.0 to fix build error for Cython and OpenStack