summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- gnu/local.mk 4
-rw-r--r-- gnu/packages/patches/libxml2-parent-pointers.patch 228
-rw-r--r-- gnu/packages/patches/libxml2-terminating-newline.patch 33
-rw-r--r-- gnu/packages/patches/libxml2-xpath-recursion-limit.patch 20
-rw-r--r-- gnu/packages/patches/python-libxml2-python39-compat.patch 94
-rw-r--r-- gnu/packages/xml.scm 25
6 files changed, 300 insertions, 104 deletions
diff --git a/gnu/local.mk b/gnu/local.mk
index 239a32d1fd..45abbd6223 100644
--- a/gnu/local.mk
+++ b/gnu/local.mk
@@ -1356,6 +1356,9 @@ dist_patch_DATA =						\
   %D%/packages/patches/libutils-remove-damaging-includes.patch	\
   %D%/packages/patches/libvdpau-va-gl-unbundle.patch		\
   %D%/packages/patches/libvpx-CVE-2016-2818.patch		\
+  %D%/packages/patches/libxml2-parent-pointers.patch		\
+  %D%/packages/patches/libxml2-terminating-newline.patch	\
+  %D%/packages/patches/libxml2-xpath-recursion-limit.patch	\
   %D%/packages/patches/libxml2-xpath0-Add-option-xpath0.patch	\
   %D%/packages/patches/libxslt-generated-ids.patch		\
   %D%/packages/patches/libxt-guix-search-paths.patch		\
@@ -1587,7 +1590,6 @@ dist_patch_DATA =						\
   %D%/packages/patches/python-pep8-stdlib-tokenize-compat.patch \
   %D%/packages/patches/python-pyfakefs-remove-bad-test.patch	\
   %D%/packages/patches/python-flint-includes.patch		\
-  %D%/packages/patches/python-libxml2-python39-compat.patch	\
   %D%/packages/patches/python-libxml2-utf8.patch		\
   %D%/packages/patches/python-matplotlib-run-under-wayland-gtk3.patch	\
   %D%/packages/patches/python-mediafile-wavpack.patch		\
diff --git a/gnu/packages/patches/libxml2-parent-pointers.patch b/gnu/packages/patches/libxml2-parent-pointers.patch
new file mode 100644
index 0000000000..1f0615c512
--- /dev/null
+++ b/gnu/packages/patches/libxml2-parent-pointers.patch
@@ -0,0 +1,228 @@
+Fix a regression in 2.9.12 where some corrupt XML structures were handled
+incorrectly:
+
+  https://gitlab.gnome.org/GNOME/libxml2/-/issues/255
+
+This is an amalgamation of these upstream commits:
+
+  https://gitlab.gnome.org/GNOME/libxml2/-/commit/85b1792e37b131e7a51af98a37f92472e8de5f3f
+  https://gitlab.gnome.org/GNOME/libxml2/-/commit/13ad8736d294536da4cbcd70a96b0a2fbf47070c
+
+diff --git a/HTMLtree.c b/HTMLtree.c
+--- a/HTMLtree.c
++++ b/HTMLtree.c
+@@ -744,7 +744,7 @@ void
+ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+ 	                 xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
+                          int format) {
+-    xmlNodePtr root;
++    xmlNodePtr root, parent;
+     xmlAttrPtr attr;
+     const htmlElemDesc * info;
+ 
+@@ -755,6 +755,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+     }
+ 
+     root = cur;
++    parent = cur->parent;
+     while (1) {
+         switch (cur->type) {
+         case XML_HTML_DOCUMENT_NODE:
+@@ -762,13 +763,25 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+             if (((xmlDocPtr) cur)->intSubset != NULL) {
+                 htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
+             }
+-            if (cur->children != NULL) {
++            /* Always validate cur->parent when descending. */
++            if ((cur->parent == parent) && (cur->children != NULL)) {
++                parent = cur;
+                 cur = cur->children;
+                 continue;
+             }
+             break;
+ 
+         case XML_ELEMENT_NODE:
++            /*
++             * Some users like lxml are known to pass nodes with a corrupted
++             * tree structure. Fall back to a recursive call to handle this
++             * case.
++             */
++            if ((cur->parent != parent) && (cur->children != NULL)) {
++                htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
++                break;
++            }
++
+             /*
+              * Get specific HTML info for that node.
+              */
+@@ -817,6 +830,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                     (cur->name != NULL) &&
+                     (cur->name[0] != 'p')) /* p, pre, param */
+                     xmlOutputBufferWriteString(buf, "\n");
++                parent = cur;
+                 cur = cur->children;
+                 continue;
+             }
+@@ -825,9 +839,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                 (info != NULL) && (!info->isinline)) {
+                 if ((cur->next->type != HTML_TEXT_NODE) &&
+                     (cur->next->type != HTML_ENTITY_REF_NODE) &&
+-                    (cur->parent != NULL) &&
+-                    (cur->parent->name != NULL) &&
+-                    (cur->parent->name[0] != 'p')) /* p, pre, param */
++                    (parent != NULL) &&
++                    (parent->name != NULL) &&
++                    (parent->name[0] != 'p')) /* p, pre, param */
+                     xmlOutputBufferWriteString(buf, "\n");
+             }
+ 
+@@ -842,9 +856,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                 break;
+             if (((cur->name == (const xmlChar *)xmlStringText) ||
+                  (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
+-                ((cur->parent == NULL) ||
+-                 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
+-                  (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
++                ((parent == NULL) ||
++                 ((xmlStrcasecmp(parent->name, BAD_CAST "script")) &&
++                  (xmlStrcasecmp(parent->name, BAD_CAST "style"))))) {
+                 xmlChar *buffer;
+ 
+                 buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
+@@ -902,13 +916,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                 break;
+             }
+ 
+-            /*
+-             * The parent should never be NULL here but we want to handle
+-             * corrupted documents gracefully.
+-             */
+-            if (cur->parent == NULL)
+-                return;
+-            cur = cur->parent;
++            cur = parent;
++            /* cur->parent was validated when descending. */
++            parent = cur->parent;
+ 
+             if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
+                 (cur->type == XML_DOCUMENT_NODE)) {
+@@ -939,9 +949,9 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+                     (cur->next != NULL)) {
+                     if ((cur->next->type != HTML_TEXT_NODE) &&
+                         (cur->next->type != HTML_ENTITY_REF_NODE) &&
+-                        (cur->parent != NULL) &&
+-                        (cur->parent->name != NULL) &&
+-                        (cur->parent->name[0] != 'p')) /* p, pre, param */
++                        (parent != NULL) &&
++                        (parent->name != NULL) &&
++                        (parent->name[0] != 'p')) /* p, pre, param */
+                         xmlOutputBufferWriteString(buf, "\n");
+                 }
+             }
+diff --git a/xmlsave.c b/xmlsave.c
+--- a/xmlsave.c
++++ b/xmlsave.c
+@@ -847,7 +847,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+ static void
+ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+     int format = ctxt->format;
+-    xmlNodePtr tmp, root, unformattedNode = NULL;
++    xmlNodePtr tmp, root, unformattedNode = NULL, parent;
+     xmlAttrPtr attr;
+     xmlChar *start, *end;
+     xmlOutputBufferPtr buf;
+@@ -856,6 +856,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+     buf = ctxt->buf;
+ 
+     root = cur;
++    parent = cur->parent;
+     while (1) {
+         switch (cur->type) {
+         case XML_DOCUMENT_NODE:
+@@ -868,7 +869,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+             break;
+ 
+         case XML_DOCUMENT_FRAG_NODE:
+-            if (cur->children != NULL) {
++            /* Always validate cur->parent when descending. */
++            if ((cur->parent == parent) && (cur->children != NULL)) {
++                parent = cur;
+                 cur = cur->children;
+                 continue;
+             }
+@@ -887,7 +890,18 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+             break;
+ 
+         case XML_ELEMENT_NODE:
+-	    if ((cur != root) && (ctxt->format == 1) && (xmlIndentTreeOutput))
++            /*
++             * Some users like lxml are known to pass nodes with a corrupted
++             * tree structure. Fall back to a recursive call to handle this
++             * case.
++             */
++            if ((cur->parent != parent) && (cur->children != NULL)) {
++                xmlNodeDumpOutputInternal(ctxt, cur);
++                break;
++            }
++
++	    if ((ctxt->level > 0) && (ctxt->format == 1) &&
++                (xmlIndentTreeOutput))
+ 		xmlOutputBufferWrite(buf, ctxt->indent_size *
+ 				     (ctxt->level > ctxt->indent_nr ?
+ 				      ctxt->indent_nr : ctxt->level),
+@@ -942,6 +956,7 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+                 xmlOutputBufferWrite(buf, 1, ">");
+                 if (ctxt->format == 1) xmlOutputBufferWrite(buf, 1, "\n");
+                 if (ctxt->level >= 0) ctxt->level++;
++                parent = cur;
+                 cur = cur->children;
+                 continue;
+             }
+@@ -1058,13 +1073,9 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+                 break;
+             }
+ 
+-            /*
+-             * The parent should never be NULL here but we want to handle
+-             * corrupted documents gracefully.
+-             */
+-            if (cur->parent == NULL)
+-                return;
+-            cur = cur->parent;
++            cur = parent;
++            /* cur->parent was validated when descending. */
++            parent = cur->parent;
+ 
+             if (cur->type == XML_ELEMENT_NODE) {
+                 if (ctxt->level > 0) ctxt->level--;
+diff --git a/xmlsave.c b/xmlsave.c
+--- a/xmlsave.c
++++ b/xmlsave.c
+@@ -890,6 +890,13 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+             break;
+ 
+         case XML_ELEMENT_NODE:
++	    if ((cur != root) && (ctxt->format == 1) &&
++                (xmlIndentTreeOutput))
++		xmlOutputBufferWrite(buf, ctxt->indent_size *
++				     (ctxt->level > ctxt->indent_nr ?
++				      ctxt->indent_nr : ctxt->level),
++				     ctxt->indent);
++
+             /*
+              * Some users like lxml are known to pass nodes with a corrupted
+              * tree structure. Fall back to a recursive call to handle this
+@@ -900,13 +907,6 @@ xmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
+                 break;
+             }
+ 
+-	    if ((ctxt->level > 0) && (ctxt->format == 1) &&
+-                (xmlIndentTreeOutput))
+-		xmlOutputBufferWrite(buf, ctxt->indent_size *
+-				     (ctxt->level > ctxt->indent_nr ?
+-				      ctxt->indent_nr : ctxt->level),
+-				     ctxt->indent);
+-
+             xmlOutputBufferWrite(buf, 1, "<");
+             if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
+                 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
diff --git a/gnu/packages/patches/libxml2-terminating-newline.patch b/gnu/packages/patches/libxml2-terminating-newline.patch
new file mode 100644
index 0000000000..3f5c88dd4e
--- /dev/null
+++ b/gnu/packages/patches/libxml2-terminating-newline.patch
@@ -0,0 +1,33 @@
+Fix a regression in 2.9.12 where serializing empty HTML documents would
+not add a terminating newline.
+
+  https://gitlab.gnome.org/GNOME/libxml2/-/issues/266
+
+Taken from upstream:
+
+  https://gitlab.gnome.org/GNOME/libxml2/-/commit/92d9ab4c28842a09ca2b76d3ff2f933e01b6cd6f
+
+diff --git a/HTMLtree.c b/HTMLtree.c
+--- a/HTMLtree.c
++++ b/HTMLtree.c
+@@ -763,11 +763,15 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
+             if (((xmlDocPtr) cur)->intSubset != NULL) {
+                 htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
+             }
+-            /* Always validate cur->parent when descending. */
+-            if ((cur->parent == parent) && (cur->children != NULL)) {
+-                parent = cur;
+-                cur = cur->children;
+-                continue;
++            if (cur->children != NULL) {
++                /* Always validate cur->parent when descending. */
++                if (cur->parent == parent) {
++                    parent = cur;
++                    cur = cur->children;
++                    continue;
++                }
++            } else {
++                xmlOutputBufferWriteString(buf, "\n");
+             }
+             break;
+ 
diff --git a/gnu/packages/patches/libxml2-xpath-recursion-limit.patch b/gnu/packages/patches/libxml2-xpath-recursion-limit.patch
new file mode 100644
index 0000000000..051196c635
--- /dev/null
+++ b/gnu/packages/patches/libxml2-xpath-recursion-limit.patch
@@ -0,0 +1,20 @@
+Fix recursion accounting in XPath expressions:
+
+  https://gitlab.gnome.org/GNOME/libxml2/-/issues/264
+
+Taken from upstream:
+
+  https://gitlab.gnome.org/GNOME/libxml2/-/commit/3e1aad4fe584747fd7d17cc7b2863a78e2d21a77
+
+diff --git a/xpath.c b/xpath.c
+--- a/xpath.c
++++ b/xpath.c
+@@ -10983,7 +10983,7 @@ xmlXPathCompileExpr(xmlXPathParserContextPtr ctxt, int sort) {
+     }
+ 
+     if (xpctxt != NULL)
+-        xpctxt->depth -= 1;
++        xpctxt->depth -= 10;
+ }
+ 
+ /**
diff --git a/gnu/packages/patches/python-libxml2-python39-compat.patch b/gnu/packages/patches/python-libxml2-python39-compat.patch
deleted file mode 100644
index a707ea3434..0000000000
--- a/gnu/packages/patches/python-libxml2-python39-compat.patch
+++ /dev/null
@@ -1,94 +0,0 @@
-https://gitlab.gnome.org/GNOME/libxml2/-/commit/e4fb36841800038c289997432ca547c9bfef9db1.patch
-
-From e4fb36841800038c289997432ca547c9bfef9db1 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= <miro@hroncok.cz>
-Date: Fri, 28 Feb 2020 12:48:14 +0100
-Subject: [PATCH] Parenthesize Py<type>_Check() in ifs
-
-In C, if expressions should be parenthesized.
-PyLong_Check, PyUnicode_Check etc. happened to expand to a parenthesized
-expression before, but that's not API to rely on.
-
-Since Python 3.9.0a4 it needs to be parenthesized explicitly.
-
-Fixes https://gitlab.gnome.org/GNOME/libxml2/issues/149
----
- python/libxml.c |  4 ++--
- python/types.c  | 12 ++++++------
- 2 files changed, 8 insertions(+), 8 deletions(-)
-
-diff --git a/python/libxml.c b/python/libxml.c
-index bc676c4e..81e709f3 100644
---- a/python/libxml.c
-+++ b/python/libxml.c
-@@ -294,7 +294,7 @@ xmlPythonFileReadRaw (void * context, char * buffer, int len) {
- 	lenread = PyBytes_Size(ret);
- 	data = PyBytes_AsString(ret);
- #ifdef PyUnicode_Check
--    } else if PyUnicode_Check (ret) {
-+    } else if (PyUnicode_Check (ret)) {
- #if PY_VERSION_HEX >= 0x03030000
-         Py_ssize_t size;
- 	const char *tmp;
-@@ -359,7 +359,7 @@ xmlPythonFileRead (void * context, char * buffer, int len) {
- 	lenread = PyBytes_Size(ret);
- 	data = PyBytes_AsString(ret);
- #ifdef PyUnicode_Check
--    } else if PyUnicode_Check (ret) {
-+    } else if (PyUnicode_Check (ret)) {
- #if PY_VERSION_HEX >= 0x03030000
-         Py_ssize_t size;
- 	const char *tmp;
-diff --git a/python/types.c b/python/types.c
-index c2bafeb1..ed284ec7 100644
---- a/python/types.c
-+++ b/python/types.c
-@@ -602,16 +602,16 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
-     if (obj == NULL) {
-         return (NULL);
-     }
--    if PyFloat_Check (obj) {
-+    if (PyFloat_Check (obj)) {
-         ret = xmlXPathNewFloat((double) PyFloat_AS_DOUBLE(obj));
--    } else if PyLong_Check(obj) {
-+    } else if (PyLong_Check(obj)) {
- #ifdef PyLong_AS_LONG
-         ret = xmlXPathNewFloat((double) PyLong_AS_LONG(obj));
- #else
-         ret = xmlXPathNewFloat((double) PyInt_AS_LONG(obj));
- #endif
- #ifdef PyBool_Check
--    } else if PyBool_Check (obj) {
-+    } else if (PyBool_Check (obj)) {
- 
-         if (obj == Py_True) {
-           ret = xmlXPathNewBoolean(1);
-@@ -620,14 +620,14 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
-           ret = xmlXPathNewBoolean(0);
-         }
- #endif
--    } else if PyBytes_Check (obj) {
-+    } else if (PyBytes_Check (obj)) {
-         xmlChar *str;
- 
-         str = xmlStrndup((const xmlChar *) PyBytes_AS_STRING(obj),
-                          PyBytes_GET_SIZE(obj));
-         ret = xmlXPathWrapString(str);
- #ifdef PyUnicode_Check
--    } else if PyUnicode_Check (obj) {
-+    } else if (PyUnicode_Check (obj)) {
- #if PY_VERSION_HEX >= 0x03030000
-         xmlChar *str;
- 	const char *tmp;
-@@ -650,7 +650,7 @@ libxml_xmlXPathObjectPtrConvert(PyObject *obj)
- 	ret = xmlXPathWrapString(str);
- #endif
- #endif
--    } else if PyList_Check (obj) {
-+    } else if (PyList_Check (obj)) {
-         int i;
-         PyObject *node;
-         xmlNodePtr cur;
--- 
-GitLab
-
diff --git a/gnu/packages/xml.scm b/gnu/packages/xml.scm
index f07f9b91b1..092d238626 100644
--- a/gnu/packages/xml.scm
+++ b/gnu/packages/xml.scm
@@ -184,14 +184,17 @@ hierarchical form with variable field lengths.")
 (define-public libxml2
   (package
     (name "libxml2")
-    (version "2.9.10")
+    (version "2.9.12")
     (source (origin
              (method url-fetch)
              (uri (string-append "ftp://xmlsoft.org/libxml2/libxml2-"
                                  version ".tar.gz"))
              (sha256
               (base32
-               "07xynh8hcxb2yb1fs051xrgszjvj37wnxvxgsj10rzmqzy9y3zma"))))
+               "14hxwzmf5xqppx77z7i0ni9lpzg1a84dqpf8j8l1fvy570g6imn8"))
+             (patches (search-patches "libxml2-parent-pointers.patch"
+                                      "libxml2-terminating-newline.patch"
+                                      "libxml2-xpath-recursion-limit.patch"))))
     (build-system gnu-build-system)
     (outputs '("out" "static" "doc"))
     (arguments
@@ -217,8 +220,7 @@ hierarchical form with variable field lengths.")
                         ;; file such that Libtool does the right thing when both
                         ;; the shared and static variants are available.
                         (substitute* (string-append src "/lib/libxml2.la")
-                          (("^old_library='libxml2.a'") "old_library=''"))
-                        #t))))))
+                          (("^old_library='libxml2.a'") "old_library=''"))))))))
     (home-page "http://www.xmlsoft.org/")
     (synopsis "C parser for XML")
     (inputs `(("xz" ,xz)))
@@ -374,8 +376,7 @@ It uses libxml2 to access the XML files.")
     (source (origin
               (inherit (package-source libxml2))
               (patches
-                (append (search-patches "python-libxml2-python39-compat.patch"
-                                        "python-libxml2-utf8.patch")
+                (append (search-patches "python-libxml2-utf8.patch")
                         (origin-patches (package-source libxml2))))))
     (build-system python-build-system)
     (outputs '("out"))
@@ -397,8 +398,7 @@ It uses libxml2 to access the XML files.")
                  (format #f "ROOT = r'~a'" libxml2))
                 ;; For 'iconv.h'.
                 (("/opt/include")
-                 (string-append glibc "/include"))))
-            #t)))))
+                 (string-append glibc "/include")))))))))
     (inputs `(("libxml2" ,libxml2)))
     (synopsis "Python bindings for the libxml2 library")))
 
@@ -2603,7 +2603,14 @@ because lxml.etree already has its own implementation of XPath 1.0.")
        (method url-fetch)
        (uri (pypi-uri "lxml" version))
        (sha256
-        (base32 "0s14r1w2x9sdlcsw8mxiqgw4rz5zs5lpqpxrfyn4a1mkndqqbdrr"))))
+        (base32 "0s14r1w2x9sdlcsw8mxiqgw4rz5zs5lpqpxrfyn4a1mkndqqbdrr"))
+       ;; Adapt a test to libxml2 2.9.12, taken from this commit:
+       ;; https://github.com/lxml/lxml/commit/852ed1092bd80b6b9a51db24371047e
+       (modules '((guix build utils)))
+       (snippet
+        '(substitute* "src/lxml/tests/test_etree.py"
+             (("self\\.assertEqual\\(\\{'hha': None\\}, el\\.nsmap\\)")
+              "self.assertEqual({}, el.nsmap)")))))
     (build-system python-build-system)
     (arguments
      `(#:phases (modify-phases %standard-phases