gnu: python-sgmllib3k: Add Python >=3.9 compatibility.

* gnu/packages/patches/python-sgmllib3k-assertions.patch: New file. * gnu/local.mk: Register it. * gnu/packages/python-xyz.scm (python-sgmllib3k): Use it.
author: Lars-Dominik Braun <lars@6xq.net> 2023-03-18 09:49:51 +0100
committer: Lars-Dominik Braun <lars@6xq.net> 2023-03-18 09:49:51 +0100
commit: cfccd6fe5ae00d7e81cd755be55d51ff3bf17186 (patch)
tree: e933c9e6f2aca0b5583beeba1200a7a7e06b5c6a
parent: cc56be2f3858487cf1d8acfb345942f0784221ee (diff)
download: guix-cfccd6fe5ae00d7e81cd755be55d51ff3bf17186.tar.gz
3 files changed, 224 insertions, 1 deletions
diff --git a/gnu/local.mk b/gnu/local.mk
index ff35978f07..d63e288a48 100644
--- a/gnu/local.mk
+++ b/gnu/local.mk
@@ -1640,6 +1640,7 @@ dist_patch_DATA =						\
   %D%/packages/patches/python-random2-getrandbits-test.patch		\
   %D%/packages/patches/python-poppler-qt5-fix-build.patch	\
   %D%/packages/patches/python-pypdf-annotate-tests-appropriately.patch	\
+  %D%/packages/patches/python-sgmllib3k-assertions.patch	\
   %D%/packages/patches/python-telingo-fix-comparison.patch	\
   %D%/packages/patches/python-w3lib-fix-test-failure.patch	\
   %D%/packages/patches/sdcc-disable-non-free-code.patch		\
diff --git a/gnu/packages/patches/python-sgmllib3k-assertions.patch b/gnu/packages/patches/python-sgmllib3k-assertions.patch
new file mode 100644
index 0000000000..872f1c62c9
--- /dev/null
+++ b/gnu/packages/patches/python-sgmllib3k-assertions.patch
@@ -0,0 +1,221 @@
+Restores compatibility with Python >=3.9,
+which removed the custom .error() method in
+https://github.com/python/cpython/commit/e34bbfd61f405eef89e8aa50672b0b25022de320
+
+Despite the big diff, the only changes are dropping the .error() override and wrapping goahead() in a try…except clause; the rest is re-indentation.
+
+--- source/sgmllib.py	2023-03-18 08:57:58.726240606 +0100
++++ source/sgmllib.py	2023-03-18 09:02:01.667568916 +0100
+@@ -101,113 +101,116 @@
+         """Handle the remaining data."""
+         self.goahead(1)
+ 
+-    def error(self, message):
+-        raise SGMLParseError(message)
+-
+     # Internal -- handle data as far as reasonable.  May leave state
+     # and data to be processed by a subsequent call.  If 'end' is
+     # true, force handling all data as if followed by EOF marker.
+     def goahead(self, end):
+-        rawdata = self.rawdata
+-        i = 0
+-        n = len(rawdata)
+-        while i < n:
+-            if self.nomoretags:
+-                self.handle_data(rawdata[i:n])
+-                i = n
+-                break
+-            match = interesting.search(rawdata, i)
+-            if match: j = match.start()
+-            else: j = n
+-            if i < j:
+-                self.handle_data(rawdata[i:j])
+-            i = j
+-            if i == n: break
+-            if rawdata[i] == '<':
+-                if starttagopen.match(rawdata, i):
++        try:
++            rawdata = self.rawdata
++            i = 0
++            n = len(rawdata)
++            while i < n:
++                if self.nomoretags:
++                    self.handle_data(rawdata[i:n])
++                    i = n
++                    break
++                match = interesting.search(rawdata, i)
++                if match: j = match.start()
++                else: j = n
++                if i < j:
++                    self.handle_data(rawdata[i:j])
++                i = j
++                if i == n: break
++                if rawdata[i] == '<':
++                    if starttagopen.match(rawdata, i):
++                        if self.literal:
++                            self.handle_data(rawdata[i])
++                            i = i+1
++                            continue
++                        k = self.parse_starttag(i)
++                        if k < 0: break
++                        i = k
++                        continue
++                    if rawdata.startswith("</", i):
++                        k = self.parse_endtag(i)
++                        if k < 0: break
++                        i = k
++                        self.literal = 0
++                        continue
++                    if self.literal:
++                        if n > (i + 1):
++                            self.handle_data("<")
++                            i = i+1
++                        else:
++                            # incomplete
++                            break
++                        continue
++                    if rawdata.startswith("<!--", i):
++                            # Strictly speaking, a comment is --.*--
++                            # within a declaration tag <!...>.
++                            # This should be removed,
++                            # and comments handled only in parse_declaration.
++                        k = self.parse_comment(i)
++                        if k < 0: break
++                        i = k
++                        continue
++                    if rawdata.startswith("<?", i):
++                        k = self.parse_pi(i)
++                        if k < 0: break
++                        i = i+k
++                        continue
++                    if rawdata.startswith("<!", i):
++                        # This is some sort of declaration; in "HTML as
++                        # deployed," this should only be the document type
++                        # declaration ("<!DOCTYPE html...>").
++                        k = self.parse_declaration(i)
++                        if k < 0: break
++                        i = k
++                        continue
++                elif rawdata[i] == '&':
+                     if self.literal:
+                         self.handle_data(rawdata[i])
+                         i = i+1
+                         continue
+-                    k = self.parse_starttag(i)
+-                    if k < 0: break
+-                    i = k
+-                    continue
+-                if rawdata.startswith("</", i):
+-                    k = self.parse_endtag(i)
+-                    if k < 0: break
+-                    i = k
+-                    self.literal = 0
+-                    continue
+-                if self.literal:
+-                    if n > (i + 1):
+-                        self.handle_data("<")
+-                        i = i+1
+-                    else:
+-                        # incomplete
+-                        break
+-                    continue
+-                if rawdata.startswith("<!--", i):
+-                        # Strictly speaking, a comment is --.*--
+-                        # within a declaration tag <!...>.
+-                        # This should be removed,
+-                        # and comments handled only in parse_declaration.
+-                    k = self.parse_comment(i)
+-                    if k < 0: break
+-                    i = k
+-                    continue
+-                if rawdata.startswith("<?", i):
+-                    k = self.parse_pi(i)
+-                    if k < 0: break
+-                    i = i+k
+-                    continue
+-                if rawdata.startswith("<!", i):
+-                    # This is some sort of declaration; in "HTML as
+-                    # deployed," this should only be the document type
+-                    # declaration ("<!DOCTYPE html...>").
+-                    k = self.parse_declaration(i)
+-                    if k < 0: break
+-                    i = k
+-                    continue
+-            elif rawdata[i] == '&':
+-                if self.literal:
++                    match = charref.match(rawdata, i)
++                    if match:
++                        name = match.group(1)
++                        self.handle_charref(name)
++                        i = match.end(0)
++                        if rawdata[i-1] != ';': i = i-1
++                        continue
++                    match = entityref.match(rawdata, i)
++                    if match:
++                        name = match.group(1)
++                        self.handle_entityref(name)
++                        i = match.end(0)
++                        if rawdata[i-1] != ';': i = i-1
++                        continue
++                else:
++                    self.error('neither < nor & ??')
++                # We get here only if incomplete matches but
++                # nothing else
++                match = incomplete.match(rawdata, i)
++                if not match:
+                     self.handle_data(rawdata[i])
+                     i = i+1
+                     continue
+-                match = charref.match(rawdata, i)
+-                if match:
+-                    name = match.group(1)
+-                    self.handle_charref(name)
+-                    i = match.end(0)
+-                    if rawdata[i-1] != ';': i = i-1
+-                    continue
+-                match = entityref.match(rawdata, i)
+-                if match:
+-                    name = match.group(1)
+-                    self.handle_entityref(name)
+-                    i = match.end(0)
+-                    if rawdata[i-1] != ';': i = i-1
+-                    continue
+-            else:
+-                self.error('neither < nor & ??')
+-            # We get here only if incomplete matches but
+-            # nothing else
+-            match = incomplete.match(rawdata, i)
+-            if not match:
+-                self.handle_data(rawdata[i])
+-                i = i+1
+-                continue
+-            j = match.end(0)
+-            if j == n:
+-                break # Really incomplete
+-            self.handle_data(rawdata[i:j])
+-            i = j
+-        # end while
+-        if end and i < n:
+-            self.handle_data(rawdata[i:n])
+-            i = n
+-        self.rawdata = rawdata[i:]
+-        # XXX if end: check for empty stack
++                j = match.end(0)
++                if j == n:
++                    break # Really incomplete
++                self.handle_data(rawdata[i:j])
++                i = j
++            # end while
++            if end and i < n:
++                self.handle_data(rawdata[i:n])
++                i = n
++            self.rawdata = rawdata[i:]
++            # XXX if end: check for empty stack
++        except AssertionError as e:
+            # The .error() method, which raised the custom SGMLParseError, was removed
++            # by https://github.com/python/cpython/issues/76025. So we have to catch
++            # _markupbase’s AssertionError and translate it into the old one.
++            raise SGMLParseError (e.args[0]) from e
+ 
+     # Extensions for the DOCTYPE scanner:
+     _decl_otherchars = '='
diff --git a/gnu/packages/python-xyz.scm b/gnu/packages/python-xyz.scm
index 941a74cc75..d637b073df 100644
--- a/gnu/packages/python-xyz.scm
+++ b/gnu/packages/python-xyz.scm
@@ -29072,7 +29072,8 @@ supports x86_64 instructions up to AVX-512 and SHA.")
          (file-name (git-file-name name version))
          (sha256
           (base32
-           "0bzf6pv85dzfxfysm6zbj8m40hp0xzr9h8qlk4hp3nmy88rznqvr"))))
+           "0bzf6pv85dzfxfysm6zbj8m40hp0xzr9h8qlk4hp3nmy88rznqvr"))
+         (patches (search-patches "python-sgmllib3k-assertions.patch"))))
       (build-system python-build-system)
       (home-page "https://github.com/hsoft/sgmllib")
       (synopsis "Python 3 port of sgmllib")
author	Lars-Dominik Braun <lars@6xq.net>	2023-03-18 09:49:51 +0100
committer	Lars-Dominik Braun <lars@6xq.net>	2023-03-18 09:49:51 +0100
commit	cfccd6fe5ae00d7e81cd755be55d51ff3bf17186 (patch)
tree	e933c9e6f2aca0b5583beeba1200a7a7e06b5c6a
parent	cc56be2f3858487cf1d8acfb345942f0784221ee (diff)
download	guix-cfccd6fe5ae00d7e81cd755be55d51ff3bf17186.tar.gz