Support Atom XHTML content

author: Nguyễn Gia Phong <mcsinyx@disroot.org> 2022-11-09 17:44:59 +0900
committer: Nguyễn Gia Phong <mcsinyx@disroot.org> 2022-11-09 17:47:33 +0900
commit: 0f62bd4871c66903744baea93f650923a0b40af0 (patch)
tree: dbce82bb375ff6f162e26128d6a919246264d59d
parent: f88e73b0387c65d6440a3ca2540e79d79c555b48 (diff)
download: fead-0f62bd4871c66903744baea93f650923a0b40af0.tar.gz
1 file changed, 7 insertions, 2 deletions
diff --git a/src/fead.py b/src/fead.py
index 6efbe11..600dff0 100755
--- a/src/fead.py
+++ b/src/fead.py
@@ -33,7 +33,8 @@ from textwrap import shorten
 from urllib.error import HTTPError
 from urllib.parse import urljoin, urlsplit
 from warnings import warn
-from xml.etree.ElementTree import fromstring as parse_xml
+from xml.etree.ElementTree import (fromstring as parse_xml,
+                                   tostring as unparse_xml)
 
 REQUEST = 'GET {} HTTP/1.0\r\nHost: {}\r\n\r\n'
 HTML_TAG = regex('<.+?>')
@@ -119,7 +120,11 @@ def parse_atom_entry(xml):
         elif child.tag.endswith('Atom}summary'):
             summary = child.text
         elif child.tag.endswith('Atom}content') and not summary:
-            summary = child.text
+            if child.attrib.get('type') == 'xhtml':
+                assert len(child) == 1 and child[0].tag.endswith('xhtml}div')
+                summary = unparse_xml(child[0]).decode()
+            else:
+                summary = child.text
     return title, link, time, summary
author	Nguyễn Gia Phong <mcsinyx@disroot.org>	2022-11-09 17:44:59 +0900
committer	Nguyễn Gia Phong <mcsinyx@disroot.org>	2022-11-09 17:47:33 +0900
commit	0f62bd4871c66903744baea93f650923a0b40af0 (patch)
tree	dbce82bb375ff6f162e26128d6a919246264d59d
parent	f88e73b0387c65d6440a3ca2540e79d79c555b48 (diff)
download	fead-0f62bd4871c66903744baea93f650923a0b40af0.tar.gz