From 0f62bd4871c66903744baea93f650923a0b40af0 Mon Sep 17 00:00:00 2001 From: Nguyễn Gia Phong Date: Wed, 9 Nov 2022 17:44:59 +0900 Subject: Support Atom XHTML content --- src/fead.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'src/fead.py') diff --git a/src/fead.py b/src/fead.py index 6efbe11..600dff0 100755 --- a/src/fead.py +++ b/src/fead.py @@ -33,7 +33,8 @@ from textwrap import shorten from urllib.error import HTTPError from urllib.parse import urljoin, urlsplit from warnings import warn -from xml.etree.ElementTree import fromstring as parse_xml +from xml.etree.ElementTree import (fromstring as parse_xml, + tostring as unparse_xml) REQUEST = 'GET {} HTTP/1.0\r\nHost: {}\r\n\r\n' HTML_TAG = regex('<.+?>') @@ -119,7 +120,11 @@ def parse_atom_entry(xml): elif child.tag.endswith('Atom}summary'): summary = child.text elif child.tag.endswith('Atom}content') and not summary: - summary = child.text + if child.attrib.get('type') == 'xhtml': + assert len(child) == 1 and child[0].tag.endswith('xhtml}div') + summary = unparse_xml(child[0]).decode() + else: + summary = child.text return title, link, time, summary -- cgit 1.4.1