From 5d768106b7083ee3d9a66e6e947ecfc8dfeb35f4 Mon Sep 17 00:00:00 2001 From: Nguyễn Gia Phong Date: Thu, 23 Dec 2021 17:43:01 +0700 Subject: Improve formatting --- src/formbox.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/formbox.py b/src/formbox.py index cdb0c76..39e6132 100644 --- a/src/formbox.py +++ b/src/formbox.py @@ -17,6 +17,8 @@ from argparse import ArgumentParser from email.header import decode_header +from email.utils import parsedate_to_datetime +from functools import partial from itertools import starmap from mailbox import mbox from pathlib import Path @@ -25,13 +27,10 @@ from urllib.parse import quote from bleach import clean, linkify from markdown import markdown -markdown_tags = ["h1", "h2", "h3", "h4", "h5", "h6", - "b", "i", "strong", "em", "tt", - "p", "br", - "blockquote", "code", "pre", "hr", - "ul", "ol", "li", "dd", "dt", - "a", - "sub", "sup"] +sanitise = partial(clean, tags=('a', 'code', 'em', 'strong', 'sub', 'sup', + 'blockquote', 'p', 'pre', 'ul', 'ol', 'li'), + protocols=('ftp', 'gemini', 'gopher', 'http', 'https', + 'irc', 'ircs', 'mailto', 'matrix', 'xmpp')) def extract(archive, parent): @@ -51,14 +50,13 @@ def decode(header): def render(template, forest, parent): for self, children in forest: message_id = self['Message-Id'] - body = clean(linkify(markdown(self.get_payload())), tags=markdown_tags) - rendered_children = render(template, children, message_id) + date = parsedate_to_datetime(self['Date']).date().isoformat() author, address = decode(self['From']) + body = sanitise(linkify(markdown(self.get_payload(), + output_format='html5'))) + rendered_children = render(template, children, message_id) yield template.format(message_id=quote(message_id), - date=self['Date'], - author=author, - parent=parent, - subject=self['Subject'], body=body, + date=date, author=author, body=body, children='\n'.join(rendered_children)) -- cgit 1.4.1