diff options
author | Nguyễn Gia Phong <mcsinyx@disroot.org> | 2021-12-23 17:43:01 +0700 |
---|---|---|
committer | Nguyễn Gia Phong <mcsinyx@disroot.org> | 2021-12-23 17:43:01 +0700 |
commit | 5d768106b7083ee3d9a66e6e947ecfc8dfeb35f4 (patch) | |
tree | c813f8f749355cbb51e372ec0f89fcfbdc6bb92d /src | |
parent | b886e5f8a11ce4a4f82c0b79453ac42c171599a3 (diff) | |
download | formbox-e83e289a6c39b390b9a4bb79e31f5e9a75d28d81.tar.gz |
Improve formatting (tag: 0.0.1)
Diffstat (limited to 'src')
-rw-r--r-- | src/formbox.py | 24 |
1 file changed, 11 insertions, 13 deletions
```diff
diff --git a/src/formbox.py b/src/formbox.py
index cdb0c76..39e6132 100644
--- a/src/formbox.py
+++ b/src/formbox.py
@@ -17,6 +17,8 @@
 
 from argparse import ArgumentParser
 from email.header import decode_header
+from email.utils import parsedate_to_datetime
+from functools import partial
 from itertools import starmap
 from mailbox import mbox
 from pathlib import Path
@@ -25,13 +27,10 @@ from urllib.parse import quote
 from bleach import clean, linkify
 from markdown import markdown
 
-markdown_tags = ["h1", "h2", "h3", "h4", "h5", "h6",
-                 "b", "i", "strong", "em", "tt",
-                 "p", "br",
-                 "blockquote", "code", "pre", "hr",
-                 "ul", "ol", "li", "dd", "dt",
-                 "a",
-                 "sub", "sup"]
+sanitise = partial(clean, tags=('a', 'code', 'em', 'strong', 'sub', 'sup',
+                                'blockquote', 'p', 'pre', 'ul', 'ol', 'li'),
+                   protocols=('ftp', 'gemini', 'gopher', 'http', 'https',
+                              'irc', 'ircs', 'mailto', 'matrix', 'xmpp'))
 
 
 def extract(archive, parent):
@@ -51,14 +50,13 @@ def decode(header):
 def render(template, forest, parent):
     for self, children in forest:
         message_id = self['Message-Id']
-        body = clean(linkify(markdown(self.get_payload())), tags=markdown_tags)
-        rendered_children = render(template, children, message_id)
+        date = parsedate_to_datetime(self['Date']).date().isoformat()
         author, address = decode(self['From'])
+        body = sanitise(linkify(markdown(self.get_payload(),
+                                         output_format='html5')))
+        rendered_children = render(template, children, message_id)
         yield template.format(message_id=quote(message_id),
-                              date=self['Date'],
-                              author=author,
-                              parent=parent,
-                              subject=self['Subject'], body=body,
+                              date=date, author=author, body=body,
                               children='\n'.join(rendered_children))
 
 
```