summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/formbox.py 24
1 file changed, 11 insertions, 13 deletions
diff --git a/src/formbox.py b/src/formbox.py
index cdb0c76..39e6132 100644
--- a/src/formbox.py
+++ b/src/formbox.py
@@ -17,6 +17,8 @@
 
 from argparse import ArgumentParser
 from email.header import decode_header
+from email.utils import parsedate_to_datetime
+from functools import partial
 from itertools import starmap
 from mailbox import mbox
 from pathlib import Path
@@ -25,13 +27,10 @@ from urllib.parse import quote
 from bleach import clean, linkify
 from markdown import markdown
 
-markdown_tags = ["h1", "h2", "h3", "h4", "h5", "h6",
-                 "b", "i", "strong", "em", "tt",
-                 "p", "br",
-                 "blockquote", "code", "pre", "hr",
-                 "ul", "ol", "li", "dd", "dt",
-                 "a",
-                 "sub", "sup"]
+sanitise = partial(clean, tags=('a', 'code', 'em', 'strong', 'sub', 'sup',
+                                'blockquote', 'p', 'pre', 'ul', 'ol', 'li'),
+                   protocols=('ftp', 'gemini', 'gopher', 'http', 'https',
+                              'irc', 'ircs', 'mailto', 'matrix', 'xmpp'))
 
 
 def extract(archive, parent):
@@ -51,14 +50,13 @@ def decode(header):
 def render(template, forest, parent):
     for self, children in forest:
         message_id = self['Message-Id']
-        body = clean(linkify(markdown(self.get_payload())), tags=markdown_tags)
-        rendered_children = render(template, children, message_id)
+        date = parsedate_to_datetime(self['Date']).date().isoformat()
         author, address = decode(self['From'])
+        body = sanitise(linkify(markdown(self.get_payload(),
+                                         output_format='html5')))
+        rendered_children = render(template, children, message_id)
         yield template.format(message_id=quote(message_id),
-                              date=self['Date'],
-                              author=author,
-                              parent=parent,
-                              subject=self['Subject'], body=body,
+                              date=date, author=author, body=body,
                               children='\n'.join(rendered_children))