about summary refs log tree commit diff
path: root/formbox.py
diff options
context:
space:
mode:
Diffstat (limited to 'formbox.py')
-rw-r--r-- formbox.py 73
1 files changed, 73 insertions, 0 deletions
diff --git a/formbox.py b/formbox.py
new file mode 100644
index 0000000..cdb0c76
--- /dev/null
+++ b/formbox.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python
+# Format mbox as HTML/XML
+# Copyright (C) 2021  Nguyễn Gia Phong
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+from argparse import ArgumentParser
+from email.header import decode_header
+from itertools import starmap
+from mailbox import mbox
+from pathlib import Path
+from urllib.parse import quote
+
+from bleach import clean, linkify
+from markdown import markdown
+
+# HTML elements allowed to survive bleach.clean() when a message body
+# is rendered; passed as the ``tags`` argument in render().
+markdown_tags = [
+    # headings
+    "h1", "h2", "h3", "h4", "h5", "h6",
+    # inline emphasis
+    "b", "i", "strong", "em", "tt",
+    # paragraphs and line breaks
+    "p", "br",
+    # quotations, code and rules
+    "blockquote", "code", "pre", "hr",
+    # lists
+    "ul", "ol", "li", "dd", "dt",
+    # hyperlinks
+    "a",
+    # subscript and superscript
+    "sub", "sup",
+]
+
+
+def extract(archive, parent):
+    """Lazily yield the replies to *parent* along with their own replies.
+
+    Each item is a ``(message, replies)`` pair, where ``replies`` is
+    another generator made by this function for that message's key.
+    A message is removed from *archive* at the moment it is yielded.
+    """
+    # Iterate over a snapshot: entries are deleted from the live mapping
+    # below, both by this generator and by the nested ones.
+    # TODO: handle multipart
+    for key, mail in tuple(archive.items()):
+        if mail['In-Reply-To'] == parent:
+            del archive[key]
+            yield mail, extract(archive, key)
+
+
+def decode(header):
+    """Yield every chunk of *header* as text, undoing RFC 2047 encoding.
+
+    Chunks that decode_header() reports without a charset are decoded
+    as UTF-8.
+    """
+    for chunk, charset in decode_header(header):
+        # decode_header() hands back the header unchanged, as str,
+        # when it contains no encoded word; only bytes need decoding.
+        if isinstance(chunk, bytes):
+            yield chunk.decode('utf-8' if charset is None else charset)
+        else:
+            yield chunk
+
+
+def render(template, forest, parent):
+    """Yield *template* filled in once for every message in *forest*.
+
+    *template* is a str.format() string with the fields ``message_id``,
+    ``date``, ``author``, ``parent``, ``subject``, ``body`` and
+    ``children``.  *forest* is an iterable of ``(message, replies)``
+    pairs such as the one produced by extract(); the replies are
+    rendered recursively with the same template, joined by newlines
+    and substituted for ``children``.  *parent* is substituted as is.
+    """
+    from email.utils import parseaddr
+    from html import escape
+
+    for self, children in forest:
+        message_id = self['Message-Id']
+        body = clean(linkify(markdown(self.get_payload())), tags=markdown_tags)
+        rendered_children = render(template, children, message_id)
+        # Split the address off before decoding, so the result does not
+        # depend on how many chunks the header decodes into.
+        name, _address = parseaddr(self['From'])
+        author = ''.join(decode(name))
+        # Header values come from the mail itself and are untrusted:
+        # escape them before they are placed into the markup.
+        # NOTE(review): parent is still inserted verbatim because its
+        # use in the template is not visible here -- confirm whether
+        # it needs quote() like message_id.
+        yield template.format(message_id=quote(message_id),
+                              date=escape(str(self['Date'])),
+                              author=escape(author),
+                              parent=parent,
+                              subject=escape(str(self['Subject'])),
+                              body=body,
+                              children='\n'.join(rendered_children))
+
+
+# Command line: the mbox file, the ID whose replies are to be rendered,
+# and the file holding the per-message template.
+parser = ArgumentParser()
+parser.add_argument('mbox')
+parser.add_argument('id')
+parser.add_argument('template', type=Path)
+args = parser.parse_args()
+
+# create=False: a mistyped path must raise an error instead of silently
+# creating an empty mailbox file and printing nothing.
+archive = {m['Message-Id']: m for m in mbox(args.mbox, create=False)}
+template = args.template.read_text()
+# Each rendered thread is written back to back, with no trailing newline.
+print(*render(template, extract(archive, args.id), args.id), sep='', end='')