diff options
Diffstat (limited to 'formbox.py')
-rw-r--r-- | formbox.py | 73 |
1 files changed, 73 insertions, 0 deletions
#!/usr/bin/env python
# Format mbox as HTML/XML
# Copyright (C) 2021  Nguyễn Gia Phong
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

from argparse import ArgumentParser
from email.header import decode_header
from email.utils import parseaddr
from html import escape
from itertools import starmap
from mailbox import mbox
from pathlib import Path
from urllib.parse import quote

from bleach import clean, linkify
from markdown import markdown

# HTML tags allowed to survive sanitization of the rendered message body.
markdown_tags = ["h1", "h2", "h3", "h4", "h5", "h6",
                 "b", "i", "strong", "em", "tt",
                 "p", "br",
                 "blockquote", "code", "pre", "hr",
                 "ul", "ol", "li", "dd", "dt",
                 "a",
                 "sub", "sup"]


def extract(archive, parent):
    """Yield (message, replies) for each direct reply to parent.

    archive maps Message-Id to message.  Every message whose
    In-Reply-To header equals parent is removed from archive
    and yielded together with a generator producing its own replies
    in the same (message, replies) shape.
    """
    # Iterate over a snapshot since archive is mutated along the way.
    for message_id, message in archive.copy().items():
        # TODO: handle multipart
        if message['In-Reply-To'] != parent:
            continue
        archive.pop(message_id)
        yield message, extract(archive, message_id)


def decode(header):
    """Yield the decoded text fragments of the given header value.

    decode_header gives bytes for encoded words (and for the plain text
    surrounding them), but a str when the header contains no encoded word
    at all, so both cases need to be handled.
    """
    for string, charset in decode_header(header):
        if isinstance(string, bytes):
            encoding = 'utf-8' if charset is None else charset
            yield string.decode(encoding)
        else:
            yield string


def _author(message):
    """Return the sender's display name, or the address if there is none."""
    # Split name and address before decoding, so that special characters
    # in a decoded display name cannot confuse the address parser.
    realname, address = parseaddr(str(message['From'] or ''))
    return ''.join(decode(realname)) or address


def render(template, forest, parent):
    """Yield template formatted for each message in forest.

    forest is an iterable of (message, replies) pairs as produced
    by extract, and parent is the ID of the message they reply to.
    The template may refer to the fields message_id, date, author,
    parent, subject, body and children, the last one being
    the newline-joined rendering of the replies.
    """
    for message, children in forest:
        message_id = message['Message-Id']
        # Render the payload as Markdown, then strip disallowed markup.
        body = clean(linkify(markdown(message.get_payload())),
                     tags=markdown_tags)
        rendered_children = render(template, children, message_id)
        # Headers come from untrusted mails and are inserted as text
        # into markup, so they must be escaped.
        yield template.format(message_id=quote(message_id),
                              date=escape(str(message['Date'])),
                              author=escape(_author(message)),
                              parent=parent,
                              subject=escape(str(message['Subject'])),
                              body=body,
                              children='\n'.join(rendered_children))


def main():
    """Render the thread replying to the given ID to standard output."""
    parser = ArgumentParser()
    parser.add_argument('mbox')
    parser.add_argument('id')
    parser.add_argument('template', type=Path)
    args = parser.parse_args()

    archive = {m['Message-Id']: m for m in mbox(args.mbox)}
    template = args.template.read_text()
    print(*render(template, extract(archive, args.id), args.id),
          sep='', end='')


if __name__ == '__main__':
    main()