#!/usr/bin/env python
# Format mbox as HTML/XML
# Copyright (C) 2021  Nguyễn Gia Phong
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

from argparse import ArgumentParser
from email.header import decode_header
from email.utils import parseaddr
from html import escape
from itertools import starmap
from mailbox import mbox
from pathlib import Path
from urllib.parse import quote

from bleach import clean, linkify
from markdown import markdown

# HTML tags allowed to survive sanitization of a rendered message body.
markdown_tags = ["h1", "h2", "h3", "h4", "h5", "h6",
                 "b", "i", "strong", "em", "tt",
                 "p", "br", "blockquote", "code", "pre", "hr",
                 "ul", "ol", "li", "dd", "dt", "a", "sub", "sup"]


def extract(archive, parent):
    """Lazily yield (message, replies) for each reply to parent.

    archive maps Message-Id to message.  Every yielded message is
    removed from archive, and replies is itself a generator created
    by calling extract on the yielded message's ID.
    """
    # Iterate over a snapshot because entries are removed along the way.
    for message_id, message in archive.copy().items():
        if message['In-Reply-To'] != parent:
            continue
        # The snapshot may be stale: with cyclic In-Reply-To headers
        # a nested generator can have claimed this message already.
        if archive.pop(message_id, None) is None:
            continue
        yield message, extract(archive, message_id)


def decode(header):
    """Yield each chunk of the given RFC 2047 header as str."""
    for string, charset in decode_header(header):
        # decode_header gives back str untouched
        # when the header contains no encoded word.
        if isinstance(string, str):
            yield string
            continue
        encoding = 'utf-8' if charset is None else charset
        try:
            text = string.decode(encoding, errors='replace')
        except LookupError:  # charset unknown to Python
            text = string.decode('utf-8', errors='replace')
        yield text


def _text_body(message):
    """Return the decoded text of the message's body.

    For a multipart message the first text/plain part is used,
    and an empty string is returned if there is none.
    """
    if message.is_multipart():
        for part in message.walk():
            if part.get_content_type() == 'text/plain':
                message = part
                break
        else:
            return ''
    # decode=True undoes the Content-Transfer-Encoding
    # (e.g. quoted-printable or base64) and returns bytes.
    payload = message.get_payload(decode=True)
    if payload is None:
        return ''
    charset = message.get_content_charset('utf-8')
    try:
        return payload.decode(charset, errors='replace')
    except LookupError:  # charset unknown to Python
        return payload.decode('utf-8', errors='replace')


def render(template, forest, parent):
    """Yield the template filled in for each message tree in forest.

    forest is an iterable of (message, replies) pairs such as the one
    produced by extract, and parent is the Message-Id they reply to.
    template is a str.format string receiving the keyword fields
    message_id (URL-quoted), date, author, parent, subject, body
    and children (the rendered replies joined by newlines).

    Values taken from headers are escaped for HTML/XML, whilst body
    is the message text rendered as Markdown and sanitized to only
    contain the tags listed in markdown_tags.
    """
    for self, children in forest:
        message_id = self['Message-Id']
        body = clean(linkify(markdown(_text_body(self))),
                     tags=markdown_tags)
        rendered_children = render(template, children, message_id)
        # Split the address off before decoding, so that special
        # characters in an encoded display name cannot confuse the parser.
        name, address = parseaddr(str(self.get('From', '')))
        author = ''.join(decode(name)) or address
        subject = ''.join(decode(self.get('Subject', '')))
        yield template.format(message_id=quote(message_id),
                              date=escape(str(self.get('Date', ''))),
                              author=escape(author),
                              parent=escape(parent),
                              subject=escape(subject),
                              body=body,
                              children='\n'.join(rendered_children))


def main():
    """Parse command-line arguments and print the rendered thread."""
    parser = ArgumentParser(description='Format mbox as HTML/XML')
    parser.add_argument('mbox', help='path to the mbox file')
    parser.add_argument('id', help='Message-Id the thread replies to')
    parser.add_argument('template', type=Path,
                        help='path to the str.format template')
    args = parser.parse_args()

    # create=False: do not create an empty mailbox on a mistyped path.
    box = mbox(args.mbox, create=False)
    try:
        archive = {m['Message-Id']: m for m in box}
    finally:
        box.close()
    template = args.template.read_text()
    print(*render(template, extract(archive, args.id), args.id),
          sep='', end='')


if __name__ == '__main__':
    main()