#!/usr/bin/env python
# Format mbox as HTML/XML
# Copyright (C) 2021 Nguyễn Gia Phong
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from argparse import ArgumentParser
from email.header import decode_header
from email.utils import parseaddr
from html import escape
from itertools import starmap
from mailbox import mbox
from pathlib import Path
from urllib.parse import quote

from bleach import clean, linkify
from markdown import markdown
# HTML elements kept in a message body after sanitization:
# this list is what render passes to bleach's clean as allowed tags.
markdown_tags = (
    ["h1", "h2", "h3", "h4", "h5", "h6"]            # headings
    + ["b", "i", "strong", "em", "tt"]              # inline emphasis
    + ["p", "br"]                                   # paragraphs and breaks
    + ["blockquote", "code", "pre", "hr"]           # quotes, code, rules
    + ["ul", "ol", "li", "dd", "dt"]                # list items
    + ["a"]                                         # hyperlinks
    + ["sub", "sup"]                                # sub/superscript
)
def extract(archive, parent):
    """Lazily yield the direct replies to parent along with their own replies.

    archive maps message IDs to messages.  Each yielded item is a pair
    of a message whose In-Reply-To header equals parent, and a generator
    producing the same kind of pairs for the replies to that message.
    Every message yielded is removed from archive.
    """
    # Walk a snapshot because matching entries are popped from archive
    # while the iteration is still in progress.
    snapshot = archive.copy()
    for key, mail in snapshot.items():
        # TODO: handle multipart
        if mail['In-Reply-To'] == parent:
            archive.pop(key)
            yield mail, extract(archive, key)
def decode(header):
    """Yield the chunks of a possibly RFC 2047 encoded header as text.

    decode_header returns bytes chunks paired with a charset when
    the header contains encoded words, but hands the header back
    untouched as a single str chunk when nothing in it is encoded.
    Chunks of bytes without a declared charset are decoded as UTF-8.
    """
    for string, charset in decode_header(header):
        if isinstance(string, str):
            # Header without any encoded word: it is already text,
            # and str has no decode method to call.
            yield string
            continue
        encoding = 'utf-8' if charset is None else charset
        yield string.decode(encoding)
def render(template, forest, parent):
    """Render a forest of messages recursively using the given template.

    template is a str.format template filled with the fields message_id,
    date, author, parent, subject, body and children.  forest is an
    iterable of (message, replies) pairs, where replies is itself
    such an iterable, and parent is the ID the messages in forest
    reply to.  One rendered string is yielded per message, with its
    rendered replies joined by newlines in the children field.

    The body is converted from Markdown and sanitized down to
    markdown_tags; all header-derived fields are HTML-escaped,
    except message_id which is percent-encoded.
    """
    for self, children in forest:
        message_id = self['Message-Id']
        # NOTE: get_payload() returns a list for multipart messages,
        # which are not handled here.
        body = clean(linkify(markdown(self.get_payload())), tags=markdown_tags)
        rendered_children = render(template, children, message_id)
        # Split the display name off the address before decoding it,
        # so the number of chunks decode_header produces (one for a plain
        # name, several for a partly encoded one) no longer matters.
        realname, address = parseaddr(str(self['From'] or ''))
        # Fall back to the bare address when there is no display name.
        author = ''.join(decode(realname)) if realname else address
        # Header values come from untrusted mail: escape them so they
        # cannot inject markup into the HTML/XML output.  str() mirrors
        # what str.format would do with a missing (None) header.
        yield template.format(message_id=quote(message_id),
                              date=escape(str(self['Date'])),
                              author=escape(author),
                              parent=escape(str(parent)),
                              subject=escape(str(self['Subject'])),
                              body=body,
                              children='\n'.join(rendered_children))
# Command-line interface: path to the mbox file, ID of the message
# whose replies are rendered, and path to the template file.
parser = ArgumentParser()
parser.add_argument('mbox')
parser.add_argument('id')
parser.add_argument('template', type=Path)
args = parser.parse_args()
# Index every message in the mailbox by its Message-Id header.
archive = dict((m['Message-Id'], m) for m in mbox(args.mbox))
template = args.template.read_text()
# Write the rendered replies back to back, with neither a separator
# between them nor a trailing newline.
print(''.join(render(template, extract(archive, args.id), args.id)), end='')