summary refs log tree commit diff
path: root/src/formbox.py
blob: cdb0c765f168462c4a9b948f76dec590a229a8eb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/env python
# Format mbox as HTML/XML
# Copyright (C) 2021  Nguyễn Gia Phong
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

from argparse import ArgumentParser
from email.header import decode_header
from email.utils import parseaddr
from html import escape
from itertools import starmap
from mailbox import mbox
from pathlib import Path
from urllib.parse import quote

from bleach import clean, linkify
from markdown import markdown

# HTML tags allowed to survive sanitization of the rendered message body;
# handed to bleach's clean() as its tags argument.
markdown_tags = [
    # headings
    "h1", "h2", "h3", "h4", "h5", "h6",
    # inline emphasis
    "b", "i", "strong", "em", "tt",
    # paragraphs and line breaks
    "p", "br",
    # quotations, code and rules
    "blockquote", "code", "pre", "hr",
    # lists
    "ul", "ol", "li", "dd", "dt",
    # links
    "a",
    # subscript and superscript
    "sub", "sup",
]


def extract(archive, parent):
    """Lazily yield the replies to parent as (message, replies) pairs.

    Every message in archive whose In-Reply-To header equals parent
    is removed from archive and yielded together with a generator
    of its own replies, which is built by this same function.
    """
    # Walk a snapshot, because archive shrinks while we iterate.
    snapshot = archive.copy()
    for message_id, message in snapshot.items():
        # TODO: handle multipart
        if message['In-Reply-To'] == parent:
            archive.pop(message_id)
            yield message, extract(archive, message_id)


def decode(header):
    """Yield the chunks of header with RFC 2047 encoded-words decoded.

    decode_header gives back the header as a single str when it holds
    no encoded-word, and bytes chunks otherwise, so both kinds are
    accepted.  Chunks without a charset are decoded as UTF-8.
    A missing header (None) yields nothing.
    """
    if header is None:
        return
    for string, charset in decode_header(header):
        if isinstance(string, bytes):
            encoding = 'utf-8' if charset is None else charset
            yield string.decode(encoding)
        else:
            # Already text: the header contained no encoded-word.
            yield string


def render(template, forest, parent):
    """Yield template filled in for each message in forest, recursively.

    forest is an iterable of (message, replies) pairs and parent is
    the value substituted for the {parent} field of these messages;
    their replies are rendered with the message's own Message-Id
    as parent and joined by newlines into the {children} field.

    Header values come from untrusted mail, so the author, subject
    and date are HTML-escaped before substitution.  Missing From,
    Subject or Date headers are rendered as empty strings.
    """
    for self, children in forest:
        message_id = self['Message-Id']
        # NOTE(review): the payload is used as returned by get_payload(),
        # without decode=True; multipart messages are not handled.
        body = clean(linkify(markdown(self.get_payload())), tags=markdown_tags)
        rendered_children = render(template, children, message_id)
        # Split the display name off before decoding it, so that From
        # headers with a plain, an encoded or no name are all accepted.
        # Only the name is handed to the template, never the address.
        name, _ = parseaddr(str(self['From'] or ''))
        author = ''.join(decode(name))
        subject = ''.join(decode(self['Subject']))
        # NOTE(review): parent is substituted as is while message_id is
        # percent-quoted; confirm this is what the template expects.
        yield template.format(message_id=quote(message_id),
                              date=escape(str(self['Date'] or '')),
                              author=escape(author),
                              parent=parent,
                              subject=escape(subject), body=body,
                              children='\n'.join(rendered_children))


# Command line: the mbox file to read, the Message-Id whose replies
# are to be rendered, and the path to the per-message template.
parser = ArgumentParser()
parser.add_argument('mbox')
parser.add_argument('id')
parser.add_argument('template', type=Path)
args = parser.parse_args()

# Index every message of the mailbox by its Message-Id header.
archive = {message['Message-Id']: message for message in mbox(args.mbox)}
template = args.template.read_text()
# Emit the rendered replies back to back, without a trailing newline.
print(''.join(render(template, extract(archive, args.id), args.id)), end='')