about summary refs log tree commit diff homepage
path: root/_libs/formbox/format
blob: bad40dba52701bef610cbb805974a7c013424366 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python
# Format mbox as HTML/XML
# Copyright (C) 2021  Nguyễn Gia Phong
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

from argparse import ArgumentParser
from email.header import decode_header
from email.utils import parseaddr, parsedate_to_datetime
from functools import partial
from html import escape
from itertools import starmap
from mailbox import mbox
from pathlib import Path
from urllib.parse import quote, unquote

from bleach import clean, linkify
from markdown import markdown

# Whitelists handed to bleach's clean() through the ``tags`` and
# ``protocols`` keyword arguments.
ALLOWED_TAGS = ('a', 'code', 'em', 'strong', 'sub', 'sup',
                'blockquote', 'p', 'pre', 'ul', 'ol', 'li')
ALLOWED_PROTOCOLS = ('ftp', 'gemini', 'gopher', 'http', 'https',
                     'irc', 'ircs', 'mailto', 'matrix', 'xmpp')

# clean() pre-bound to the whitelists above; used on rendered message bodies.
sanitise = partial(clean, tags=ALLOWED_TAGS, protocols=ALLOWED_PROTOCOLS)


def extract(archive, parent):
    """Lazily yield the replies to the message whose ID is ``parent``.

    Each item is a pair ``(message, replies)`` where ``replies`` is
    another generator of the same shape for that message's own replies.
    Every message that gets yielded is removed from ``archive``, which is
    a mapping from message ID to message and is mutated in place.
    """
    # Iterate over a snapshot, since entries are deleted from the archive
    # while the loop (and any nested generator) is running.
    for message_id, message in list(archive.items()):
        # TODO: handle multipart
        if message['In-Reply-To'] == parent:
            del archive[message_id]
            yield message, extract(archive, message_id)


def decode(header):
    """Yield the decoded text fragments of an email header.

    The header is split by ``email.header.decode_header`` and each
    fragment is yielded as ``str``, in order.  Fragments returned as
    bytes are decoded with their declared charset, or as UTF-8 when
    no charset is given.
    """
    for string, charset in decode_header(header):
        if isinstance(string, bytes):
            encoding = 'utf-8' if charset is None else charset
            yield string.decode(encoding)
        else:
            # A header without any encoded word is handed back by
            # decode_header as a str, which has no decode() method.
            yield string


def render(template, forest, parent):
    """Render the thread recursively based on given template.

    ``forest`` is an iterable of ``(message, children)`` pairs, where
    ``children`` is itself such an iterable.  For every message,
    ``template`` is filled in via ``str.format`` with these fields:

    * ``message_id``: the Message-Id header, URL-quoted
    * ``date``: the Date header as an ISO 8601 calendar date
    * ``author``: the decoded display name of the From header,
      HTML-escaped (empty when the header carries no display name)
    * ``body``: the payload converted from Markdown, linkified
      and sanitised
    * ``children``: the rendered replies, joined by newlines

    ``parent`` is not used for the rendering itself; it is only
    passed along to the recursive calls.

    One rendered string is yielded per message in ``forest``.
    """
    for self, children in forest:
        message_id = self['Message-Id']
        date = parsedate_to_datetime(self['Date']).date().isoformat()
        # Separate the display name from the address before decoding,
        # so the result does not depend on how many fragments
        # the header happens to be split into.
        name, _ = parseaddr(self['From'])
        # The name comes from the email and ends up in markup:
        # escape it just like the body is sanitised.
        author = escape(''.join(decode(name))) if name else ''
        body = sanitise(linkify(markdown(self.get_payload(),
                                         output_format='html5')))
        rendered_children = render(template, children, message_id)
        yield template.format(message_id=quote(message_id),
                              date=date, author=author, body=body,
                              children='\n'.join(rendered_children))


if __name__ == '__main__':
    # Positional arguments: path to the mbox, URL-quoted ID of the message
    # whose replies are to be rendered, and path to the template file.
    cli = ArgumentParser()
    cli.add_argument('mbox')
    cli.add_argument('id', type=unquote)
    cli.add_argument('template', type=Path)
    options = cli.parse_args()

    # Index every message of the mailbox by its Message-Id.
    messages = {}
    for mail in mbox(options.mbox):
        messages[mail['Message-Id']] = mail
    layout = options.template.read_text()

    # Emit the rendered thread back to back, without a trailing newline.
    thread = extract(messages, options.id)
    print(''.join(render(layout, thread, options.id)), end='')