From 0085141cda79fe44aa0d04c8686849542023d99c Mon Sep 17 00:00:00 2001 From: Nguyễn Gia Phong Date: Wed, 29 Dec 2021 21:09:08 +0700 Subject: Polish and document --- src/formbox.py | 56 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/formbox.py b/src/formbox.py index 39e6132..16c0221 100644 --- a/src/formbox.py +++ b/src/formbox.py @@ -19,10 +19,9 @@ from argparse import ArgumentParser from email.header import decode_header from email.utils import parsedate_to_datetime from functools import partial -from itertools import starmap from mailbox import mbox from pathlib import Path -from urllib.parse import quote +from urllib.parse import quote, unquote, urlencode from bleach import clean, linkify from markdown import markdown @@ -34,6 +33,7 @@ sanitise = partial(clean, tags=('a', 'code', 'em', 'strong', 'sub', 'sup', def extract(archive, parent): + """Recursively extract emails in reply to given message ID.""" for message_id, message in archive.copy().items(): # TODO: handle multipart if message['In-Reply-To'] != parent: continue @@ -42,30 +42,58 @@ def extract(archive, parent): def decode(header): + """Return the decoded email header.""" for string, charset in decode_header(header): encoding = 'utf-8' if charset is None else charset yield string.decode(encoding) +def reply_to(message): + """Return mailto parameters for replying to the given email.""" + yield 'In-Reply-To', message['Message-ID'] + yield 'Cc', message.get('Reply-To', message['From']) + subject = message['Subject'] + if subject is None: return + if subject.lower().startswith('re:'): + yield 'Subject', subject + else: + yield 'Subject', f'Re: {subject}' + + +def date(message): + """Parse given email's Date header.""" + return parsedate_to_datetime(message['Date']).date() + + def render(template, forest, parent): + """Render the thread recursively based on given template.""" for self, children in forest: message_id = self['Message-Id'] - date = parsedate_to_datetime(self['Date']).date().isoformat() - author, address = decode(self['From']) + try: + author, address = decode(self['From']) + except ValueError: + author = self['From'] body = sanitise(linkify(markdown(self.get_payload(), output_format='html5'))) rendered_children = render(template, children, message_id) yield template.format(message_id=quote(message_id), - date=date, author=author, body=body, - children='\n'.join(rendered_children)) + mailto_params=urlencode(dict(reply_to(self))), + date=date(self).isoformat(), author=author, + body=body, children='\n'.join(rendered_children)) + + +def main(): + """Parse command-line arguments and pass them to routines.""" + parser = ArgumentParser(description='format mbox as HTML/XML') + parser.add_argument('mbox', help='path to mbox file') + parser.add_argument('id', type=unquote, help='root message ID') + parser.add_argument('template', type=Path, help='path to template') + args = parser.parse_args() + archive = {m['Message-Id']: m for m in sorted(mbox(args.mbox), key=date)} + template = args.template.read_text() + print(*render(template, extract(archive, args.id), args.id), + sep='', end='') -parser = ArgumentParser() -parser.add_argument('mbox') -parser.add_argument('id') -parser.add_argument('template', type=Path) -args = parser.parse_args() -archive = {m['Message-Id']: m for m in mbox(args.mbox)} -template = args.template.read_text() -print(*render(template, extract(archive, args.id), args.id), sep='', end='') +if __name__ == '__main__': main() -- cgit 1.4.1