From 0085141cda79fe44aa0d04c8686849542023d99c Mon Sep 17 00:00:00 2001 From: Nguyễn Gia Phong Date: Wed, 29 Dec 2021 21:09:08 +0700 Subject: Polish and document --- README.md | 17 +++++++++++++++++ pyproject.toml | 7 ++++--- src/formbox.py | 56 ++++++++++++++++++++++++++++++++++++++++++-------------- 3 files changed, 63 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 77fe625..0638e19 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,23 @@ This Python package depends on [bleach] for HTML sanitising and [markdown] for, well, rendering Markdown to HTML. It is, however, not designed to work with HTML emails with all those CSS and Java scripts. +## Usage + +```console +$ formbox --help +usage: formbox [-h] mbox id template + +format mbox as HTML/XML + +positional arguments: + mbox path to mbox file + id root message ID + template path to template + +optional arguments: + -h, --help show this help message and exit +``` + ## Copying This program is free software: you can redistribute it and/or modify diff --git a/pyproject.toml b/pyproject.toml index 6faa001..2cdba01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi" [project] name = "formbox" -version = "0.0.1" +version = "0.1.0" description = "Format mbox as HTML/XML" readme = "README.md" requires-python = ">=3.7" @@ -13,10 +13,11 @@ authors = [ { name = "Nguyễn Gia Phong", email = "mcsinyx@disroot.org" } ] maintainers = [ { name = "Nguyễn Gia Phong", email = "mcsinyx@disroot.org" } ] keywords = [ "email", "format", "html", "mbox", "template", "xml" ] classifiers = [ - "Development Status :: 3 - Alpha", + "Development Status :: 4 - Beta", "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)", "Operating System :: OS Independent", "Programming Language :: Python", "Topic :: Utilities" ] dependencies = [ "bleach", "markdown" ] -urls = { Git = "https://git.sr.ht/~cnx/formbox" } +urls = { SourceHut = "https://sr.ht/~cnx/formbox" } +scripts = { formbox = "formbox:main" } diff --git a/src/formbox.py b/src/formbox.py index 39e6132..16c0221 100644 --- a/src/formbox.py +++ b/src/formbox.py @@ -19,10 +19,9 @@ from argparse import ArgumentParser from email.header import decode_header from email.utils import parsedate_to_datetime from functools import partial -from itertools import starmap from mailbox import mbox from pathlib import Path -from urllib.parse import quote +from urllib.parse import quote, unquote, urlencode from bleach import clean, linkify from markdown import markdown @@ -34,6 +33,7 @@ sanitise = partial(clean, tags=('a', 'code', 'em', 'strong', 'sub', 'sup', def extract(archive, parent): + """Recursively extract emails in reply to given message ID.""" for message_id, message in archive.copy().items(): # TODO: handle multipart if message['In-Reply-To'] != parent: continue @@ -42,30 +42,58 @@ def extract(archive, parent): def decode(header): + """Return the decoded email header.""" for string, charset in decode_header(header): encoding = 'utf-8' if charset is None else charset yield string.decode(encoding) +def reply_to(message): + """Return mailto parameters for replying to the given email.""" + yield 'In-Reply-To', message['Message-ID'] + yield 'Cc', message.get('Reply-To', message['From']) + subject = message['Subject'] + if subject is None: return + if subject.lower().startswith('re:'): + yield 'Subject', subject + else: + yield 'Subject', f'Re: {subject}' + + +def date(message): + """Parse given email's Date header.""" + return parsedate_to_datetime(message['Date']).date() + + def render(template, forest, parent): + """Render the thread recursively based on given template.""" for self, children in forest: message_id = self['Message-Id'] - date = parsedate_to_datetime(self['Date']).date().isoformat() - author, address = decode(self['From']) + try: + author, address = decode(self['From']) + except ValueError: + author = self['From'] body = sanitise(linkify(markdown(self.get_payload(), output_format='html5'))) rendered_children = render(template, children, message_id) yield template.format(message_id=quote(message_id), - date=date, author=author, body=body, - children='\n'.join(rendered_children)) + mailto_params=urlencode(dict(reply_to(self))), + date=date(self).isoformat(), author=author, + body=body, children='\n'.join(rendered_children)) + + +def main(): + """Parse command-line arguments and pass them to routines.""" + parser = ArgumentParser(description='format mbox as HTML/XML') + parser.add_argument('mbox', help='path to mbox file') + parser.add_argument('id', type=unquote, help='root message ID') + parser.add_argument('template', type=Path, help='path to template') + args = parser.parse_args() + archive = {m['Message-Id']: m for m in sorted(mbox(args.mbox), key=date)} + template = args.template.read_text() + print(*render(template, extract(archive, args.id), args.id), + sep='', end='') -parser = ArgumentParser() -parser.add_argument('mbox') -parser.add_argument('id') -parser.add_argument('template', type=Path) -args = parser.parse_args() -archive = {m['Message-Id']: m for m in mbox(args.mbox)} -template = args.template.read_text() -print(*render(template, extract(archive, args.id), args.id), sep='', end='') +if __name__ == '__main__': main() -- cgit 1.4.1