From b886e5f8a11ce4a4f82c0b79453ac42c171599a3 Mon Sep 17 00:00:00 2001 From: Nguyễn Gia Phong Date: Mon, 20 Dec 2021 14:47:58 +0700 Subject: Briefly describe the program --- .gitignore | 1 + README.md | 22 ++++++++++++++++++ formbox.py | 73 ---------------------------------------------------------- pyproject.toml | 2 +- src/formbox.py | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 97 insertions(+), 74 deletions(-) create mode 100644 .gitignore create mode 100644 README.md delete mode 100644 formbox.py create mode 100644 src/formbox.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..849ddff --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +dist/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..77fe625 --- /dev/null +++ b/README.md @@ -0,0 +1,22 @@ +# formbox + +This tiny script formats an [mbox] as HTML or XML. It is intended +for rendering email replies on websites and their [RSS] feed. + +## Prerequisites + +This Python package depends on [bleach] for HTML sanitising +and [markdown] for, well, rendering Markdown to HTML. It is, however, +not designed to work with HTML emails with all those CSS and Java scripts. + +## Copying + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU Affero General Public License as published +by the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +[mbox]: https://en.wikipedia.org/wiki/Mbox +[RSS]: https://www.rssboard.org +[bleach]: https://bleach.readthedocs.io +[markdown]: https://python-markdown.github.io diff --git a/formbox.py b/formbox.py deleted file mode 100644 index cdb0c76..0000000 --- a/formbox.py +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/env python -# Format mbox as HTML/XML -# Copyright (C) 2021 Nguyễn Gia Phong -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>.
- -from argparse import ArgumentParser -from email.header import decode_header -from itertools import starmap -from mailbox import mbox -from pathlib import Path -from urllib.parse import quote - -from bleach import clean, linkify -from markdown import markdown - -markdown_tags = ["h1", "h2", "h3", "h4", "h5", "h6", - "b", "i", "strong", "em", "tt", - "p", "br", - "blockquote", "code", "pre", "hr", - "ul", "ol", "li", "dd", "dt", - "a", - "sub", "sup"] - - -def extract(archive, parent): - for message_id, message in archive.copy().items(): - # TODO: handle multipart - if message['In-Reply-To'] != parent: continue - archive.pop(message_id) - yield message, extract(archive, message_id) - - -def decode(header): - for string, charset in decode_header(header): - encoding = 'utf-8' if charset is None else charset - yield string.decode(encoding) - - -def render(template, forest, parent): - for self, children in forest: - message_id = self['Message-Id'] - body = clean(linkify(markdown(self.get_payload())), tags=markdown_tags) - rendered_children = render(template, children, message_id) - author, address = decode(self['From']) - yield template.format(message_id=quote(message_id), - date=self['Date'], - author=author, - parent=parent, - subject=self['Subject'], body=body, - children='\n'.join(rendered_children)) - - -parser = ArgumentParser() -parser.add_argument('mbox') -parser.add_argument('id') -parser.add_argument('template', type=Path) -args = parser.parse_args() - -archive = {m['Message-Id']: m for m in mbox(args.mbox)} -template = args.template.read_text() -print(*render(template, extract(archive, args.id), args.id), sep='', end='') diff --git a/pyproject.toml b/pyproject.toml index 9256938..6faa001 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ authors = [ { name = "Nguyễn Gia Phong", email = "mcsinyx@disroot.org" } ] maintainers = [ { name = "Nguyễn Gia Phong", email = "mcsinyx@disroot.org" } ] keywords = [ "email", "format", "html", "mbox", 
"template", "xml" ] classifiers = [ - "Development Status :: 5 - Production/Stable", + "Development Status :: 3 - Alpha", "License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)", "Operating System :: OS Independent", "Programming Language :: Python", diff --git a/src/formbox.py b/src/formbox.py new file mode 100644 index 0000000..cdb0c76 --- /dev/null +++ b/src/formbox.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python +# Format mbox as HTML/XML +# Copyright (C) 2021 Nguyễn Gia Phong +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>.
+ +from argparse import ArgumentParser +from email.header import decode_header +from itertools import starmap +from mailbox import mbox +from pathlib import Path +from urllib.parse import quote + +from bleach import clean, linkify +from markdown import markdown + +markdown_tags = ["h1", "h2", "h3", "h4", "h5", "h6", + "b", "i", "strong", "em", "tt", + "p", "br", + "blockquote", "code", "pre", "hr", + "ul", "ol", "li", "dd", "dt", + "a", + "sub", "sup"] + + +def extract(archive, parent): + for message_id, message in archive.copy().items(): + # TODO: handle multipart + if message['In-Reply-To'] != parent: continue + archive.pop(message_id) + yield message, extract(archive, message_id) + + +def decode(header): + for string, charset in decode_header(header): + encoding = 'utf-8' if charset is None else charset + yield string.decode(encoding) + + +def render(template, forest, parent): + for self, children in forest: + message_id = self['Message-Id'] + body = clean(linkify(markdown(self.get_payload())), tags=markdown_tags) + rendered_children = render(template, children, message_id) + author, address = decode(self['From']) + yield template.format(message_id=quote(message_id), + date=self['Date'], + author=author, + parent=parent, + subject=self['Subject'], body=body, + children='\n'.join(rendered_children)) + + +parser = ArgumentParser() +parser.add_argument('mbox') +parser.add_argument('id') +parser.add_argument('template', type=Path) +args = parser.parse_args() + +archive = {m['Message-Id']: m for m in mbox(args.mbox)} +template = args.template.read_text() +print(*render(template, extract(archive, args.id), args.id), sep='', end='') -- cgit 1.4.1