diff options
author | Nguyễn Gia Phong <mcsinyx@disroot.org> | 2022-01-07 21:58:14 +0700 |
---|---|---|
committer | Nguyễn Gia Phong <mcsinyx@disroot.org> | 2022-01-07 21:58:14 +0700 |
commit | 9710c6acdf63d866e44e1f77d3bd5c578f329ecb (patch) | |
tree | 61e90b136f754298ae329405f730590a940094a0 /src/formbox.py | |
parent | 63b6b88c93c91b52e653a0ca1aa4936536ab1d8c (diff) | |
download | formbox-9710c6acdf63d866e44e1f77d3bd5c578f329ecb.tar.gz |
Improve extraction performance
Yes I'm micro-optimizing, gotta go fast.
Diffstat (limited to 'src/formbox.py')
-rw-r--r-- | src/formbox.py | 24 |
1 file changed, 9 insertions, 15 deletions
```diff
diff --git a/src/formbox.py b/src/formbox.py
index 16c0221..84c778a 100644
--- a/src/formbox.py
+++ b/src/formbox.py
@@ -16,6 +16,7 @@
 # along with this program. If not, see <https://www.gnu.org/licenses/>.
 
 from argparse import ArgumentParser
+from collections import defaultdict
 from email.header import decode_header
 from email.utils import parsedate_to_datetime
 from functools import partial
@@ -32,15 +33,6 @@ sanitise = partial(clean, tags=('a', 'code', 'em', 'strong', 'sub', 'sup',
                               'irc', 'ircs', 'mailto', 'matrix', 'xmpp'))
 
 
-def extract(archive, parent):
-    """Recursively extract emails in reply to given message ID."""
-    for message_id, message in archive.copy().items():
-        # TODO: handle multipart
-        if message['In-Reply-To'] != parent: continue
-        archive.pop(message_id)
-        yield message, extract(archive, message_id)
-
-
 def decode(header):
     """Return the decoded email header."""
     for string, charset in decode_header(header):
@@ -65,17 +57,18 @@ def date(message):
     return parsedate_to_datetime(message['Date']).date()
 
 
-def render(template, forest, parent):
+def render(template, archive, parent):
     """Render the thread recursively based on given template."""
-    for self, children in forest:
+    for self in archive[parent]:
         message_id = self['Message-Id']
         try:
             author, address = decode(self['From'])
         except ValueError:
             author = self['From']
+        # TODO: handle multipart
         body = sanitise(linkify(markdown(self.get_payload(),
                                          output_format='html5')))
-        rendered_children = render(template, children, message_id)
+        rendered_children = render(template, archive, message_id)
         yield template.format(message_id=quote(message_id),
                               mailto_params=urlencode(dict(reply_to(self))),
                               date=date(self).isoformat(), author=author,
@@ -90,10 +83,11 @@ def main():
     parser.add_argument('template', type=Path, help='path to template')
     args = parser.parse_args()
 
-    archive = {m['Message-Id']: m for m in sorted(mbox(args.mbox), key=date)}
+    archive = defaultdict(list)
+    for message in sorted(mbox(args.mbox), key=date):
+        archive[message['In-Reply-To']].append(message)
     template = args.template.read_text()
-    print(*render(template, extract(archive, args.id), args.id),
-          sep='', end='')
+    print(*render(template, archive, args.id), sep='', end='')
 
 
 if __name__ == '__main__': main()
```