about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author: Nguyễn Gia Phong <mcsinyx@disroot.org>, 2022-01-07 21:58:14 +0700
committer: Nguyễn Gia Phong <mcsinyx@disroot.org>, 2022-01-07 21:58:14 +0700
commit: 9710c6acdf63d866e44e1f77d3bd5c578f329ecb (patch)
tree: 61e90b136f754298ae329405f730590a940094a0 /src
parent: 63b6b88c93c91b52e653a0ca1aa4936536ab1d8c (diff)
download: formbox-9710c6acdf63d866e44e1f77d3bd5c578f329ecb.tar.gz
Improve extraction performance
Yes I'm micro-optimizing, gotta go fast.
Diffstat (limited to 'src')
-rw-r--r-- src/formbox.py 24
1 file changed, 9 insertions, 15 deletions
diff --git a/src/formbox.py b/src/formbox.py
index 16c0221..84c778a 100644
--- a/src/formbox.py
+++ b/src/formbox.py
@@ -16,6 +16,7 @@
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
 from argparse import ArgumentParser
+from collections import defaultdict
 from email.header import decode_header
 from email.utils import parsedate_to_datetime
 from functools import partial
@@ -32,15 +33,6 @@ sanitise = partial(clean, tags=('a', 'code', 'em', 'strong', 'sub', 'sup',
                               'irc', 'ircs', 'mailto', 'matrix', 'xmpp'))
 
 
-def extract(archive, parent):
-    """Recursively extract emails in reply to given message ID."""
-    for message_id, message in archive.copy().items():
-        # TODO: handle multipart
-        if message['In-Reply-To'] != parent: continue
-        archive.pop(message_id)
-        yield message, extract(archive, message_id)
-
-
 def decode(header):
     """Return the decoded email header."""
     for string, charset in decode_header(header):
@@ -65,17 +57,18 @@ def date(message):
     return parsedate_to_datetime(message['Date']).date()
 
 
-def render(template, forest, parent):
+def render(template, archive, parent):
     """Render the thread recursively based on given template."""
-    for self, children in forest:
+    for self in archive[parent]:
         message_id = self['Message-Id']
         try:
             author, address = decode(self['From'])
         except ValueError:
             author = self['From']
+        # TODO: handle multipart
         body = sanitise(linkify(markdown(self.get_payload(),
                                          output_format='html5')))
-        rendered_children = render(template, children, message_id)
+        rendered_children = render(template, archive, message_id)
         yield template.format(message_id=quote(message_id),
                               mailto_params=urlencode(dict(reply_to(self))),
                               date=date(self).isoformat(), author=author,
@@ -90,10 +83,11 @@ def main():
     parser.add_argument('template', type=Path, help='path to template')
     args = parser.parse_args()
 
-    archive = {m['Message-Id']: m for m in sorted(mbox(args.mbox), key=date)}
+    archive = defaultdict(list)
+    for message in sorted(mbox(args.mbox), key=date):
+        archive[message['In-Reply-To']].append(message)
     template = args.template.read_text()
-    print(*render(template, extract(archive, args.id), args.id),
-          sep='', end='')
+    print(*render(template, archive, args.id), sep='', end='')
 
 
 if __name__ == '__main__': main()