about summary refs log tree commit diff
path: root/usth/ICT2.7/fuzzy-find
diff options
context:
space:
mode:
author Nguyễn Gia Phong <mcsinyx@disroot.org> 2020-05-24 16:34:31 +0700
committer Nguyễn Gia Phong <mcsinyx@disroot.org> 2020-05-24 16:34:31 +0700
commit b2d80610db6beda38573890ed169815e495bc663 (patch)
tree 176e1bca6fe644c619d53cf1c24682c244b79cf6 /usth/ICT2.7/fuzzy-find
parent 49376ab97c7427f1c1eca64072d1a934c2e52f50 (diff)
download cp-b2d80610db6beda38573890ed169815e495bc663.tar.gz
[usth/ICT2.7] Engineer software
Diffstat (limited to 'usth/ICT2.7/fuzzy-find')
-rwxr-xr-x usth/ICT2.7/fuzzy-find 26
1 files changed, 26 insertions, 0 deletions
diff --git a/usth/ICT2.7/fuzzy-find b/usth/ICT2.7/fuzzy-find
new file mode 100755
index 0000000..70f14de
--- /dev/null
+++ b/usth/ICT2.7/fuzzy-find
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3.8
+from glob import glob
+from itertools import islice
+from os import path
+from textwrap import wrap
+
+from fuzzywuzzy import fuzz, process
+
+# Build the search corpus: one whitespace-normalized transcript per file.
+transcripts = {}  # filename -> transcript text
+for filename in glob(path.join('*', '*')):  # entries inside immediate subdirectories
+    with open(filename) as f:  # keeps lines 3, 7, 11, ... of the file
+        subtitles = ' '.join(' '.join(islice(f, 2, None, 4)).split())  # NOTE(review): presumably 4-line subtitle cues - confirm
+        transcripts[filename] = subtitles.replace('.', '. ').replace('?', '? ')
+# Prompt for queries until an empty line; list the matches extractBests returns.
+while query := input('>>> '):
+    bests = process.extractBests(query, transcripts, scorer=fuzz.partial_ratio)
+    for index, (transcript, score, filename) in enumerate(bests):  # numbered menu
+        print(index, filename)
+    while index := input('... '):  # rebinds index to the typed text; empty input ends
+        try:
+            chosen = bests[int(index)]
+        except (IndexError, ValueError):
+            pass  # non-numeric or out-of-range input: just prompt again
+        else:
+            print(chosen[2], *wrap(chosen[0], 80), sep='\n')  # filename, then text wrapped at 80 columns