diff options
author | Nguyễn Gia Phong <mcsinyx@disroot.org> | 2020-05-24 16:34:31 +0700 |
---|---|---|
committer | Nguyễn Gia Phong <mcsinyx@disroot.org> | 2020-05-24 16:34:31 +0700 |
commit | b2d80610db6beda38573890ed169815e495bc663 (patch) | |
tree | 176e1bca6fe644c619d53cf1c24682c244b79cf6 /usth/ICT2.7/fuzzy-find | |
parent | 49376ab97c7427f1c1eca64072d1a934c2e52f50 (diff) | |
download | cp-b2d80610db6beda38573890ed169815e495bc663.tar.gz |
[usth/ICT2.7] Engineer software
Diffstat (limited to 'usth/ICT2.7/fuzzy-find')
-rwxr-xr-x | usth/ICT2.7/fuzzy-find | 26 |
1 file changed, 26 insertions, 0 deletions
#!/usr/bin/env python3.8
"""Interactively fuzzy-search subtitle transcripts.

Every regular file matching ``*/*`` (relative to the working directory)
is read as a subtitle file and reduced to one line of plain text.  The
user then types a query at the ``>>> `` prompt, gets a numbered list of
the best-matching files, and may print any of them by typing its number
at the ``... `` prompt.  An empty line (or end of input) leaves the
current prompt.
"""
from glob import glob
from itertools import islice
from os import path
from textwrap import wrap


def extract_text(lines):
    """Return the subtitle text found in *lines* as a single line.

    Only every fourth line, starting from the third, is kept.
    NOTE(review): this presumably targets SubRip-style cues laid out as
    number / timestamps / one line of text / blank line -- confirm
    against the actual subtitle files.

    A space is inserted after each ``.`` and ``?`` so that sentences
    which were glued together can be wrapped and matched separately.
    Whitespace is collapsed *after* that insertion, so the result never
    contains doubled or trailing spaces.
    """
    text = ' '.join(islice(lines, 2, None, 4))
    text = text.replace('.', '. ').replace('?', '? ')
    return ' '.join(text.split())


def load_transcripts(pattern=path.join('*', '*')):
    """Return a mapping of file name to transcript text.

    *pattern* is a glob pattern; directories and other non-regular
    files that match it are skipped instead of aborting the load.
    """
    transcripts = {}
    for filename in glob(pattern):
        if not path.isfile(filename):
            continue
        # Decode explicitly rather than through the locale, and do not
        # let one undecodable file prevent the others from loading.
        with open(filename, encoding='utf-8', errors='replace') as f:
            transcripts[filename] = extract_text(f)
    return transcripts


def ask(prompt):
    """Read one line from standard input.

    End of input (e.g. Ctrl-D) is reported as an empty string, which
    the prompt loops treat exactly like an empty answer.
    """
    try:
        return input(prompt)
    except EOFError:
        print()  # finish the prompt line left open on the terminal
        return ''


def main():
    """Run the interactive query loop until an empty query is given."""
    # Imported here so that the text helpers above remain importable
    # without the third-party matcher installed.
    from fuzzywuzzy import fuzz, process

    transcripts = load_transcripts()
    while query := ask('>>> '):
        # With a mapping as choices, each result is a
        # (transcript, score, filename) triple.
        bests = process.extractBests(query, transcripts,
                                     scorer=fuzz.partial_ratio)
        for index, (_, _, filename) in enumerate(bests):
            print(index, filename)
        while choice := ask('... '):
            try:
                chosen = bests[int(choice)]
            except (IndexError, ValueError):
                # Not a listed number: ignore it and prompt again.
                pass
            else:
                print(chosen[2], *wrap(chosen[0], 80), sep='\n')


if __name__ == '__main__':
    main()