diff options
Diffstat (limited to 'usth/ICT2.7/fuzzy-find')
-rwxr-xr-x | usth/ICT2.7/fuzzy-find | 26 |
1 files changed, 26 insertions, 0 deletions
#!/usr/bin/env python3.8
"""Interactively fuzzy-search subtitle transcripts.

Every file matching ``*/*`` relative to the working directory is read as a
subtitle file and flattened into one transcript string.  Each query typed at
the ``>>> `` prompt is matched against all transcripts and the best matches
are listed with an index; typing an index at the ``... `` prompt prints the
file name and its transcript wrapped at 80 columns.  An empty line at
``... `` returns to ``>>> ``; an empty line at ``>>> `` (or end of input at
either prompt) quits.
"""
import sys
from glob import glob
from itertools import islice
from os import path
from textwrap import wrap


def load_transcript(filename):
    """Return the spoken text of *filename* as a single normalized string.

    Only every fourth line, starting with the third, is kept
    (presumably SRT-style blocks of index / timestamp / one text line /
    blank -- TODO confirm against the actual subtitle files).

    Raises OSError or UnicodeDecodeError if the file cannot be read.
    """
    with open(filename) as f:
        text = ' '.join(islice(f, 2, None, 4))
    # Guarantee a space after sentence punctuation *before* collapsing
    # whitespace, so an already-present space does not become a double one
    # and no trailing space is left behind.
    return ' '.join(text.replace('.', '. ').replace('?', '? ').split())


def load_transcripts(pattern=path.join('*', '*')):
    """Return a ``{filename: transcript}`` dict for files matching *pattern*.

    Directories (and anything else that is not a regular file) are skipped.
    Files that cannot be read or decoded are reported on stderr and skipped
    instead of aborting the whole run.
    """
    transcripts = {}
    for filename in glob(pattern):
        if not path.isfile(filename):
            continue
        try:
            transcripts[filename] = load_transcript(filename)
        except (OSError, UnicodeDecodeError) as error:
            print(f'fuzzy-find: skipping {filename}: {error}',
                  file=sys.stderr)
    return transcripts


def main():
    """Run the interactive search loop until an empty query or end of input."""
    # Imported here rather than at module level so the transcript helpers
    # can be imported without the third-party dependency installed; it is
    # still the first thing executed, so a missing package fails at once.
    from fuzzywuzzy import fuzz, process

    transcripts = load_transcripts()
    try:
        while query := input('>>> '):
            # With a dict of choices, each result is a
            # (transcript, score, filename) tuple.
            bests = process.extractBests(query, transcripts,
                                         scorer=fuzz.partial_ratio)
            for index, (_, _, filename) in enumerate(bests):
                print(index, filename)
            while choice := input('... '):
                try:
                    transcript, _, filename = bests[int(choice)]
                except (IndexError, ValueError):
                    # Not a valid result number: just ask again.
                    continue
                print(filename, *wrap(transcript, 80), sep='\n')
    except EOFError:
        # Ctrl-D / exhausted stdin: finish the prompt line and exit quietly.
        print()


if __name__ == '__main__':
    main()