about summary refs log tree commit diff
path: root/usth/ICT2.7/fuzzy-find
blob: 70f14ded422087f728eb6baa4911fefb8580e029 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
#!/usr/bin/env python3.8
from glob import glob
from itertools import islice
from os import path
from textwrap import wrap

from fuzzywuzzy import fuzz, process


# Map each subtitle file (relative path, exactly one directory deep) to its
# spoken text flattened into a single searchable string.
transcripts = {}
for filename in glob(path.join('*', '*')):
    # The pattern also matches nested directories; open() would raise on them.
    if not path.isfile(filename):
        continue
    # Decode explicitly instead of relying on the locale, and substitute
    # undecodable bytes so one odd file cannot abort startup.
    with open(filename, encoding='utf-8', errors='replace') as f:
        # Keep lines 2, 6, 10, ... (0-based): every fourth line starting at
        # the third.  Presumably the files are SubRip-style records of
        # index / timestamp / one text line / blank -- confirm against data.
        spoken = ' '.join(islice(f, 2, None, 4))
    # Guarantee a space after sentence-ending punctuation first, then collapse
    # all whitespace, so the padding never leaves double or trailing spaces.
    spaced = spoken.replace('.', '. ').replace('?', '? ')
    transcripts[filename] = ' '.join(spaced.split())

def _prompt(text):
    """Read one line from stdin, treating end-of-input as an empty answer.

    input() raises EOFError on Ctrl-D or when piped stdin runs out; returning
    '' instead lets the prompt loops below end the same way an empty line does.
    """
    try:
        return input(text)
    except EOFError:
        print()  # terminate the prompt line that was left open
        return ''


# Outer prompt: each non-empty query is fuzzy-matched against every transcript.
# An empty line (or end of input) quits the program.
while query := _prompt('>>> '):
    # With a dict of choices each hit is unpacked as (transcript, score, key).
    bests = process.extractBests(query, transcripts, scorer=fuzz.partial_ratio)
    for rank, (_, _, filename) in enumerate(bests):
        print(rank, filename)
    # Inner prompt: show any listed hit by its number; an empty line (or end
    # of input) goes back to the query prompt.
    while answer := _prompt('... '):
        try:
            chosen = bests[int(answer)]
        except (IndexError, ValueError):
            # Not a number or out of range: just ask again.
            continue
        # File name first, then the transcript wrapped to 80 columns.
        print(chosen[2], *wrap(chosen[0], 80), sep='\n')