#!/usr/bin/env python3.8
"""Interactively fuzzy-search subtitle transcripts.

Every regular file matching ``*/*`` (relative to the working directory)
is read and flattened into one transcript string.  Each query typed at
the ``>>> `` prompt is fuzzy-matched against all transcripts and the
best-matching files are listed with a number.  At the ``... `` prompt,
entering one of those numbers prints the file name followed by its
transcript wrapped to 80 columns.  An empty line (or end-of-file)
leaves the current prompt; doing so at ``>>> `` ends the program.
"""
# Provenance: usth/ICT2.7/fuzzy-find, added in commit
# b2d80610db6beda38573890ed169815e495bc663
# ("[usth/ICT2.7] Engineer software").
import sys
from glob import glob
from itertools import islice
from os import path
from textwrap import wrap
from typing import Dict, Iterable, Optional, Sequence, Tuple

# One search hit, in the order the search results are unpacked below:
# (transcript, score, filename).
Match = Tuple[str, int, str]


def parse_subtitles(lines: Iterable[str]) -> str:
    """Flatten subtitle lines into a single-spaced transcript.

    Only every fourth line, starting from the third, is kept.
    NOTE(review): presumably the input is SubRip-like, with four-line
    cues (counter, timing, one line of text, blank) -- confirm.
    """
    text = ' '.join(islice(lines, 2, None, 4))
    # Pad sentence ends *before* collapsing whitespace, so that text
    # which already had a space after '.' or '?' does not end up with
    # a double space (or the transcript with a trailing one).
    text = text.replace('.', '. ').replace('?', '? ')
    return ' '.join(text.split())


def load_transcripts(pattern: str = path.join('*', '*')) -> Dict[str, str]:
    """Return a mapping of file name to transcript for files matching pattern.

    Directories are skipped, and files that cannot be read or decoded
    as UTF-8 are reported on stderr and skipped, so that one stray file
    does not prevent the tool from starting.
    """
    transcripts = {}
    # Sorted so that the mapping order does not depend on the file system.
    for filename in sorted(glob(pattern)):
        if not path.isfile(filename):
            continue
        try:
            with open(filename, encoding='utf-8') as f:
                transcripts[filename] = parse_subtitles(f)
        except (OSError, UnicodeDecodeError) as error:
            print(f'skipping {filename}: {error}', file=sys.stderr)
    return transcripts


def pick_result(bests: Sequence[Match], reply: str) -> Optional[Match]:
    """Return the hit numbered by reply, or None if reply is not valid.

    Only the numbers shown in the menu (0 to len(bests) - 1) are
    accepted; in particular negative numbers do not count from the end.
    """
    try:
        position = int(reply)
    except ValueError:
        return None
    if 0 <= position < len(bests):
        return bests[position]
    return None


def ask(prompt: str) -> str:
    """Read one line from the user, treating end-of-file as an empty line."""
    try:
        return input(prompt)
    except EOFError:
        print()  # terminate the unfinished prompt line
        return ''


def main() -> None:
    """Run the interactive search loop until an empty query is entered."""
    # Imported here rather than at module level so that the helpers
    # above stay importable without the third-party dependency.
    from fuzzywuzzy import fuzz, process

    transcripts = load_transcripts()
    while query := ask('>>> '):
        bests = process.extractBests(query, transcripts,
                                     scorer=fuzz.partial_ratio)
        for number, (_, _, filename) in enumerate(bests):
            print(number, filename)
        while reply := ask('... '):
            chosen = pick_result(bests, reply)
            if chosen is None:
                continue  # invalid choice: just prompt again
            transcript, _, filename = chosen
            print(filename, *wrap(transcript, 80), sep='\n')


if __name__ == '__main__':
    main()