From 513ab62ce77d24bad6ae12413968545752618daa Mon Sep 17 00:00:00 2001 From: Fabian Dill Date: Mon, 9 May 2022 07:18:50 +0200 Subject: [PATCH] Fuzzy: replace thefuzz with jellyfish GPL -> BSD 2-Clause, and it should be faster, though I haven't tested that myself and just trusted people on the internet. Jellyfish also gives us access to many more algorithms, should any of them prove to be better. Trying out Jaro-Winkler similarity now instead of Levenshtein. --- MultiServer.py | 4 +--- Options.py | 6 +++--- Utils.py | 8 ++++++++ requirements.txt | 4 ++-- 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/MultiServer.py b/MultiServer.py index f61701d2..c58083ae 100644 --- a/MultiServer.py +++ b/MultiServer.py @@ -24,8 +24,6 @@ ModuleUpdate.update() import websockets import colorama -from thefuzz import process as fuzzy_process - import NetUtils from worlds.AutoWorld import AutoWorldRegister @@ -907,7 +905,7 @@ def json_format_send_event(net_item: NetworkItem, receiving_player: int): def get_intended_text(input_text: str, possible_answers) -> typing.Tuple[str, bool, str]: - picks = fuzzy_process.extract(input_text, possible_answers, limit=2) + picks = Utils.get_fuzzy_results(input_text, possible_answers) if len(picks) > 1: dif = picks[0][1] - picks[1][1] if picks[0][1] == 100: diff --git a/Options.py b/Options.py index ea1b7eed..042224d1 100644 --- a/Options.py +++ b/Options.py @@ -6,7 +6,7 @@ import typing import random from schema import Schema, And, Or -from thefuzz import process as fuzzy_process +from Utils import get_fuzzy_results class AssembleOptions(abc.ABCMeta): @@ -457,14 +457,14 @@ class VerifyKeys: if self.verify_item_name: for item_name in self.value: if item_name not in world.item_names: - picks = fuzzy_process.extract(item_name, world.item_names, limit=2) + picks = get_fuzzy_results(item_name, world.item_names) raise Exception(f"Item {item_name} from option {self} " f"is not a valid item name from {world.game}. 
" f"Did you mean '{picks[0][0]}' ({picks[0][1]}% sure)") elif self.verify_location_name: for location_name in self.value: if location_name not in world.location_names: - picks = fuzzy_process.extract(location_name, world.location_names, limit=2) + picks = get_fuzzy_results(location_name, world.location_names) raise Exception(f"Location {location_name} from option {self} " f"is not a valid location name from {world.game}. " f"Did you mean '{picks[0][0]}' ({picks[0][1]}% sure)") diff --git a/Utils.py b/Utils.py index b353e9a7..fed6f4e3 100644 --- a/Utils.py +++ b/Utils.py @@ -28,6 +28,7 @@ class Version(typing.NamedTuple): __version__ = "0.3.2" version_tuple = tuplize_version(__version__) +import jellyfish from yaml import load, load_all, dump, SafeLoader try: @@ -492,3 +493,10 @@ def format_SI_prefix(value, power=1000, power_labels=('', 'k', 'M', 'G', 'T', "P return f"{value} {power_labels[n]}" else: return f"{value:0.3f} {power_labels[n]}" + + +def get_fuzzy_results(input_word: str, wordlist: typing.Sequence[str]) -> typing.List[typing.Tuple[str, int]]: + return sorted( + map(lambda candidate: (candidate, int(100*jellyfish.jaro_winkler_similarity(input_word, candidate))), wordlist), + key=lambda element: element[1], + reverse=True) diff --git a/requirements.txt b/requirements.txt index 9956a64a..0067d461 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ colorama>=0.4.4 websockets>=10.3 PyYAML>=6.0 -thefuzz[speedup]>=0.19.0 -jinja2>=3.1.1 +jellyfish>=0.9.0 +jinja2>=3.1.2 schema>=0.7.4 kivy>=2.1.0 bsdiff4>=1.2.2 \ No newline at end of file