From aa9f43dea1beae7b2f5ff3ad075f3d329c39aeff Mon Sep 17 00:00:00 2001 From: Fabian Dill Date: Mon, 9 May 2022 17:03:16 +0200 Subject: [PATCH] Fuzzy: switch to damerau_levenshtein_distance with ignored case --- MultiServer.py | 2 +- Options.py | 4 ++-- Utils.py | 24 +++++++++++++++++++----- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/MultiServer.py b/MultiServer.py index c58083ae..a11a2649 100644 --- a/MultiServer.py +++ b/MultiServer.py @@ -905,7 +905,7 @@ def json_format_send_event(net_item: NetworkItem, receiving_player: int): def get_intended_text(input_text: str, possible_answers) -> typing.Tuple[str, bool, str]: - picks = Utils.get_fuzzy_results(input_text, possible_answers) + picks = Utils.get_fuzzy_results(input_text, possible_answers, limit=2) if len(picks) > 1: dif = picks[0][1] - picks[1][1] if picks[0][1] == 100: diff --git a/Options.py b/Options.py index 042224d1..b18d74a6 100644 --- a/Options.py +++ b/Options.py @@ -457,14 +457,14 @@ class VerifyKeys: if self.verify_item_name: for item_name in self.value: if item_name not in world.item_names: - picks = get_fuzzy_results(item_name, world.item_names) + picks = get_fuzzy_results(item_name, world.item_names, limit=1) raise Exception(f"Item {item_name} from option {self} " f"is not a valid item name from {world.game}. " f"Did you mean '{picks[0][0]}' ({picks[0][1]}% sure)") elif self.verify_location_name: for location_name in self.value: if location_name not in world.location_names: - picks = get_fuzzy_results(location_name, world.location_names) + picks = get_fuzzy_results(location_name, world.location_names, limit=1) raise Exception(f"Location {location_name} from option {self} " f"is not a valid location name from {world.game}. " f"Did you mean '{picks[0][0]}' ({picks[0][1]}% sure)") diff --git a/Utils.py b/Utils.py index fed6f4e3..5cfe673f 100644 --- a/Utils.py +++ b/Utils.py @@ -495,8 +495,22 @@ def format_SI_prefix(value, power=1000, power_labels=('', 'k', 'M', 'G', 'T', "P return f"{value:0.3f} {power_labels[n]}" -def get_fuzzy_results(input_word: str, wordlist: typing.Sequence[str]) -> typing.List[typing.Tuple[str, int]]: - return sorted( - map(lambda candidate: (candidate, int(100*jellyfish.jaro_winkler_similarity(input_word, candidate))), wordlist), - key=lambda element: element[1], - reverse=True) +def get_fuzzy_ratio(word1: str, word2: str) -> float: + return (1 - jellyfish.damerau_levenshtein_distance(word1.lower(), word2.lower()) + / max(len(word1), len(word2))) + + +def get_fuzzy_results(input_word: str, wordlist: typing.Sequence[str], limit: typing.Optional[int] = None) \ + -> typing.List[typing.Tuple[str, int]]: + limit: int = limit if limit else len(wordlist) + return list( + map( + lambda container: (container[0], int(container[1]*100)), # convert up to limit to int % + sorted( + map(lambda candidate: + (candidate, get_fuzzy_ratio(input_word, candidate)), + wordlist), + key=lambda element: element[1], + reverse=True)[0:limit] + ) + )