Fuzzy: replace thefuzz with jellyfish

The license changes from GPL to BSD 2-Clause, and jellyfish should be faster, though I haven't benchmarked it myself and am relying on reports from people on the internet.
Jellyfish also gives us access to many more algorithms, should any of them turn out to be better. Trying out Jaro-Winkler similarity now instead of Levenshtein distance.
This commit is contained in:
Fabian Dill 2022-05-09 07:18:50 +02:00 committed by KonoTyran
parent a020dea277
commit 513ab62ce7
4 changed files with 14 additions and 8 deletions

View File

@ -24,8 +24,6 @@ ModuleUpdate.update()
import websockets
import colorama
from thefuzz import process as fuzzy_process
import NetUtils
from worlds.AutoWorld import AutoWorldRegister
@ -907,7 +905,7 @@ def json_format_send_event(net_item: NetworkItem, receiving_player: int):
def get_intended_text(input_text: str, possible_answers) -> typing.Tuple[str, bool, str]:
picks = fuzzy_process.extract(input_text, possible_answers, limit=2)
picks = Utils.get_fuzzy_results(input_text, possible_answers)
if len(picks) > 1:
dif = picks[0][1] - picks[1][1]
if picks[0][1] == 100:

View File

@ -6,7 +6,7 @@ import typing
import random
from schema import Schema, And, Or
from thefuzz import process as fuzzy_process
from Utils import get_fuzzy_results
class AssembleOptions(abc.ABCMeta):
@ -457,14 +457,14 @@ class VerifyKeys:
if self.verify_item_name:
for item_name in self.value:
if item_name not in world.item_names:
picks = fuzzy_process.extract(item_name, world.item_names, limit=2)
picks = get_fuzzy_results(item_name, world.item_names)
raise Exception(f"Item {item_name} from option {self} "
f"is not a valid item name from {world.game}. "
f"Did you mean '{picks[0][0]}' ({picks[0][1]}% sure)")
elif self.verify_location_name:
for location_name in self.value:
if location_name not in world.location_names:
picks = fuzzy_process.extract(location_name, world.location_names, limit=2)
picks = get_fuzzy_results(location_name, world.location_names)
raise Exception(f"Location {location_name} from option {self} "
f"is not a valid location name from {world.game}. "
f"Did you mean '{picks[0][0]}' ({picks[0][1]}% sure)")

View File

@ -28,6 +28,7 @@ class Version(typing.NamedTuple):
__version__ = "0.3.2"
version_tuple = tuplize_version(__version__)
import jellyfish
from yaml import load, load_all, dump, SafeLoader
try:
@ -492,3 +493,10 @@ def format_SI_prefix(value, power=1000, power_labels=('', 'k', 'M', 'G', 'T', "P
return f"{value} {power_labels[n]}"
else:
return f"{value:0.3f} {power_labels[n]}"
def get_fuzzy_results(input_word: str, wordlist: typing.Sequence[str]) -> typing.List[typing.Tuple[str, int]]:
    """Score every entry of wordlist against input_word, best match first.

    :param input_word: the text to compare each candidate with.
    :param wordlist: the candidate strings to rank.
    :return: a list of (candidate, score) tuples ordered by descending score, where score is
        jellyfish.jaro_winkler_similarity(input_word, candidate) multiplied by 100 and truncated to int.
    """
    scored = [
        (candidate, int(100 * jellyfish.jaro_winkler_similarity(input_word, candidate)))
        for candidate in wordlist
    ]
    # list.sort is stable, so equally scored candidates keep their wordlist order
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored

View File

@ -1,8 +1,8 @@
colorama>=0.4.4
websockets>=10.3
PyYAML>=6.0
thefuzz[speedup]>=0.19.0
jinja2>=3.1.1
jellyfish>=0.9.0
jinja2>=3.1.2
schema>=0.7.4
kivy>=2.1.0
bsdiff4>=1.2.2