Fuzzy: replace thefuzz with jellyfish
This moves the dependency's license from GPL to BSD 2-Clause, and it should be faster, though I haven't tested that myself and am just trusting people on the internet. Jellyfish also gives us access to many more algorithms, should any of them turn out to be better. Trying out Jaro-Winkler similarity now instead of Levenshtein distance.
This commit is contained in:
parent
a020dea277
commit
513ab62ce7
|
@ -24,8 +24,6 @@ ModuleUpdate.update()
|
||||||
import websockets
|
import websockets
|
||||||
import colorama
|
import colorama
|
||||||
|
|
||||||
from thefuzz import process as fuzzy_process
|
|
||||||
|
|
||||||
import NetUtils
|
import NetUtils
|
||||||
from worlds.AutoWorld import AutoWorldRegister
|
from worlds.AutoWorld import AutoWorldRegister
|
||||||
|
|
||||||
|
@ -907,7 +905,7 @@ def json_format_send_event(net_item: NetworkItem, receiving_player: int):
|
||||||
|
|
||||||
|
|
||||||
def get_intended_text(input_text: str, possible_answers) -> typing.Tuple[str, bool, str]:
|
def get_intended_text(input_text: str, possible_answers) -> typing.Tuple[str, bool, str]:
|
||||||
picks = fuzzy_process.extract(input_text, possible_answers, limit=2)
|
picks = Utils.get_fuzzy_results(input_text, possible_answers)
|
||||||
if len(picks) > 1:
|
if len(picks) > 1:
|
||||||
dif = picks[0][1] - picks[1][1]
|
dif = picks[0][1] - picks[1][1]
|
||||||
if picks[0][1] == 100:
|
if picks[0][1] == 100:
|
||||||
|
|
|
@ -6,7 +6,7 @@ import typing
|
||||||
import random
|
import random
|
||||||
|
|
||||||
from schema import Schema, And, Or
|
from schema import Schema, And, Or
|
||||||
from thefuzz import process as fuzzy_process
|
from Utils import get_fuzzy_results
|
||||||
|
|
||||||
|
|
||||||
class AssembleOptions(abc.ABCMeta):
|
class AssembleOptions(abc.ABCMeta):
|
||||||
|
@ -457,14 +457,14 @@ class VerifyKeys:
|
||||||
if self.verify_item_name:
|
if self.verify_item_name:
|
||||||
for item_name in self.value:
|
for item_name in self.value:
|
||||||
if item_name not in world.item_names:
|
if item_name not in world.item_names:
|
||||||
picks = fuzzy_process.extract(item_name, world.item_names, limit=2)
|
picks = get_fuzzy_results(item_name, world.item_names)
|
||||||
raise Exception(f"Item {item_name} from option {self} "
|
raise Exception(f"Item {item_name} from option {self} "
|
||||||
f"is not a valid item name from {world.game}. "
|
f"is not a valid item name from {world.game}. "
|
||||||
f"Did you mean '{picks[0][0]}' ({picks[0][1]}% sure)")
|
f"Did you mean '{picks[0][0]}' ({picks[0][1]}% sure)")
|
||||||
elif self.verify_location_name:
|
elif self.verify_location_name:
|
||||||
for location_name in self.value:
|
for location_name in self.value:
|
||||||
if location_name not in world.location_names:
|
if location_name not in world.location_names:
|
||||||
picks = fuzzy_process.extract(location_name, world.location_names, limit=2)
|
picks = get_fuzzy_results(location_name, world.location_names)
|
||||||
raise Exception(f"Location {location_name} from option {self} "
|
raise Exception(f"Location {location_name} from option {self} "
|
||||||
f"is not a valid location name from {world.game}. "
|
f"is not a valid location name from {world.game}. "
|
||||||
f"Did you mean '{picks[0][0]}' ({picks[0][1]}% sure)")
|
f"Did you mean '{picks[0][0]}' ({picks[0][1]}% sure)")
|
||||||
|
|
8
Utils.py
8
Utils.py
|
@ -28,6 +28,7 @@ class Version(typing.NamedTuple):
|
||||||
__version__ = "0.3.2"
|
__version__ = "0.3.2"
|
||||||
version_tuple = tuplize_version(__version__)
|
version_tuple = tuplize_version(__version__)
|
||||||
|
|
||||||
|
import jellyfish
|
||||||
from yaml import load, load_all, dump, SafeLoader
|
from yaml import load, load_all, dump, SafeLoader
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -492,3 +493,10 @@ def format_SI_prefix(value, power=1000, power_labels=('', 'k', 'M', 'G', 'T', "P
|
||||||
return f"{value} {power_labels[n]}"
|
return f"{value} {power_labels[n]}"
|
||||||
else:
|
else:
|
||||||
return f"{value:0.3f} {power_labels[n]}"
|
return f"{value:0.3f} {power_labels[n]}"
|
||||||
|
|
||||||
|
|
||||||
|
def get_fuzzy_results(input_word: str, wordlist: typing.Sequence[str]) -> typing.List[typing.Tuple[str, int]]:
    """Score every entry of ``wordlist`` against ``input_word``, best match first.

    Each score is the Jaro-Winkler similarity reported by jellyfish, multiplied
    by 100 and truncated to an int.

    :param input_word: text to look up.
    :param wordlist: candidate strings to compare against.
    :return: one ``(candidate, score)`` tuple per candidate, ordered by
        descending score; equal scores keep their order from ``wordlist``.
    """
    # NOTE(review): both strings reach jellyfish unmodified (no case folding
    # or trimming happens here) - confirm that is what callers expect.
    scored = [
        (candidate, int(100 * jellyfish.jaro_winkler_similarity(input_word, candidate)))
        for candidate in wordlist
    ]
    # list.sort is stable, including with reverse=True, matching sorted().
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
colorama>=0.4.4
|
colorama>=0.4.4
|
||||||
websockets>=10.3
|
websockets>=10.3
|
||||||
PyYAML>=6.0
|
PyYAML>=6.0
|
||||||
thefuzz[speedup]>=0.19.0
|
jellyfish>=0.9.0
|
||||||
jinja2>=3.1.1
|
jinja2>=3.1.2
|
||||||
schema>=0.7.4
|
schema>=0.7.4
|
||||||
kivy>=2.1.0
|
kivy>=2.1.0
|
||||||
bsdiff4>=1.2.2
|
bsdiff4>=1.2.2
|
Loading…
Reference in New Issue