Fuzzy: replace thefuzz with jellyfish
The license changes from GPL to BSD 2-Clause, and it should be faster, though I haven't tested that myself and am just trusting people on the internet. Jellyfish also gives us access to many more algorithms, should any of them turn out to work better. Trying out Jaro-Winkler similarity now instead of Levenshtein distance.
This commit is contained in:
parent
a020dea277
commit
513ab62ce7
|
@ -24,8 +24,6 @@ ModuleUpdate.update()
|
|||
import websockets
|
||||
import colorama
|
||||
|
||||
from thefuzz import process as fuzzy_process
|
||||
|
||||
import NetUtils
|
||||
from worlds.AutoWorld import AutoWorldRegister
|
||||
|
||||
|
@ -907,7 +905,7 @@ def json_format_send_event(net_item: NetworkItem, receiving_player: int):
|
|||
|
||||
|
||||
def get_intended_text(input_text: str, possible_answers) -> typing.Tuple[str, bool, str]:
|
||||
picks = fuzzy_process.extract(input_text, possible_answers, limit=2)
|
||||
picks = Utils.get_fuzzy_results(input_text, possible_answers)
|
||||
if len(picks) > 1:
|
||||
dif = picks[0][1] - picks[1][1]
|
||||
if picks[0][1] == 100:
|
||||
|
|
|
@ -6,7 +6,7 @@ import typing
|
|||
import random
|
||||
|
||||
from schema import Schema, And, Or
|
||||
from thefuzz import process as fuzzy_process
|
||||
from Utils import get_fuzzy_results
|
||||
|
||||
|
||||
class AssembleOptions(abc.ABCMeta):
|
||||
|
@ -457,14 +457,14 @@ class VerifyKeys:
|
|||
if self.verify_item_name:
|
||||
for item_name in self.value:
|
||||
if item_name not in world.item_names:
|
||||
picks = fuzzy_process.extract(item_name, world.item_names, limit=2)
|
||||
picks = get_fuzzy_results(item_name, world.item_names)
|
||||
raise Exception(f"Item {item_name} from option {self} "
|
||||
f"is not a valid item name from {world.game}. "
|
||||
f"Did you mean '{picks[0][0]}' ({picks[0][1]}% sure)")
|
||||
elif self.verify_location_name:
|
||||
for location_name in self.value:
|
||||
if location_name not in world.location_names:
|
||||
picks = fuzzy_process.extract(location_name, world.location_names, limit=2)
|
||||
picks = get_fuzzy_results(location_name, world.location_names)
|
||||
raise Exception(f"Location {location_name} from option {self} "
|
||||
f"is not a valid location name from {world.game}. "
|
||||
f"Did you mean '{picks[0][0]}' ({picks[0][1]}% sure)")
|
||||
|
|
8
Utils.py
8
Utils.py
|
@ -28,6 +28,7 @@ class Version(typing.NamedTuple):
|
|||
__version__ = "0.3.2"
|
||||
version_tuple = tuplize_version(__version__)
|
||||
|
||||
import jellyfish
|
||||
from yaml import load, load_all, dump, SafeLoader
|
||||
|
||||
try:
|
||||
|
@ -492,3 +493,10 @@ def format_SI_prefix(value, power=1000, power_labels=('', 'k', 'M', 'G', 'T', "P
|
|||
return f"{value} {power_labels[n]}"
|
||||
else:
|
||||
return f"{value:0.3f} {power_labels[n]}"
|
||||
|
||||
|
||||
def get_fuzzy_results(input_word: str, wordlist: typing.Sequence[str]) -> typing.List[typing.Tuple[str, int]]:
    """Rank every entry of *wordlist* by its similarity to *input_word*.

    Each candidate is scored with ``jellyfish.jaro_winkler_similarity``; the
    score is multiplied by 100 and truncated (not rounded) to an ``int``.

    :param input_word: the text to compare against.
    :param wordlist: the candidate strings to score.
    :return: a list of ``(candidate, score)`` tuples, one per candidate,
        ordered from highest to lowest score. Candidates with equal scores
        keep the relative order they had in *wordlist*.
    """
    scored_candidates = [
        (word, int(100 * jellyfish.jaro_winkler_similarity(input_word, word)))
        for word in wordlist
    ]
    # list.sort is stable, also with reverse=True, so ties stay in input order.
    scored_candidates.sort(key=lambda pair: pair[1], reverse=True)
    return scored_candidates
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
colorama>=0.4.4
|
||||
websockets>=10.3
|
||||
PyYAML>=6.0
|
||||
thefuzz[speedup]>=0.19.0
|
||||
jinja2>=3.1.1
|
||||
jellyfish>=0.9.0
|
||||
jinja2>=3.1.2
|
||||
schema>=0.7.4
|
||||
kivy>=2.1.0
|
||||
bsdiff4>=1.2.2
|
Loading…
Reference in New Issue