Archipelago/_speedups.pyx

379 lines
15 KiB
Cython
Raw Normal View History

#cython: language_level=3
#distutils: language = c
#distutils: depends = intset.h
"""
Provides faster implementation of some core parts.
This is deliberately .pyx because using a non-compiled "pure python" may be slower.
"""
# pip install cython cymem
import cython
import warnings
from cpython cimport PyObject
from typing import Any, Dict, Iterable, Iterator, Generator, Sequence, Tuple, TypeVar, Union, Set, List, TYPE_CHECKING
from cymem.cymem cimport Pool
from libc.stdint cimport int64_t, uint32_t
from collections import defaultdict
cdef extern from *:
"""
// avoid warning from cython-generated code with MSVC + pyximport
#ifdef _MSC_VER
#pragma warning( disable: 4551 )
#endif
"""
ctypedef uint32_t ap_player_t # on AMD64 this is faster (and smaller) than 64bit ints
ctypedef uint32_t ap_flags_t
ctypedef int64_t ap_id_t
cdef ap_player_t MAX_PLAYER_ID = 1000000 # limit the size of indexing array
cdef size_t INVALID_SIZE = <size_t>(-1) # this is all 0xff... adding 1 results in 0, but it's not negative
# configure INTSET for player
cdef extern from *:
"""
#define INTSET_NAME ap_player_set
#define INTSET_TYPE uint32_t // has to match ap_player_t
"""
# create INTSET for player
cdef extern from "intset.h":
"""
#undef INTSET_NAME
#undef INTSET_TYPE
"""
ctypedef struct ap_player_set:
pass
ap_player_set* ap_player_set_new(size_t bucket_count) nogil
void ap_player_set_free(ap_player_set* set) nogil
bint ap_player_set_add(ap_player_set* set, ap_player_t val) nogil
bint ap_player_set_contains(ap_player_set* set, ap_player_t val) nogil
cdef struct LocationEntry:
# layout is so that
# 64bit player: location+sender and item+receiver 128bit comparisons, if supported
# 32bit player: aligned to 32/64bit with no unused space
ap_id_t location
ap_player_t sender
ap_player_t receiver
ap_id_t item
ap_flags_t flags
cdef struct IndexEntry:
size_t start
size_t count
if TYPE_CHECKING:
State = Dict[Tuple[int, int], Set[int]]
else:
State = Union[Tuple[int, int], Set[int], defaultdict]
T = TypeVar('T')
@cython.auto_pickle(False)
cdef class LocationStore:
"""Compact store for locations and their items in a MultiServer"""
# The original implementation uses Dict[int, Dict[int, Tuple(int, int, int]]
# with sender, location, (item, receiver, flags).
# This implementation is a flat list of (sender, location, item, receiver, flags) using native integers
# as well as some mapping arrays used to speed up stuff, saving a lot of memory while speeding up hints.
# Using std::map might be worth investigating, but memory overhead would be ~100% compared to arrays.
cdef Pool _mem
cdef object _len
cdef LocationEntry* entries # 3.2MB/100k items
cdef size_t entry_count
cdef IndexEntry* sender_index # 16KB/1000 players
cdef size_t sender_index_size
cdef list _keys # ~36KB/1000 players, speed up iter (28 per int + 8 per list entry)
cdef list _items # ~64KB/1000 players, speed up items (56 per tuple + 8 per list entry)
cdef list _proxies # ~92KB/1000 players, speed up self[player] (56 per struct + 28 per len + 8 per list entry)
cdef PyObject** _raw_proxies # 8K/1000 players, faster access to _proxies, but does not keep a ref
def get_size(self):
from sys import getsizeof
size = getsizeof(self) + getsizeof(self._mem) + getsizeof(self._len) \
+ sizeof(LocationEntry) * self.entry_count + sizeof(IndexEntry) * self.sender_index_size
size += getsizeof(self._keys) + getsizeof(self._items) + getsizeof(self._proxies)
size += sum(sizeof(key) for key in self._keys)
size += sum(sizeof(item) for item in self._items)
size += sum(sizeof(proxy) for proxy in self._proxies)
size += sizeof(self._raw_proxies[0]) * self.sender_index_size
return size
def __init__(self, locations_dict: Dict[int, Dict[int, Sequence[int]]]) -> None:
self._mem = Pool()
cdef object key
self._keys = []
self._items = []
self._proxies = []
# iterate over everything to get all maxima and validate everything
cdef size_t max_sender = INVALID_SIZE # keep track of highest used player id for indexing
cdef size_t sender_count = 0
cdef size_t count = 0
for sender, locations in locations_dict.items():
# we don't require the dict to be sorted here
if not isinstance(sender, int) or sender < 1 or sender > MAX_PLAYER_ID:
raise ValueError(f"Invalid player id {sender} for location")
if max_sender == INVALID_SIZE:
max_sender = sender
else:
max_sender = max(max_sender, sender)
for location, data in locations.items():
receiver = data[1]
if receiver < 1 or receiver > MAX_PLAYER_ID:
raise ValueError(f"Invalid player id {receiver} for item")
count += 1
sender_count += 1
if not sender_count:
raise ValueError(f"Rejecting game with 0 players")
if sender_count != max_sender:
# we assume player 0 will never have locations
raise ValueError("Player IDs not continuous")
if not count:
warnings.warn("Game has no locations")
# allocate the arrays and invalidate index (0xff...)
if count:
# leaving entries as NULL if there are none, makes potential memory errors more visible
self.entries = <LocationEntry*>self._mem.alloc(count, sizeof(LocationEntry))
self.sender_index = <IndexEntry*>self._mem.alloc(max_sender + 1, sizeof(IndexEntry))
self._raw_proxies = <PyObject**>self._mem.alloc(max_sender + 1, sizeof(PyObject*))
assert (not self.entries) == (not count)
assert self.sender_index
assert self._raw_proxies
# build entries and index
cdef size_t i = 0
for sender, locations in sorted(locations_dict.items()):
self.sender_index[sender].start = i
self.sender_index[sender].count = 0
# Sorting locations here makes it possible to write a faster lookup without an additional index.
for location, data in sorted(locations.items()):
self.entries[i].sender = sender
self.entries[i].location = location
self.entries[i].item = data[0]
self.entries[i].receiver = data[1]
if len(data) > 2:
self.entries[i].flags = data[2] # initialized to 0 during alloc
# Ignoring extra data. warn?
self.sender_index[sender].count += 1
i += 1
# build pyobject caches
self._proxies.append(None) # player 0
assert self.sender_index[0].count == 0
for i in range(1, max_sender + 1):
assert self.sender_index[i].count == 0 or (
self.sender_index[i].start < count and
self.sender_index[i].start + self.sender_index[i].count <= count)
key = i # allocate python integer
proxy = PlayerLocationProxy(self, i)
self._keys.append(key)
self._items.append((key, proxy))
self._proxies.append(proxy)
self._raw_proxies[i] = <PyObject*>proxy
self.sender_index_size = max_sender + 1
self.entry_count = count
self._len = sender_count
# fake dict access
def __len__(self) -> int:
return self._len
def __iter__(self) -> Iterator[int]:
return self._keys.__iter__()
def __getitem__(self, key: int) -> Any:
# figure out if player actually exists in the multidata and return a proxy
cdef size_t i = key # NOTE: this may raise TypeError
if i < 1 or i >= self.sender_index_size:
raise KeyError(key)
return <object>self._raw_proxies[key]
def get(self, key: int, default: T) -> Union[PlayerLocationProxy, T]:
# calling into self.__getitem__ here is slow, but this is not used in MultiServer
try:
return self[key]
except KeyError:
return default
def items(self) -> Iterable[Tuple[int, PlayerLocationProxy]]:
return self._items
# specialized accessors
def find_item(self, slots: Set[int], seeked_item_id: int) -> Generator[Tuple[int, int, int, int, int], None, None]:
cdef ap_id_t item = seeked_item_id
cdef ap_player_t receiver
cdef ap_player_set* receivers
cdef size_t slot_count = len(slots)
if slot_count == 1:
# specialized implementation for single slot
receiver = list(slots)[0]
with nogil:
for entry in self.entries[:self.entry_count]:
if entry.item == item and entry.receiver == receiver:
with gil:
yield entry.sender, entry.location, entry.item, entry.receiver, entry.flags
elif slot_count:
# generic implementation with lookup in set
receivers = ap_player_set_new(min(1023, slot_count)) # limit top level struct to 16KB
if not receivers:
raise MemoryError()
try:
for receiver in slots:
if not ap_player_set_add(receivers, receiver):
raise MemoryError()
with nogil:
for entry in self.entries[:self.entry_count]:
if entry.item == item and ap_player_set_contains(receivers, entry.receiver):
with gil:
yield entry.sender, entry.location, entry.item, entry.receiver, entry.flags
finally:
ap_player_set_free(receivers)
def get_for_player(self, slot: int) -> Dict[int, Set[int]]:
cdef ap_player_t receiver = slot
all_locations: Dict[int, Set[int]] = {}
with nogil:
for entry in self.entries[:self.entry_count]:
if entry.receiver == receiver:
with gil:
sender: int = entry.sender
if sender not in all_locations:
all_locations[sender] = set()
all_locations[sender].add(entry.location)
return all_locations
def get_checked(self, state: State, team: int, slot: int) -> List[int]:
cdef ap_player_t sender = slot
if sender < 0 or sender >= self.sender_index_size:
raise KeyError(slot)
# This used to validate checks actually exist. A remnant from the past.
# If the order of locations becomes relevant at some point, we could not do sorted(set), so leaving it.
cdef set checked = state[team, slot]
if not len(checked):
# Skips loop if none have been checked.
# This optimizes the case where everyone connects to a fresh game at the same time.
return []
# Unless the set is close to empty, it's cheaper to use the python set directly, so we do that.
cdef LocationEntry* entry
cdef size_t start = self.sender_index[sender].start
cdef size_t count = self.sender_index[sender].count
return [entry.location for
entry in self.entries[start:start+count] if
entry.location in checked]
def get_missing(self, state: State, team: int, slot: int) -> List[int]:
cdef LocationEntry* entry
cdef ap_player_t sender = slot
if sender < 0 or sender >= self.sender_index_size:
raise KeyError(slot)
cdef set checked = state[team, slot]
cdef size_t start = self.sender_index[sender].start
cdef size_t count = self.sender_index[sender].count
if not len(checked):
# Skip `in` if none have been checked.
# This optimizes the case where everyone connects to a fresh game at the same time.
return [entry.location for
entry in self.entries[start:start + count]]
else:
# Unless the set is close to empty, it's cheaper to use the python set directly, so we do that.
return [entry.location for
entry in self.entries[start:start + count] if
entry.location not in checked]
def get_remaining(self, state: State, team: int, slot: int) -> List[Tuple[int, int]]:
cdef LocationEntry* entry
cdef ap_player_t sender = slot
if sender < 0 or sender >= self.sender_index_size:
raise KeyError(slot)
cdef set checked = state[team, slot]
cdef size_t start = self.sender_index[sender].start
cdef size_t count = self.sender_index[sender].count
return sorted([(entry.receiver, entry.item) for
entry in self.entries[start:start+count] if
entry.location not in checked])
@cython.auto_pickle(False)
@cython.internal # unsafe. disable direct import
cdef class PlayerLocationProxy:
cdef LocationStore _store
cdef size_t _player
cdef object _len
def __init__(self, store: LocationStore, player: int) -> None:
self._store = store
self._player = player
self._len = self._store.sender_index[self._player].count
def __len__(self) -> int:
return self._store.sender_index[self._player].count
def __iter__(self) -> Generator[int, None, None]:
cdef LocationEntry* entry
cdef size_t i
cdef size_t off = self._store.sender_index[self._player].start
for i in range(self._store.sender_index[self._player].count):
entry = self._store.entries + off + i
yield entry.location
cdef LocationEntry* _get(self, ap_id_t loc):
# This requires locations to be sorted.
# This is always going to be slower than a pure python dict, because constructing the result tuple takes as long
# as the search in a python dict, which stores a pointer to an existing tuple.
cdef LocationEntry* entry = NULL
# binary search
cdef size_t l = self._store.sender_index[self._player].start
cdef size_t e = l + self._store.sender_index[self._player].count
cdef size_t r = e
cdef size_t m
while l < r:
m = (l + r) // 2
entry = self._store.entries + m
if entry.location < loc:
l = m + 1
else:
r = m
if l < e:
entry = self._store.entries + l
if entry.location == loc:
return entry
return NULL
def __getitem__(self, key: int) -> Tuple[int, int, int]:
cdef LocationEntry* entry = self._get(key)
if entry:
return entry.item, entry.receiver, entry.flags
raise KeyError(f"No location {key} for player {self._player}")
def get(self, key: int, default: T) -> Union[Tuple[int, int, int], T]:
cdef LocationEntry* entry = self._get(key)
if entry:
return entry.item, entry.receiver, entry.flags
return default
def items(self) -> Generator[Tuple[int, Tuple[int, int, int]], None, None]:
cdef LocationEntry* entry
start = self._store.sender_index[self._player].start
count = self._store.sender_index[self._player].count
for entry in self._store.entries[start:start+count]:
yield entry.location, (entry.item, entry.receiver, entry.flags)