Speedups: remove dependency on c++ (#2796)

* Speedups: remove dependency on c++

* Speedups: intset: handle malloc failing

* Speedups: intset: fix corner case for int64 on 32bit systems

original idea was to only use bucket->val if int<pointer,
but we always have a union now anyway

* Speedups: add size comment to player_set bucket configuration

* test: more tests for LocationStore.find_item

* test: require _speedups in CI

This kind of tests that the build succeeds.

* test: even more tests for LocationStore.find_item

* Speedups: intset uniform comment style

* Speedups: intset: avoid memory leak when realloc fails

* Speedups: intset: make `gcc -pedantic -std=c99 -fanalyzer` without warnings

Unnamed unions are not in C99, this got fixed.
The overhead of setting count=0 is minimal or optimized-out and silences -fanalyzer (see comment).

* Speedups: don't leak memory in case of exception

* Speedups: intset: validate alloc and free

This won't happen in our cython, but it's still a good addition.

* CI: add test framework for C/C++ code

* CI: ctest: fix cwd

* Speedups: intset: ignore msvc warning

* Tests: intset: revert attempt at no-asan

We solve this with env vars in ctest now, and this fails for msvc.

* Test: cpp: docs: fix typo

* Test: cpp: docs: fix another typo

* Test: intset: proper bucket count for Negative test

INTxx_MIN % 1 would not produce a negative number, so the test was flawed.
This commit is contained in:
black-sliver 2024-06-12 18:54:59 +02:00 committed by GitHub
parent 2daccded36
commit acf85eb9ab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 450 additions and 19 deletions

54
.github/workflows/ctest.yml vendored Normal file
View File

@ -0,0 +1,54 @@
# Run CMake / CTest C++ unit tests

name: ctest

# Only run when C/C++ sources, CMake files or this workflow itself change.
# In GitHub path filters `?` makes the preceding character optional,
# so '**.cc?' matches both *.c and *.cc (and '**.hh?' both *.h and *.hh).
on:
  push:
    paths:
      - '**.cc?'
      - '**.cpp'
      - '**.cxx'
      - '**.hh?'
      - '**.hpp'
      - '**.hxx'
      # '**.CMakeLists' would only match files *ending* in ".CMakeLists"
      # and therefore never a CMakeLists.txt
      - '**/CMakeLists.txt'
      - '.github/workflows/ctest.yml'
  pull_request:
    paths:
      - '**.cc?'
      - '**.cpp'
      - '**.cxx'
      - '**.hh?'
      - '**.hpp'
      - '**.hxx'
      - '**/CMakeLists.txt'
      - '.github/workflows/ctest.yml'

jobs:
  ctest:
    runs-on: ${{ matrix.os }}
    name: Test C++ ${{ matrix.os }}

    strategy:
      # keep testing the other OS if one of them fails
      fail-fast: false
      matrix:
        os: [ubuntu-latest, windows-latest]

    steps:
      - uses: actions/checkout@v4
      # set up the MSVC developer environment on Windows runners only
      - uses: ilammy/msvc-dev-cmd@v1
        if: startsWith(matrix.os,'windows')
      - uses: Bacondish2023/setup-googletest@v1
        with:
          build-type: 'Release'
      - name: Build tests
        run: |
          cd test/cpp
          mkdir build
          cmake -S . -B build/ -DCMAKE_BUILD_TYPE=Release
          cmake --build build/ --config Release
          ls
      - name: Run tests
        run: |
          cd test/cpp
          ctest --test-dir build/ -C Release --output-on-failure

1
.gitignore vendored
View File

@ -178,6 +178,7 @@ dmypy.json
cython_debug/
# Cython intermediates
_speedups.c
_speedups.cpp
_speedups.html

View File

@ -1,5 +1,6 @@
#cython: language_level=3
#distutils: language = c++
#distutils: language = c
#distutils: depends = intset.h
"""
Provides faster implementation of some core parts.
@ -13,7 +14,6 @@ from cpython cimport PyObject
from typing import Any, Dict, Iterable, Iterator, Generator, Sequence, Tuple, TypeVar, Union, Set, List, TYPE_CHECKING
from cymem.cymem cimport Pool
from libc.stdint cimport int64_t, uint32_t
from libcpp.set cimport set as std_set
from collections import defaultdict
cdef extern from *:
@ -31,6 +31,27 @@ ctypedef int64_t ap_id_t
cdef ap_player_t MAX_PLAYER_ID = 1000000 # limit the size of indexing array
cdef size_t INVALID_SIZE = <size_t>(-1) # this is all 0xff... adding 1 results in 0, but it's not negative
# configure INTSET for player
cdef extern from *:
"""
#define INTSET_NAME ap_player_set
#define INTSET_TYPE uint32_t // has to match ap_player_t
"""
# create INTSET for player
cdef extern from "intset.h":
"""
#undef INTSET_NAME
#undef INTSET_TYPE
"""
ctypedef struct ap_player_set:
pass
ap_player_set* ap_player_set_new(size_t bucket_count) nogil
void ap_player_set_free(ap_player_set* set) nogil
bint ap_player_set_add(ap_player_set* set, ap_player_t val) nogil
bint ap_player_set_contains(ap_player_set* set, ap_player_t val) nogil
cdef struct LocationEntry:
# layout is so that
@ -185,7 +206,7 @@ cdef class LocationStore:
def find_item(self, slots: Set[int], seeked_item_id: int) -> Generator[Tuple[int, int, int, int, int], None, None]:
cdef ap_id_t item = seeked_item_id
cdef ap_player_t receiver
cdef std_set[ap_player_t] receivers
cdef ap_player_set* receivers
cdef size_t slot_count = len(slots)
if slot_count == 1:
# specialized implementation for single slot
@ -197,13 +218,20 @@ cdef class LocationStore:
yield entry.sender, entry.location, entry.item, entry.receiver, entry.flags
elif slot_count:
# generic implementation with lookup in set
for receiver in slots:
receivers.insert(receiver)
with nogil:
for entry in self.entries[:self.entry_count]:
if entry.item == item and receivers.count(entry.receiver):
with gil:
yield entry.sender, entry.location, entry.item, entry.receiver, entry.flags
receivers = ap_player_set_new(min(1023, slot_count)) # limit top level struct to 16KB
if not receivers:
raise MemoryError()
try:
for receiver in slots:
if not ap_player_set_add(receivers, receiver):
raise MemoryError()
with nogil:
for entry in self.entries[:self.entry_count]:
if entry.item == item and ap_player_set_contains(receivers, entry.receiver):
with gil:
yield entry.sender, entry.location, entry.item, entry.receiver, entry.flags
finally:
ap_player_set_free(receivers)
def get_for_player(self, slot: int) -> Dict[int, Set[int]]:
cdef ap_player_t receiver = slot

View File

@ -1,8 +1,10 @@
# This file is required to get pyximport to work with C++.
# Switching from std::set to a pure C implementation is still on the table to simplify everything.
# This file is used when doing pyximport
import os
def make_ext(modname, pyxfilename):
from distutils.extension import Extension
return Extension(name=modname,
sources=[pyxfilename],
language='c++')
depends=["intset.h"],
include_dirs=[os.getcwd()],
language="c")

135
intset.h Normal file
View File

@ -0,0 +1,135 @@
/* A specialized unordered_set implementation for literals, where bucket_count
* is defined at initialization rather than increased automatically.
*/
#include <stddef.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/* both configuration macros are mandatory: INTSET_NAME is the name of the
 * generated set type and the prefix of its functions, INTSET_TYPE is the
 * integer type of the stored values
 */
#ifndef INTSET_NAME
#error "Please #define INTSET_NAME ... before including intset.h"
#endif
#ifndef INTSET_TYPE
#error "Please #define INTSET_TYPE ... before including intset.h"
#endif
/* macros to generate unique names from INTSET_NAME */
/* the helpers below are guarded because they are not #undef'ed at the end of
 * this header, so a second include would otherwise redefine them
 */
#ifndef INTSET_CONCAT
/* two-step concat so macro arguments get expanded before they are pasted */
#define INTSET_CONCAT_(a, b) a ## b
#define INTSET_CONCAT(a, b) INTSET_CONCAT_(a, b)
#define INTSET_FUNC_(a, b) INTSET_CONCAT(a, _ ## b)
#endif
/* INTSET_FUNC(x) expands to <INTSET_NAME>_x, e.g. ap_player_set_new */
#define INTSET_FUNC(name) INTSET_FUNC_(INTSET_NAME, name)
/* <INTSET_NAME>Bucket and <INTSET_NAME>Union: per-instantiation helper type names */
#define INTSET_BUCKET INTSET_CONCAT(INTSET_NAME, Bucket)
#define INTSET_UNION INTSET_CONCAT(INTSET_NAME, Union)
/* MSVC warns (C4200) about the flexible array member used for the buckets;
 * the warning state is saved here and restored at the end of the header
 */
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4200)
#endif
/* A single bucket. count is the number of values stored in it:
 * 0 = empty, 1 = the value is stored inline in v.val,
 * 2 or more = v.data points to a heap allocated array of count values.
 * The union is named because unnamed unions are not part of C99.
 */
typedef struct {
size_t count;
union INTSET_UNION {
INTSET_TYPE val;
INTSET_TYPE *data;
} v;
} INTSET_BUCKET;
/* The set itself: a fixed number of buckets stored directly behind the header
 * in the same allocation (flexible array member).
 */
typedef struct {
size_t bucket_count;
INTSET_BUCKET buckets[];
} INTSET_NAME;
/* Create an empty set with a fixed number of buckets.
 * A request for 0 buckets is treated as a request for 1 bucket.
 * Returns NULL if the required size does not fit into size_t or if the
 * allocation fails; otherwise the result has to be released with the
 * matching free function.
 */
static INTSET_NAME *INTSET_FUNC(new)(size_t buckets)
{
    INTSET_NAME *result;
    size_t total, idx;

    if (buckets == 0)
        buckets = 1;
    /* reject bucket counts whose byte size would overflow size_t */
    if (buckets > (SIZE_MAX - sizeof(INTSET_NAME)) / sizeof(INTSET_BUCKET))
        return NULL;
    total = sizeof(INTSET_NAME) + buckets * sizeof(INTSET_BUCKET);
    result = (INTSET_NAME*)malloc(total);
    if (result == NULL)
        return NULL;
    memset(result, 0, total);
    /* gcc -fanalyzer does not understand that the memset above already sets
     * all buckets' count to 0, so it is repeated explicitly
     */
    for (idx = 0; idx != buckets; ++idx)
        result->buckets[idx].count = 0;
    result->bucket_count = buckets;
    return result;
}
/* Release a set and all memory owned by its buckets.
 * Passing NULL is allowed and does nothing.
 */
static void INTSET_FUNC(free)(INTSET_NAME *set)
{
    size_t idx;

    if (set == NULL)
        return;
    for (idx = 0; idx != set->bucket_count; ++idx) {
        INTSET_BUCKET *bucket = &set->buckets[idx];
        /* only buckets with 2 or more values own a heap array;
         * a single value is stored inline in the union
         */
        if (bucket->count >= 2)
            free(bucket->v.data);
    }
    free(set);
}
/* Return true if val is stored in the set, false otherwise.
 * set must be a valid (non-NULL) set.
 */
static bool INTSET_FUNC(contains)(INTSET_NAME *set, INTSET_TYPE val)
{
    /* the bucket is selected by the value modulo the bucket count */
    const INTSET_BUCKET *bucket = &set->buckets[(size_t)val % set->bucket_count];
    const INTSET_TYPE *it, *end;

    if (bucket->count == 0)
        return false;
    if (bucket->count == 1)
        return bucket->v.val == val; /* single value is stored inline */
    /* 2 or more values: linear search through the bucket's array */
    end = bucket->v.data + bucket->count;
    for (it = bucket->v.data; it != end; ++it) {
        if (*it == val)
            return true;
    }
    return false;
}
/* Add val to the set.
 * Returns true on success, including when val was already present.
 * Returns false if memory for the bucket could not be allocated; the set is
 * left unchanged in that case.
 */
static bool INTSET_FUNC(add)(INTSET_NAME *set, INTSET_TYPE val)
{
    INTSET_BUCKET *bucket;
    INTSET_TYPE *grown;

    if (INTSET_FUNC(contains)(set, val))
        return true; /* ok */
    bucket = &set->buckets[(size_t)val % set->bucket_count];

    if (bucket->count == 0) {
        /* first value of the bucket is stored inline, no allocation needed */
        bucket->v.val = val;
        bucket->count = 1;
        return true; /* success */
    }

    if (bucket->count == 1) {
        /* switch from the inline value to a heap array of two values */
        grown = (INTSET_TYPE*)malloc(2 * sizeof(INTSET_TYPE));
        if (grown == NULL)
            return false; /* error, the inline value is still intact */
        grown[0] = bucket->v.val; /* read before v.data overwrites the union */
    } else {
        /* grow the existing array by one slot; if realloc fails the old
         * array stays valid and owned by the bucket, so nothing leaks
         */
        grown = (INTSET_TYPE*)realloc(bucket->v.data, (bucket->count + 1) * sizeof(INTSET_TYPE));
        if (grown == NULL)
            return false; /* error */
    }
    grown[bucket->count] = val;
    bucket->v.data = grown;
    bucket->count++;
    return true; /* success */
}
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
#undef INTSET_FUNC
#undef INTSET_BUCKET
#undef INTSET_UNION

49
test/cpp/CMakeLists.txt Normal file
View File

@ -0,0 +1,49 @@
# Top-level project for the C/C++ unit tests.
# add_link_options() used below was added in CMake 3.13, so that is the real minimum.
cmake_minimum_required(VERSION 3.13)
project(ap-cpp-tests)

enable_testing()

find_package(GTest REQUIRED)

if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
    add_definitions("/source-charset:utf-8")
    # link the static runtime
    set(CMAKE_CXX_FLAGS_DEBUG "/MTd")
    set(CMAKE_CXX_FLAGS_RELEASE "/MT")
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
    # enable static analysis for gcc
    add_compile_options(-fanalyzer -Werror)
    # disable stuff that gets triggered by googletest
    add_compile_options(-Wno-analyzer-malloc-leak)
    # enable asan for gcc
    add_compile_options(-fsanitize=address)
    add_link_options(-fsanitize=address)
endif ()

# Default GoogleTest executable; sub-directories add their sources to it.
add_executable(test_default)

target_include_directories(test_default
    PRIVATE
    ${GTEST_INCLUDE_DIRS}
)

target_link_libraries(test_default
    ${GTEST_BOTH_LIBRARIES}
)

add_test(
    NAME test_default
    COMMAND test_default
)

# Let a failing allocation return NULL under asan instead of aborting the test run.
set_property(
    TEST test_default
    PROPERTY ENVIRONMENT "ASAN_OPTIONS=allocator_may_return_null=1"
)

# Pick up every sub-directory that provides its own CMakeLists.txt.
file(GLOB ITEMS *)
foreach(item ${ITEMS})
    if(IS_DIRECTORY ${item} AND EXISTS ${item}/CMakeLists.txt)
        message(${item})
        add_subdirectory(${item})
    endif()
endforeach()

32
test/cpp/README.md Normal file
View File

@ -0,0 +1,32 @@
# C++ tests
Test framework for C and C++ code in AP.
## Adding a Test
### GoogleTest
Adding GoogleTests is as simple as creating a directory with
* one or more `test_*.cpp` files that define tests using
[GoogleTest API](https://google.github.io/googletest/)
* a `CMakeLists.txt` that adds the .cpp files to `test_default` target using
[target_sources](https://cmake.org/cmake/help/latest/command/target_sources.html)
### CTest
If GoogleTest is not suitable for the test, or the build flags / sources / libraries are incompatible,
you can add another CTest to the project using `add_executable` and `add_test`, similar to how it's done for `test_default`.
## Running Tests
* Install [CMake](https://cmake.org/).
* Build and/or install GoogleTest and make sure
[CMake can find it](https://cmake.org/cmake/help/latest/module/FindGTest.html), or
[create a parent `CMakeLists.txt` that fetches GoogleTest](https://google.github.io/googletest/quickstart-cmake.html).
* Enter the directory with the top-most `CMakeLists.txt` and run
```sh
mkdir build
cmake -S . -B build/ -DCMAKE_BUILD_TYPE=Release
cmake --build build/ --config Release && \
ctest --test-dir build/ -C Release --output-on-failure
```

View File

@ -0,0 +1,4 @@
# Add the intset unit tests to the test_default GoogleTest executable.
target_sources(test_default
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/test_intset.cpp
)

View File

@ -0,0 +1,105 @@
#include <limits>
#include <cstdint>
#include <gtest/gtest.h>
// uint32Set
#define INTSET_NAME uint32Set
#define INTSET_TYPE uint32_t
#include "../../../intset.h"
#undef INTSET_NAME
#undef INTSET_TYPE
// int64Set
#define INTSET_NAME int64Set
#define INTSET_TYPE int64_t
#include "../../../intset.h"
TEST(IntsetTest, ZeroBuckets)
{
    // a set requested with zero buckets must either fail to allocate or work like any other set
    uint32Set *const zero_set = uint32Set_new(0);
    if (zero_set != nullptr) {
        EXPECT_FALSE(uint32Set_contains(zero_set, 1));
        EXPECT_TRUE(uint32Set_add(zero_set, 1));
        EXPECT_TRUE(uint32Set_contains(zero_set, 1));
        uint32Set_free(zero_set);
    }
    // else: allocation failed -> OK
}
TEST(IntsetTest, Duplicate)
{
    // adding a value that is already present has to report success
    uint32Set *const dup_set = uint32Set_new(2);
    ASSERT_TRUE(dup_set != nullptr);
    for (int attempt = 0; attempt < 2; ++attempt) {
        EXPECT_TRUE(uint32Set_add(dup_set, 0));
    }
    EXPECT_TRUE(uint32Set_contains(dup_set, 0));
    uint32Set_free(dup_set);
}
TEST(IntsetTest, SetAllocFailure)
{
    // requesting 100TB of RAM is expected to fail and yield NULL
    if (sizeof(size_t) < 8) {
        // the bucket count below does not fit into a 32bit size_t
        GTEST_SKIP() << "Alloc error not testable on 32bit";
    }
    int64Set *huge_set = int64Set_new(6250000000000ULL);
    EXPECT_TRUE(huge_set == nullptr);
    int64Set_free(huge_set);  // no-op for NULL, cleans up if the allocation unexpectedly succeeded
}
TEST(IntsetTest, SetAllocOverflow)
{
    // a bucket count this large would overflow the size passed to malloc; new has to reject it
    const size_t too_many = std::numeric_limits<size_t>::max();
    int64Set *overflow_set = int64Set_new(too_many);
    EXPECT_TRUE(overflow_set == nullptr);
    int64Set_free(overflow_set);
}
TEST(IntsetTest, NullFree)
{
    // freeing a NULL set must be a no-op and must not touch any buckets
    uint32Set_free(nullptr);
    int64Set_free(nullptr);
}
TEST(IntsetTest, BucketRealloc)
{
    // with a single bucket every value lands in the same bucket, so each add has to grow it
    uint32Set *const single = uint32Set_new(1);
    ASSERT_TRUE(single != nullptr);
    EXPECT_FALSE(uint32Set_contains(single, 0));
    EXPECT_TRUE(uint32Set_add(single, 0));
    EXPECT_TRUE(uint32Set_contains(single, 0));
    for (uint32_t value = 1; value < 32; ++value) {
        EXPECT_TRUE(uint32Set_add(single, value));
        // previous and current value are present, the next one is not yet
        EXPECT_TRUE(uint32Set_contains(single, value - 1));
        EXPECT_TRUE(uint32Set_contains(single, value));
        EXPECT_FALSE(uint32Set_contains(single, value + 1));
    }
    uint32Set_free(single);
}
// Suite name fixed from "IntSet" to "IntsetTest" so this test is grouped (and
// filterable) together with the other intset tests.
TEST(IntsetTest, Max)
{
    // the largest representable value has to be storable and retrievable
    constexpr auto n = std::numeric_limits<uint32_t>::max();
    uint32Set *set = uint32Set_new(1);
    ASSERT_TRUE(set);
    EXPECT_FALSE(uint32Set_contains(set, n));
    EXPECT_TRUE(uint32Set_add(set, n));
    EXPECT_TRUE(uint32Set_contains(set, n));
    uint32Set_free(set);
}
// Suite name fixed from "InsetTest" (typo) to "IntsetTest" so this test is
// grouped (and filterable) together with the other intset tests.
TEST(IntsetTest, Negative)
{
    // a negative value has to be storable and retrievable in a signed set
    constexpr auto n = std::numeric_limits<int64_t>::min();
    static_assert(n < 0, "n not negative");
    // more than one bucket, so the bucket index actually depends on the value
    int64Set *set = int64Set_new(3);
    ASSERT_TRUE(set);
    EXPECT_FALSE(int64Set_contains(set, n));
    EXPECT_TRUE(int64Set_add(set, n));
    EXPECT_TRUE(int64Set_contains(set, n));
    int64Set_free(set);
}

View File

@ -1,4 +1,5 @@
# Tests for _speedups.LocationStore and NetUtils._LocationStore
import os
import typing
import unittest
import warnings
@ -7,6 +8,8 @@ from NetUtils import LocationStore, _LocationStore
State = typing.Dict[typing.Tuple[int, int], typing.Set[int]]
RawLocations = typing.Dict[int, typing.Dict[int, typing.Tuple[int, int, int]]]
ci = bool(os.environ.get("CI")) # always set in GitHub actions
sample_data: RawLocations = {
1: {
11: (21, 2, 7),
@ -24,6 +27,9 @@ sample_data: RawLocations = {
3: {
9: (99, 4, 0),
},
5: {
9: (99, 5, 0),
}
}
empty_state: State = {
@ -45,14 +51,14 @@ class Base:
store: typing.Union[LocationStore, _LocationStore]
def test_len(self) -> None:
self.assertEqual(len(self.store), 4)
self.assertEqual(len(self.store), 5)
self.assertEqual(len(self.store[1]), 3)
def test_key_error(self) -> None:
with self.assertRaises(KeyError):
_ = self.store[0]
with self.assertRaises(KeyError):
_ = self.store[5]
_ = self.store[6]
locations = self.store[1] # no Exception
with self.assertRaises(KeyError):
_ = locations[7]
@ -71,7 +77,7 @@ class Base:
self.assertEqual(self.store[1].get(10, (None, None, None)), (None, None, None))
def test_iter(self) -> None:
self.assertEqual(sorted(self.store), [1, 2, 3, 4])
self.assertEqual(sorted(self.store), [1, 2, 3, 4, 5])
self.assertEqual(len(self.store), len(sample_data))
self.assertEqual(list(self.store[1]), [11, 12, 13])
self.assertEqual(len(self.store[1]), len(sample_data[1]))
@ -85,13 +91,26 @@ class Base:
self.assertEqual(sorted(self.store[1].items())[0][1], self.store[1][11])
def test_find_item(self) -> None:
# empty player set
self.assertEqual(sorted(self.store.find_item(set(), 99)), [])
# no such player, single
self.assertEqual(sorted(self.store.find_item({6}, 99)), [])
# no such player, set
self.assertEqual(sorted(self.store.find_item({7, 8, 9}, 99)), [])
# no such item
self.assertEqual(sorted(self.store.find_item({3}, 1)), [])
self.assertEqual(sorted(self.store.find_item({5}, 99)), [])
# valid matches
self.assertEqual(sorted(self.store.find_item({3}, 99)),
[(4, 9, 99, 3, 0)])
self.assertEqual(sorted(self.store.find_item({3, 4}, 99)),
[(3, 9, 99, 4, 0), (4, 9, 99, 3, 0)])
self.assertEqual(sorted(self.store.find_item({2, 3, 4}, 99)),
[(3, 9, 99, 4, 0), (4, 9, 99, 3, 0)])
# test hash collision in set
self.assertEqual(sorted(self.store.find_item({3, 5}, 99)),
[(4, 9, 99, 3, 0), (5, 9, 99, 5, 0)])
self.assertEqual(sorted(self.store.find_item(set(range(2048)), 13)),
[(1, 13, 13, 1, 0)])
def test_get_for_player(self) -> None:
self.assertEqual(self.store.get_for_player(3), {4: {9}})
@ -196,18 +215,20 @@ class TestPurePythonLocationStoreConstructor(Base.TestLocationStoreConstructor):
super().setUp()
@unittest.skipIf(LocationStore is _LocationStore, "_speedups not available")
@unittest.skipIf(LocationStore is _LocationStore and not ci, "_speedups not available")
class TestSpeedupsLocationStore(Base.TestLocationStore):
"""Run base method tests for cython implementation."""
def setUp(self) -> None:
self.assertFalse(LocationStore is _LocationStore, "Failed to load _speedups")
self.store = LocationStore(sample_data)
super().setUp()
@unittest.skipIf(LocationStore is _LocationStore, "_speedups not available")
@unittest.skipIf(LocationStore is _LocationStore and not ci, "_speedups not available")
class TestSpeedupsLocationStoreConstructor(Base.TestLocationStoreConstructor):
"""Run base constructor tests and tests the additional constraints for cython implementation."""
def setUp(self) -> None:
self.assertFalse(LocationStore is _LocationStore, "Failed to load _speedups")
self.type = LocationStore
super().setUp()