diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index e69de29bb..000000000 diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml index 06a70841a..eaabe8aa1 100644 --- a/.github/workflows/package.yml +++ b/.github/workflows/package.yml @@ -6,11 +6,14 @@ on: - main release: types: [published] + pull_request: jobs: - dist: - name: Build Packages + build_sdist: + name: Build Source Package runs-on: ubuntu-latest + outputs: + filename: ${{steps.build.outputs.filename}} steps: - uses: actions/checkout@v4 @@ -28,22 +31,52 @@ jobs: - name: Install Python deps run: pip install -U build - - name: Build distribution - run: python -m build + - name: Build source distribution + id: build + run: | + python -m build -s + basename dist/*.tar.gz |sed -e 's/^/filename=/' >> "$GITHUB_OUTPUT" + cat "$GITHUB_OUTPUT" - name: Save archive uses: actions/upload-artifact@v4 with: - name: pypi-pkgs + name: dist-src path: dist - name: List dist dir run: ls -R dist + build_wheels: + name: Build wheels on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + needs: [build_sdist] + strategy: + matrix: + os: [ubuntu-20.04, windows-2019, macos-11] + + steps: + - name: Fetch compiled package distributions + uses: actions/download-artifact@v4 + with: + name: dist-src + path: dist + + - name: Build wheels + uses: pypa/cibuildwheel@v2.16.5 + with: + package-dir: dist/${{needs.build_sdist.outputs.filename}} + output-dir: dist + + - uses: actions/upload-artifact@v4 + with: + name: dist-wheels-${{ matrix.os }} + path: ./dist/*.whl + pypi-publish: name: Publish to PyPI runs-on: ubuntu-latest - needs: [dist] + needs: [build_sdist, build_wheels] if: github.event_name == 'release' environment: release @@ -54,8 +87,9 @@ jobs: - name: Fetch compiled package distributions uses: actions/download-artifact@v4 with: - name: pypi-pkgs + pattern: dist-* path: dist + merge-multiple: true - name: Publish package distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d440c160f..b2c42f642 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -49,6 +49,10 @@ jobs: python -V numba -s + - name: Compile extension modules + run: | + just build-inplace + - name: Test LKPY run: | python -m pytest --cov=lenskit --verbose --log-file=test.log @@ -80,15 +84,11 @@ jobs: with: fetch-depth: 0 - - name: Create Conda environment file - run: | - pipx run pyproject2conda yaml -p 3.10 -e test -e demo -e dev -o environment.yml - - name: 👢 Set up Conda environment uses: mamba-org/setup-micromamba@v1 id: setup with: - environment-file: environment.yml + environment-file: envs/lenskit-py3.11-dev.yaml environment-name: lkpy cache-environment: true init-shell: bash @@ -108,6 +108,10 @@ jobs: !data/*.zip key: test-mldata-000 + - name: Compile extension modules + run: | + just build-inplace + - name: Download ML data run: | python -m lenskit.datasets.fetch ml-100k @@ -137,6 +141,40 @@ jobs: with: artifact-name: test-check-docs + cython-cover: + name: Measure Cython Coverage + timeout-minutes: 30 + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: 👢 Set up Conda environment + uses: mamba-org/setup-micromamba@v1 + id: setup + with: + environment-file: envs/lenskit-py3.11-dev.yaml + environment-name: lkpy + cache-environment: true + init-shell: bash + + - name: Compile extension modules + run: | + just build-inplace + env: + BUILD_FOR_COVER: 1 + + - name: Run Eval Tests + run: | + python -m pytest --cov=lenskit -m 'not slow' --log-file test-cython-cover.log + + - name: Process test results + uses: lenskit/lkbuild/actions/save-test-results@main + with: + artifact-name: test-cython-cover + vanilla: name: Vanilla Python ${{matrix.python}} on ${{matrix.platform}} runs-on: ${{matrix.platform}} @@ -237,6 +275,7 @@ jobs: - vanilla - check-docs - mindep + - cython-cover steps: - name: Check out source diff --git a/envs/lenskit-py3.10-ci.yaml b/envs/lenskit-py3.10-ci.yaml index 4295864c0..d88eabce3 100644 --- a/envs/lenskit-py3.10-ci.yaml +++ b/envs/lenskit-py3.10-ci.yaml @@ -12,10 +12,12 @@ channels: dependencies: - python=3.10 - binpickle>=0.3.2 + - c-compiler - cffi>=1.15.0 - copier==9.* - coverage>=5 - csr>=0.5 + - cython==3.* - docopt>=0.6 - hypothesis>=6 - ipython>=7 @@ -24,6 +26,7 @@ dependencies: - numpy>=1.23 - pandas<3,>=1.5 - pyproject2conda~=0.11 + - pytest-benchmark==4.* - pytest-cov>=2.12 - pytest-doctestplus==1.* - pytest==7.* diff --git a/envs/lenskit-py3.10-dev.yaml b/envs/lenskit-py3.10-dev.yaml index c7fa0553a..8fd38e608 100644 --- a/envs/lenskit-py3.10-dev.yaml +++ b/envs/lenskit-py3.10-dev.yaml @@ -12,10 +12,12 @@ channels: dependencies: - python=3.10 - binpickle>=0.3.2 + - c-compiler - cffi>=1.15.0 - copier==9.* - coverage>=5 - csr>=0.5 + - cython==3.* - docopt>=0.6 - hypothesis>=6 - ipython>=7 @@ -29,6 +31,7 @@ dependencies: - numpy>=1.23 - pandas<3,>=1.5 - pyproject2conda~=0.11 + - pytest-benchmark==4.* - pytest-cov>=2.12 - pytest-doctestplus==1.* - pytest==7.* @@ -44,7 +47,6 @@ dependencies: - sphinx_rtd_theme>=0.5 - sphinxcontrib-bibtex>=2.0 - sphinxext-opengraph>=0.5 - - tbb - threadpoolctl>=3.0 - tqdm>=4 - pip diff --git a/envs/lenskit-py3.10-test.yaml b/envs/lenskit-py3.10-test.yaml index 643453fa1..d0f792a1b 100644 --- a/envs/lenskit-py3.10-test.yaml +++ b/envs/lenskit-py3.10-test.yaml @@ -15,10 +15,12 @@ dependencies: - cffi>=1.15.0 - coverage>=5 - csr>=0.5 + - cython==3.* - hypothesis>=6 - numba<0.59,>=0.56 - numpy>=1.23 - pandas<3,>=1.5 + - pytest-benchmark==4.* - pytest-cov>=2.12 - pytest-doctestplus==1.* - pytest==7.* diff --git a/envs/lenskit-py3.11-ci.yaml b/envs/lenskit-py3.11-ci.yaml index a76eea719..6d37a929b 100644 --- a/envs/lenskit-py3.11-ci.yaml +++ b/envs/lenskit-py3.11-ci.yaml @@ -12,10 +12,12 @@ channels: dependencies: - python=3.11 - binpickle>=0.3.2 + - c-compiler - cffi>=1.15.0 - copier==9.* - coverage>=5 - csr>=0.5 + - cython==3.* - docopt>=0.6 - hypothesis>=6 - ipython>=7 @@ -24,6 +26,7 @@ dependencies: - numpy>=1.23 - pandas<3,>=1.5 - pyproject2conda~=0.11 + - pytest-benchmark==4.* - pytest-cov>=2.12 - pytest-doctestplus==1.* - pytest==7.* diff --git a/envs/lenskit-py3.11-dev.yaml b/envs/lenskit-py3.11-dev.yaml index b1e994e25..ff5412016 100644 --- a/envs/lenskit-py3.11-dev.yaml +++ b/envs/lenskit-py3.11-dev.yaml @@ -12,10 +12,12 @@ channels: dependencies: - python=3.11 - binpickle>=0.3.2 + - c-compiler - cffi>=1.15.0 - copier==9.* - coverage>=5 - csr>=0.5 + - cython==3.* - docopt>=0.6 - hypothesis>=6 - ipython>=7 @@ -29,6 +31,7 @@ dependencies: - numpy>=1.23 - pandas<3,>=1.5 - pyproject2conda~=0.11 + - pytest-benchmark==4.* - pytest-cov>=2.12 - pytest-doctestplus==1.* - pytest==7.* @@ -44,7 +47,6 @@ dependencies: - sphinx_rtd_theme>=0.5 - sphinxcontrib-bibtex>=2.0 - sphinxext-opengraph>=0.5 - - tbb - threadpoolctl>=3.0 - tqdm>=4 - pip diff --git a/envs/lenskit-py3.11-test.yaml b/envs/lenskit-py3.11-test.yaml index c47f53d8d..4ee3e3b77 100644 --- a/envs/lenskit-py3.11-test.yaml +++ b/envs/lenskit-py3.11-test.yaml @@ -15,10 +15,12 @@ dependencies: - cffi>=1.15.0 - coverage>=5 - csr>=0.5 + - cython==3.* - hypothesis>=6 - numba<0.59,>=0.56 - numpy>=1.23 - pandas<3,>=1.5 + - pytest-benchmark==4.* - pytest-cov>=2.12 - pytest-doctestplus==1.* - pytest==7.* diff --git a/justfile b/justfile index b7e76b7de..b390357e5 100644 --- a/justfile +++ b/justfile @@ -11,6 +11,10 @@ clean: build: python -m build -n +# build the extension modules in-place for testing +build-inplace: + python setup.py build_ext --inplace + # install the package [confirm("this installs package from a wheel, continue [y/N]?")] install: @@ -29,15 +33,15 @@ setup-conda-env version="3.11" env="dev": conda env create -n lkpy -f envs/lenskit-py{{version}}-{{env}}.yaml # run tests with default configuration -test: +test: build-inplace python -m pytest # run fast tests -test-fast: +test-fast: build-inplace python -m pytest -m 'not slow' # run tests matching a keyword query -test-matching query: +test-matching query: build-inplace python -m pytest -k {{query}} # build documentation diff --git a/lenskit/util/csmatrix.pyi b/lenskit/util/csmatrix.pyi new file mode 100644 index 000000000..33c4b702e --- /dev/null +++ b/lenskit/util/csmatrix.pyi @@ -0,0 +1,24 @@ +import numpy as np +import numpy.typing as npt +from scipy.sparse import csr_array + +class CSMatrix: + nrows: int + ncols: int + nnz: int + + rowptr: npt.NDArray[np.int32] + colind: npt.NDArray[np.int32] + values: npt.NDArray[np.float64] + + def __init__( + self, + nr: int, + nc: int, + rps: npt.NDArray[np.int32], + cis: npt.NDArray[np.int32], + vs: npt.NDArray[np.float64], + ): ... + def row_ep(self, row: int) -> tuple[int, int]: ... + @staticmethod + def from_scipy(matrix: csr_array) -> CSMatrix: ... diff --git a/lenskit/util/csmatrix.pyx b/lenskit/util/csmatrix.pyx new file mode 100644 index 000000000..a0ac84ad4 --- /dev/null +++ b/lenskit/util/csmatrix.pyx @@ -0,0 +1,27 @@ +# cython: language_level=3str + +cdef class CSMatrix: + cdef readonly int nrows, ncols, nnz + cdef readonly int[:] rowptr + cdef readonly int[:] colind + cdef readonly double[:] values + + def __cinit__(self, int nr, int nc, int[:] rps, int[:] cis, double[:] vs): + self.nrows = nr + self.ncols = nc + self.rowptr = rps + self.colind = cis + self.values = vs + self.nnz = self.rowptr[nr] + + @staticmethod + def from_scipy(m): + nr, nc = m.shape + + return CSMatrix(nr, nc, m.indptr, m.indices, m.data) + + cpdef (int,int) row_ep(self, row): + if row < 0 or row >= self.nrows: + raise IndexError(f"invalid row {row} for {self.nrows}x{self.ncols} matrix") + + return self.rowptr[row], self.rowptr[row+1] diff --git a/lenskit/util/kvp.pyx b/lenskit/util/kvp.pyx new file mode 100644 index 000000000..44eb3a974 --- /dev/null +++ b/lenskit/util/kvp.pyx @@ -0,0 +1,109 @@ +# cython: language_level=3str, initializedcheck=False +cimport cython + +cdef class KVPHeap: + cdef readonly int sp, ep, lim + cdef int[::1] keys + cdef double[::1] vals + + def __cinit__(self, int sp, int ep, int lim, int[::1] keys, double[::1] vals): + if ep < sp: + raise ValueError("ep before sp") + if ep - sp > lim: + raise ValueError("array already exceeds limit") + if sp + lim > keys.shape[0]: + raise ValueError("key array too short") + if sp + lim > vals.shape[0]: + raise ValueError("value array too short") + + self.sp = sp + self.ep = ep + self.lim = lim + self.keys = keys + self.vals = vals + + cpdef int insert(self, int k, double v) except -1: + if self.ep - self.sp < self.lim: + # insert into heap without size problems + # put on end, then upheap + self.keys[self.ep] = k + self.vals[self.ep] = v + self._upheap() + self.ep = self.ep + 1 + return self.ep + + elif v > self.vals[self.sp]: + # heap is full, but new value is larger than old min + # stick it on the front, and downheap + self.keys[self.sp] = k + self.vals[self.sp] = v + self._downheap(self.lim) + return self.ep + + else: + # heap is full and new value doesn't belong + return self.ep + + + cpdef void sort(self): + cdef int i = self.ep - self.sp - 1 + while i > 0: + self._swap(i, 0) + self._downheap(i) + i -= 1 + + + cdef void _downheap(self, int limit) noexcept nogil: + cdef bint finished = False + cdef int pos = 0 + cdef int min, left, right + while not finished: + min = pos + left = 2 * pos + 1 + right = 2 * pos + 2 + if left < limit and self._val(left) < self._val(min): + min = left + if right < limit and self._val(right) < self._val(min): + min = right + if min != pos: + # we want to swap! + self._swap(pos, min) + pos = min + else: + finished = True + + + cdef void _upheap(self) noexcept nogil: + cdef int pos = self.ep - self.sp + cdef int parent = (pos - 1) // 2 + while pos > 0 and self._val(parent) > self._val(pos): + self._swap(parent, pos) + pos = parent + parent = (pos - 1) // 2 + + + cdef int _offset(self, int i) noexcept nogil: + return self.sp + i + + + cdef void _swap(self, int i1, int i2) noexcept nogil: + cdef int p1 = self._offset(i1) + cdef int p2 = self._offset(i2) + cdef int tk + cdef double tv + + tk = self.keys[p1] + self.keys[p1] = self.keys[p2] + self.keys[p2] = tk + + tv = self.vals[p1] + self.vals[p1] = self.vals[p2] + self.vals[p2] = tv + + + cdef int _key(self, int i) noexcept nogil: + return self.keys[self._offset(i)] + + + cdef double _val(self, int i) noexcept nogil: + return self.vals[self._offset(i)] diff --git a/pyproject.toml b/pyproject.toml index 58f39d37b..21bcc6000 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools>=64", "setuptools_scm>=8"] +requires = ["setuptools>=64", "setuptools_scm>=8", "cython==3.*"] build-backend = "setuptools.build_meta" [project] @@ -41,6 +41,7 @@ dev = [ "ruff >= 0.2", "copier ==9.*", "unbeheader ~= 1.3", # p2c: -p + "cython ==3.*", "ipython >= 7", "pyproject2conda ~=0.11", "sphinx-autobuild >= 2021", @@ -52,6 +53,8 @@ test = [ "pytest-doctestplus ==1.*", "pytest-cov >= 2.12", "coverage >= 5", + "pytest-benchmark >=4.0.0", + "cython ==3.*", "hypothesis >= 6", ] doc = [ @@ -89,12 +92,19 @@ exclude = [ "tasks.py", ] -[tool.setuptools] -packages = ["lenskit"] +[tool.setuptools.packages.find] +include = ["lenskit*"] [tool.setuptools_scm] version_scheme = "release-branch-semver" +[tool.cibuildwheel] +build = "cp310* cp311*" +skip = "pp*" + +[tool.cibuildwheel.macos] +archs = "x86_64 arm64" + # settings for generating conda environments for dev & CI, when needed [tool.pyproject2conda] channels = ["conda-forge"] @@ -106,11 +116,11 @@ deps = ["tbb", "just"] [tool.pyproject2conda.envs.dev] extras = ["dev", "test", "doc", "demo", "sklearn"] -deps = ["just==1.*", "tbb"] +deps = ["just==1.*", "c-compiler", "tbb"] [tool.pyproject2conda.envs.ci] extras = ["test", "sklearn", "dev"] -deps = ["just==1.*", "tbb"] +deps = ["just==1.*", "c-compiler", "tbb"] [tool.ruff] line-length = 100 @@ -148,3 +158,6 @@ exclude = [ ] reportMissingImports = true reportMissingTypeStubs = false + +[tool.coverage.run] +plugins = ["Cython.Coverage"] diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..13901024d --- /dev/null +++ b/setup.py @@ -0,0 +1,43 @@ +# This file is part of LensKit. +# Copyright (C) 2018-2023 Boise State University +# Copyright (C) 2023-2024 Drexel University +# Licensed under the MIT license, see LICENSE.md for details. +# SPDX-License-Identifier: MIT + +import os + +from setuptools import Extension, setup + +try: + from Cython.Build import cythonize + + USE_CYTHON = True +except ImportError: + USE_CYTHON = False + + +COVERAGE = os.environ.get("BUILD_FOR_COVER", None) +EXT_SPECS = {"lenskit.util.kvp": None, "lenskit.util.csmatrix": None} + +CYTHON_OPTIONS = {} +C_DEFINES = [] +if COVERAGE: + print("enabling tracing") + CYTHON_OPTIONS["linetrace"] = True + C_DEFINES.append(("CYTHON_TRACE_NOGIL", "1")) + + +def _make_extension(name: str, opts: None) -> Extension: + path = name.replace(".", "/") + if USE_CYTHON: + path += ".pyx" + else: + path += ".c" + return Extension(name, [path], define_macros=C_DEFINES) + + +EXTENSIONS = [_make_extension(ext, opts) for (ext, opts) in EXT_SPECS.items()] +if USE_CYTHON: + EXTENSIONS = cythonize(EXTENSIONS, compiler_directives=CYTHON_OPTIONS) +print(EXTENSIONS[0].__dict__) +setup(ext_modules=EXTENSIONS) diff --git a/tests/test_csmatrix.py b/tests/test_csmatrix.py new file mode 100644 index 000000000..3de548382 --- /dev/null +++ b/tests/test_csmatrix.py @@ -0,0 +1,62 @@ +import numpy as np +import scipy.sparse as sps +from numba import njit + +import hypothesis.extra.numpy as nph +import hypothesis.strategies as st +from hypothesis import assume, given, settings +from pytest import mark + +from lenskit.util.csmatrix import CSMatrix + + +@st.composite +def sparse_matrices(draw, max_shape=(50, 50), density=st.floats(0, 1), format="csr"): + ubr, ubc = max_shape + + rows = draw(st.integers(1, ubr)) + cols = draw(st.integers(1, ubc)) + dens = draw(density) + prod = rows * cols + nnz = int(prod * dens) + + points = draw(nph.arrays("int32", nnz, elements=st.integers(0, prod - 1), unique=True)) + values = draw(nph.arrays("float64", nnz)) + rvs = points % rows + cvs = points // rows + assert np.all(rvs < rows) + assert np.all(cvs < cols) + + return sps.csr_array((values, (rvs, cvs)), shape=(rows, cols)) + + +@given(sparse_matrices()) +def test_init_matrix(m: sps.csr_array): + print(m.shape, m.nnz, m.indptr.dtype, m.indices.dtype) + nr, nc = m.shape + + m2 = CSMatrix(nr, nc, m.indptr, m.indices, m.data) + + assert m2.nrows == nr + assert m2.ncols == nc + assert m2.nnz == m.nnz + + +@given(sparse_matrices()) +def test_from_scipy(m: sps.csr_array): + print(m.shape, m.nnz, m.indptr.dtype, m.indices.dtype) + m2 = CSMatrix.from_scipy(m) + + assert m2.nrows == m.shape[0] + assert m2.ncols == m.shape[1] + assert m2.nnz == m.nnz + + +@given(sparse_matrices()) +def test_csm_row_ep(m: sps.csr_array): + m2 = CSMatrix.from_scipy(m) + + for i in range(m2.nrows): + sp, ep = m2.row_ep(i) + assert sp == m2.rowptr[i] + assert ep == m2.rowptr[i + 1] diff --git a/tests/test_util_accum.py b/tests/test_util_accum.py index 8e6cd4f35..055aef33e 100644 --- a/tests/test_util_accum.py +++ b/tests/test_util_accum.py @@ -5,13 +5,14 @@ # SPDX-License-Identifier: MIT import numpy as np +from numba import njit -from lenskit.util.accum import kvp_minheap_insert, kvp_minheap_sort - - -from hypothesis import given, assume, settings -import hypothesis.strategies as st import hypothesis.extra.numpy as nph +import hypothesis.strategies as st +from hypothesis import assume, given, settings +from pytest import mark + +from lenskit.util.accum import kvp_minheap_insert, kvp_minheap_sort def test_kvp_add_to_empty(): @@ -185,3 +186,25 @@ def test_kvp_sort(values): assert vs[-1] == np.min(ovs) assert all(ks == oks[ord]) assert all(vs == ovs[ord]) + + +@mark.benchmark(group="KVPSort") +def test_kvp_sort_numba(rng, benchmark): + N = 10000 + K = 500 + in_keys = np.arange(N) + in_vals = rng.uniform(size=N) + + def op(): + ks = np.zeros(K, np.int32) + vs = np.zeros(K, np.float64) + ep = 0 + for i in range(N): + ep = kvp_minheap_insert(0, ep, K, in_keys[i], in_vals[i], ks, vs) + + kvp_minheap_sort(0, ep, ks, vs) + + # dry run to compile + op() + + benchmark(op) diff --git a/tests/test_util_kvp.py b/tests/test_util_kvp.py new file mode 100644 index 000000000..4bc8d644d --- /dev/null +++ b/tests/test_util_kvp.py @@ -0,0 +1,216 @@ +# This file is part of LensKit. +# Copyright (C) 2018-2023 Boise State University +# Copyright (C) 2023-2024 Drexel University +# Licensed under the MIT license, see LICENSE.md for details. +# SPDX-License-Identifier: MIT + +import numpy as np + +import hypothesis.extra.numpy as nph +import hypothesis.strategies as st +from hypothesis import assume, given, settings +from pytest import mark + +from lenskit.util.kvp import KVPHeap + + +def test_kvp_add_to_empty(): + ks = np.empty(10, dtype=np.int32) + vs = np.empty(10) + + # insert an item + kvp = KVPHeap(0, 0, 10, ks, vs) + n = kvp.insert(5, 3.0) + + # ep has moved + assert n == 1 + assert kvp.ep == 1 + + # item is there + assert ks[0] == 5 + assert vs[0] == 3.0 + + +def test_kvp_add_larger(): + ks = np.empty(10, dtype=np.int32) + vs = np.empty(10) + + # insert an item + kvp = KVPHeap(0, 0, 10, ks, vs) + n = kvp.insert(5, 3.0) + n = kvp.insert(1, 6.0) + + # ep has moved + assert n == 2 + assert kvp.ep == 2 + + # data is there + assert all(ks[:2] == [5, 1]) + assert all(vs[:2] == [3.0, 6.0]) + + +def test_kvp_add_smaller(): + ks = np.empty(10, dtype=np.int32) + vs = np.empty(10) + + # insert an item + kvp = KVPHeap(0, 0, 10, ks, vs) + n = kvp.insert(5, 3.0) + n = kvp.insert(1, 1.0) + + # ep has moved + assert n == 2 + + # data is there + assert all(ks[:2] == [1, 5]) + assert all(vs[:2] == [1.0, 3.0]) + + +@given(st.integers(10, 100), st.data()) +def test_kvp_add_several(kvp_len, data): + "Test filling up a KVP." + ks = np.full(kvp_len, -1, dtype=np.int32) + vs = np.zeros(kvp_len) + + n = 0 + + values = np.random.randn(kvp_len) * 100 + + kvp = KVPHeap(0, 0, kvp_len, ks, vs) + for k, v in enumerate(values): + n = kvp.insert(k, v) + + assert n == kvp_len + # all key slots are used + assert all(ks >= 0) + # all keys are there + assert all(np.sort(ks) == list(range(kvp_len))) + # value is the smallest + assert vs[0] == np.min(vs) + + # it rejects a smaller value; -10000 is below our min value + special_k = 500 + n2 = kvp.insert(special_k, -10000) + + assert n2 == n + assert all(ks != special_k) + assert all(vs > -5000.0) + + # it inserts a larger value somewhere + old_mk = ks[0] + old_mv = vs[0] + assume(np.median(vs) < 50) + nv = data.draw(st.floats(np.median(vs), 100)) + n2 = kvp.insert(special_k, nv) + + assert n2 == n + # the old value minimum key has been removed + assert all(ks != old_mk) + # the old minimum value has been removed + assert all(vs > old_mv) + assert np.count_nonzero(ks == special_k) == 1 + + +@given(st.data()) +def test_kvp_add_middle(data): + "Test that KVP works in the middle of an array." + ks = np.full(100, -1, dtype=np.int32) + vs = np.full(100, np.nan) + + n = 25 + avs = [] + + values = st.floats(-100, 100) + kvp = KVPHeap(25, 25, 10, ks, vs) + for k in range(25): + v = data.draw(values) + avs.append(v) + n = kvp.insert(k, v) + + assert n == 35 + # all the keys + assert all(ks[25:35] >= 0) + # value is the smallest + assert vs[25] == np.min(vs[25:35]) + # highest-ranked keys + assert all(np.sort(vs[25:35]) == np.sort(avs)[15:]) + + # early is untouched + assert all(ks[:25] == -1) + assert all(np.isnan(vs[:25])) + assert all(ks[35:] == -1) + assert all(np.isnan(vs[35:])) + + +def test_kvp_insert_min(): + ks = np.full(10, -1, dtype=np.int32) + vs = np.zeros(10) + + n = 0 + + # something less than existing data + kvp = KVPHeap(0, 0, 10, ks, vs) + n = kvp.insert(5, -3) + assert n == 1 + assert ks[0] == 5 + assert vs[0] == -3.0 + + # equal to existing data + kvp = KVPHeap(0, 0, 10, ks, vs) + n = kvp.insert(7, -3.0) + assert n == 1 + assert ks[0] == 7 + assert vs[0] == -3.0 + + # greater than to existing data + kvp = KVPHeap(0, 0, 10, ks, vs) + n = kvp.insert(9, 5.0) + assert n == 1 + assert ks[0] == 9 + assert vs[0] == 5.0 + + +@settings(deadline=None) +@given(nph.arrays(np.float64, 20, elements=st.floats(-100, 100), unique=True)) +def test_kvp_sort(values): + "Test that sorting logic works" + ks = np.full(10, -1, dtype=np.int32) + vs = np.zeros(10) + + n = 0 + + kvp = KVPHeap(0, 0, 10, ks, vs) + for k in range(20): + v = values[k] + n = kvp.insert(k, v) + + assert n == 10 + + ovs = vs.copy() + oks = ks.copy() + ord = np.argsort(ovs) + ord = ord[::-1] + + kvp.sort() + assert vs[0] == np.max(ovs) + assert vs[-1] == np.min(ovs) + assert all(ks == oks[ord]) + assert all(vs == ovs[ord]) + + +@mark.benchmark(group="KVPSort") +def test_kvp_sort_cython(rng, benchmark): + N = 10000 + K = 500 + in_keys = np.arange(N) + in_vals = rng.uniform(size=N) + + def op(): + ks = np.zeros(K, np.int32) + vs = np.zeros(K, np.float64) + kvp = KVPHeap(0, 0, K, ks, vs) + for i in range(N): + kvp.insert(in_keys[i], in_vals[i]) + kvp.sort() + + benchmark(op)