From bf39f6e587708b4109581b701ec9fe42a267d8e4 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sun, 3 Mar 2024 18:02:49 -0500 Subject: [PATCH 01/19] Initial Cython build infra & Cython-based KVP --- lenskit/util/kvp.pyx | 109 +++++++++++++++++++++++ setup.py | 13 +++ tests/test_util_kvp.py | 197 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 319 insertions(+) create mode 100644 lenskit/util/kvp.pyx create mode 100644 setup.py create mode 100644 tests/test_util_kvp.py diff --git a/lenskit/util/kvp.pyx b/lenskit/util/kvp.pyx new file mode 100644 index 000000000..44eb3a974 --- /dev/null +++ b/lenskit/util/kvp.pyx @@ -0,0 +1,109 @@ +# cython: language_level=3str, initializedcheck=False +cimport cython + +cdef class KVPHeap: + cdef readonly int sp, ep, lim + cdef int[::1] keys + cdef double[::1] vals + + def __cinit__(self, int sp, int ep, int lim, int[::1] keys, double[::1] vals): + if ep < sp: + raise ValueError("ep before sp") + if ep - sp > lim: + raise ValueError("array already exceeds limit") + if sp + lim > keys.shape[0]: + raise ValueError("key array too short") + if sp + lim > vals.shape[0]: + raise ValueError("value array too short") + + self.sp = sp + self.ep = ep + self.lim = lim + self.keys = keys + self.vals = vals + + cpdef int insert(self, int k, double v) except -1: + if self.ep - self.sp < self.lim: + # insert into heap without size problems + # put on end, then upheap + self.keys[self.ep] = k + self.vals[self.ep] = v + self._upheap() + self.ep = self.ep + 1 + return self.ep + + elif v > self.vals[self.sp]: + # heap is full, but new value is larger than old min + # stick it on the front, and downheap + self.keys[self.sp] = k + self.vals[self.sp] = v + self._downheap(self.lim) + return self.ep + + else: + # heap is full and new value doesn't belong + return self.ep + + + cpdef void sort(self): + cdef int i = self.ep - self.sp - 1 + while i > 0: + self._swap(i, 0) + self._downheap(i) + i -= 1 + + + cdef void _downheap(self, int limit) noexcept nogil: + cdef bint finished = False + cdef int pos = 0 + cdef int min, left, right + while not finished: + min = pos + left = 2 * pos + 1 + right = 2 * pos + 2 + if left < limit and self._val(left) < self._val(min): + min = left + if right < limit and self._val(right) < self._val(min): + min = right + if min != pos: + # we want to swap! + self._swap(pos, min) + pos = min + else: + finished = True + + + cdef void _upheap(self) noexcept nogil: + cdef int pos = self.ep - self.sp + cdef int parent = (pos - 1) // 2 + while pos > 0 and self._val(parent) > self._val(pos): + self._swap(parent, pos) + pos = parent + parent = (pos - 1) // 2 + + + cdef int _offset(self, int i) noexcept nogil: + return self.sp + i + + + cdef void _swap(self, int i1, int i2) noexcept nogil: + cdef int p1 = self._offset(i1) + cdef int p2 = self._offset(i2) + cdef int tk + cdef double tv + + tk = self.keys[p1] + self.keys[p1] = self.keys[p2] + self.keys[p2] = tk + + tv = self.vals[p1] + self.vals[p1] = self.vals[p2] + self.vals[p2] = tv + + + cdef int _key(self, int i) noexcept nogil: + return self.keys[self._offset(i)] + + + cdef double _val(self, int i) noexcept nogil: + return self.vals[self._offset(i)] diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..4723f91ec --- /dev/null +++ b/setup.py @@ -0,0 +1,13 @@ +from Cython.Build import cythonize +from setuptools import Extension, setup + +EXT_SPECS = {"lenskit.util.kvp": None} + + +def _make_extension(name: str, opts: None) -> Extension: + path = name.replace(".", "/") + ".pyx" + return Extension(name, [path]) + + +EXTENSIONS = [_make_extension(ext, opts) for (ext, opts) in EXT_SPECS.items()] +setup(ext_modules=cythonize(EXTENSIONS)) diff --git a/tests/test_util_kvp.py b/tests/test_util_kvp.py new file mode 100644 index 000000000..ee28fe04d --- /dev/null +++ b/tests/test_util_kvp.py @@ -0,0 +1,197 @@ +# This file is part of LensKit. +# Copyright (C) 2018-2023 Boise State University +# Copyright (C) 2023-2024 Drexel University +# Licensed under the MIT license, see LICENSE.md for details. +# SPDX-License-Identifier: MIT + +import numpy as np + +import hypothesis.extra.numpy as nph +import hypothesis.strategies as st +from hypothesis import assume, given, settings + +from lenskit.util.kvp import KVPHeap + + +def test_kvp_add_to_empty(): + ks = np.empty(10, dtype=np.int32) + vs = np.empty(10) + + # insert an item + kvp = KVPHeap(0, 0, 10, ks, vs) + n = kvp.insert(5, 3.0) + + # ep has moved + assert n == 1 + assert kvp.ep == 1 + + # item is there + assert ks[0] == 5 + assert vs[0] == 3.0 + + +def test_kvp_add_larger(): + ks = np.empty(10, dtype=np.int32) + vs = np.empty(10) + + # insert an item + kvp = KVPHeap(0, 0, 10, ks, vs) + n = kvp.insert(5, 3.0) + n = kvp.insert(1, 6.0) + + # ep has moved + assert n == 2 + assert kvp.ep == 2 + + # data is there + assert all(ks[:2] == [5, 1]) + assert all(vs[:2] == [3.0, 6.0]) + + +def test_kvp_add_smaller(): + ks = np.empty(10, dtype=np.int32) + vs = np.empty(10) + + # insert an item + kvp = KVPHeap(0, 0, 10, ks, vs) + n = kvp.insert(5, 3.0) + n = kvp.insert(1, 1.0) + + # ep has moved + assert n == 2 + + # data is there + assert all(ks[:2] == [1, 5]) + assert all(vs[:2] == [1.0, 3.0]) + + +@given(st.integers(10, 100), st.data()) +def test_kvp_add_several(kvp_len, data): + "Test filling up a KVP." + ks = np.full(kvp_len, -1, dtype=np.int32) + vs = np.zeros(kvp_len) + + n = 0 + + values = np.random.randn(kvp_len) * 100 + + kvp = KVPHeap(0, 0, kvp_len, ks, vs) + for k, v in enumerate(values): + n = kvp.insert(k, v) + + assert n == kvp_len + # all key slots are used + assert all(ks >= 0) + # all keys are there + assert all(np.sort(ks) == list(range(kvp_len))) + # value is the smallest + assert vs[0] == np.min(vs) + + # it rejects a smaller value; -10000 is below our min value + special_k = 500 + n2 = kvp.insert(special_k, -10000) + + assert n2 == n + assert all(ks != special_k) + assert all(vs > -5000.0) + + # it inserts a larger value somewhere + old_mk = ks[0] + old_mv = vs[0] + assume(np.median(vs) < 50) + nv = data.draw(st.floats(np.median(vs), 100)) + n2 = kvp.insert(special_k, nv) + + assert n2 == n + # the old value minimum key has been removed + assert all(ks != old_mk) + # the old minimum value has been removed + assert all(vs > old_mv) + assert np.count_nonzero(ks == special_k) == 1 + + +@given(st.data()) +def test_kvp_add_middle(data): + "Test that KVP works in the middle of an array." + ks = np.full(100, -1, dtype=np.int32) + vs = np.full(100, np.nan) + + n = 25 + avs = [] + + values = st.floats(-100, 100) + kvp = KVPHeap(25, 25, 10, ks, vs) + for k in range(25): + v = data.draw(values) + avs.append(v) + n = kvp.insert(k, v) + + assert n == 35 + # all the keys + assert all(ks[25:35] >= 0) + # value is the smallest + assert vs[25] == np.min(vs[25:35]) + # highest-ranked keys + assert all(np.sort(vs[25:35]) == np.sort(avs)[15:]) + + # early is untouched + assert all(ks[:25] == -1) + assert all(np.isnan(vs[:25])) + assert all(ks[35:] == -1) + assert all(np.isnan(vs[35:])) + + +def test_kvp_insert_min(): + ks = np.full(10, -1, dtype=np.int32) + vs = np.zeros(10) + + n = 0 + + # something less than existing data + kvp = KVPHeap(0, 0, 10, ks, vs) + n = kvp.insert(5, -3) + assert n == 1 + assert ks[0] == 5 + assert vs[0] == -3.0 + + # equal to existing data + kvp = KVPHeap(0, 0, 10, ks, vs) + n = kvp.insert(7, -3.0) + assert n == 1 + assert ks[0] == 7 + assert vs[0] == -3.0 + + # greater than to existing data + kvp = KVPHeap(0, 0, 10, ks, vs) + n = kvp.insert(9, 5.0) + assert n == 1 + assert ks[0] == 9 + assert vs[0] == 5.0 + + +@settings(deadline=None) +@given(nph.arrays(np.float64, 20, elements=st.floats(-100, 100), unique=True)) +def test_kvp_sort(values): + "Test that sorting logic works" + ks = np.full(10, -1, dtype=np.int32) + vs = np.zeros(10) + + n = 0 + + kvp = KVPHeap(0, 0, 10, ks, vs) + for k in range(20): + v = values[k] + n = kvp.insert(k, v) + + assert n == 10 + + ovs = vs.copy() + oks = ks.copy() + ord = np.argsort(ovs) + ord = ord[::-1] + + kvp.sort() + assert vs[0] == np.max(ovs) + assert vs[-1] == np.min(ovs) + assert all(ks == oks[ord]) + assert all(vs == ovs[ord]) From 11811fd7442c0aba73f19fff5918bb2fa75dbd91 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sun, 3 Mar 2024 18:40:55 -0500 Subject: [PATCH 02/19] fix setup.py build problems --- pyproject.toml | 4 ++-- setup.py | 18 +++++++++++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 58f39d37b..6d6a3d1e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -89,8 +89,8 @@ exclude = [ "tasks.py", ] -[tool.setuptools] -packages = ["lenskit"] +[tool.setuptools.packages.find] +include = ["lenskit*"] [tool.setuptools_scm] version_scheme = "release-branch-semver" diff --git a/setup.py b/setup.py index 4723f91ec..c62f019b3 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,25 @@ -from Cython.Build import cythonize from setuptools import Extension, setup +try: + from Cython.Build import cythonize + + USE_CYTHON = True +except ImportError: + USE_CYTHON = False + EXT_SPECS = {"lenskit.util.kvp": None} def _make_extension(name: str, opts: None) -> Extension: - path = name.replace(".", "/") + ".pyx" + path = name.replace(".", "/") + if USE_CYTHON: + path += ".pyx" + else: + path += ".c" return Extension(name, [path]) EXTENSIONS = [_make_extension(ext, opts) for (ext, opts) in EXT_SPECS.items()] -setup(ext_modules=cythonize(EXTENSIONS)) +if USE_CYTHON: + EXTENSIONS = cythonize(EXTENSIONS) +setup(ext_modules=EXTENSIONS) From 242f9c8de2da7f456676b016fb009e946ea7bfef Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Wed, 6 Mar 2024 10:10:49 -0500 Subject: [PATCH 03/19] add cython build dependency --- envs/lenskit-py3.10-ci.yaml | 2 ++ envs/lenskit-py3.10-dev.yaml | 3 ++- envs/lenskit-py3.11-ci.yaml | 2 ++ envs/lenskit-py3.11-dev.yaml | 3 ++- pyproject.toml | 1 + 5 files changed, 9 insertions(+), 2 deletions(-) diff --git a/envs/lenskit-py3.10-ci.yaml b/envs/lenskit-py3.10-ci.yaml index 4295864c0..2f13afe72 100644 --- a/envs/lenskit-py3.10-ci.yaml +++ b/envs/lenskit-py3.10-ci.yaml @@ -12,10 +12,12 @@ channels: dependencies: - python=3.10 - binpickle>=0.3.2 + - c-compiler - cffi>=1.15.0 - copier==9.* - coverage>=5 - csr>=0.5 + - cython==3.* - docopt>=0.6 - hypothesis>=6 - ipython>=7 diff --git a/envs/lenskit-py3.10-dev.yaml b/envs/lenskit-py3.10-dev.yaml index c7fa0553a..069e4feae 100644 --- a/envs/lenskit-py3.10-dev.yaml +++ b/envs/lenskit-py3.10-dev.yaml @@ -12,10 +12,12 @@ channels: dependencies: - python=3.10 - binpickle>=0.3.2 + - c-compiler - cffi>=1.15.0 - copier==9.* - coverage>=5 - csr>=0.5 + - cython==3.* - docopt>=0.6 - hypothesis>=6 - ipython>=7 @@ -44,7 +46,6 @@ dependencies: - sphinx_rtd_theme>=0.5 - sphinxcontrib-bibtex>=2.0 - sphinxext-opengraph>=0.5 - - tbb - threadpoolctl>=3.0 - tqdm>=4 - pip diff --git a/envs/lenskit-py3.11-ci.yaml b/envs/lenskit-py3.11-ci.yaml index a76eea719..1998dba79 100644 --- a/envs/lenskit-py3.11-ci.yaml +++ b/envs/lenskit-py3.11-ci.yaml @@ -12,10 +12,12 @@ channels: dependencies: - python=3.11 - binpickle>=0.3.2 + - c-compiler - cffi>=1.15.0 - copier==9.* - coverage>=5 - csr>=0.5 + - cython==3.* - docopt>=0.6 - hypothesis>=6 - ipython>=7 diff --git a/envs/lenskit-py3.11-dev.yaml b/envs/lenskit-py3.11-dev.yaml index b1e994e25..53aea92eb 100644 --- a/envs/lenskit-py3.11-dev.yaml +++ b/envs/lenskit-py3.11-dev.yaml @@ -12,10 +12,12 @@ channels: dependencies: - python=3.11 - binpickle>=0.3.2 + - c-compiler - cffi>=1.15.0 - copier==9.* - coverage>=5 - csr>=0.5 + - cython==3.* - docopt>=0.6 - hypothesis>=6 - ipython>=7 @@ -44,7 +46,6 @@ dependencies: - sphinx_rtd_theme>=0.5 - sphinxcontrib-bibtex>=2.0 - sphinxext-opengraph>=0.5 - - tbb - threadpoolctl>=3.0 - tqdm>=4 - pip diff --git a/pyproject.toml b/pyproject.toml index 6d6a3d1e3..84809090f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ dev = [ "ruff >= 0.2", "copier ==9.*", "unbeheader ~= 1.3", # p2c: -p + "cython ==3.*", "ipython >= 7", "pyproject2conda ~=0.11", "sphinx-autobuild >= 2021", From 233853b512e7b30a0cf6a91b0efacff350c95b73 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Wed, 6 Mar 2024 09:32:37 -0500 Subject: [PATCH 04/19] manually install cython --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d440c160f..8c380509f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -166,7 +166,7 @@ jobs: - name: Install build tools run: | - "$PYTHON" -m pip install -U 'uv>=0.1.15' + "$PYTHON" -m pip install -U 'uv>=0.1.15' cython env: PYTHON: ${{ steps.pyinstall.outputs.python-path }} From 3974f9e2a9dd73080c3e78035cf6b2774502b774 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Tue, 16 Apr 2024 19:31:48 -0400 Subject: [PATCH 05/19] Require Cython for building from source --- .github/workflows/test.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8c380509f..d440c160f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -166,7 +166,7 @@ jobs: - name: Install build tools run: | - "$PYTHON" -m pip install -U 'uv>=0.1.15' cython + "$PYTHON" -m pip install -U 'uv>=0.1.15' env: PYTHON: ${{ steps.pyinstall.outputs.python-path }} diff --git a/pyproject.toml b/pyproject.toml index 84809090f..0f3112e28 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["setuptools>=64", "setuptools_scm>=8"] +requires = ["setuptools>=64", "setuptools_scm>=8", "cython==3.*"] build-backend = "setuptools.build_meta" [project] From 2d227ac163952b0952992d69caefbe5667a742a9 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Wed, 6 Mar 2024 09:46:15 -0500 Subject: [PATCH 06/19] add cython build to justfile --- justfile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/justfile b/justfile index b7e76b7de..b390357e5 100644 --- a/justfile +++ b/justfile @@ -11,6 +11,10 @@ clean: build: python -m build -n +# build the extension modules in-place for testing +build-inplace: + python setup.py build_ext --inplace + # install the package [confirm("this installs package from a wheel, continue [y/N]?")] install: @@ -29,15 +33,15 @@ setup-conda-env version="3.11" env="dev": conda env create -n lkpy -f envs/lenskit-py{{version}}-{{env}}.yaml # run tests with default configuration -test: +test: build-inplace python -m pytest # run fast tests -test-fast: +test-fast: build-inplace python -m pytest -m 'not slow' # run tests matching a keyword query -test-matching query: +test-matching query: build-inplace python -m pytest -k {{query}} # build documentation From da959d8d3860e699d2a32cbabe94da999d6620d2 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sun, 3 Mar 2024 18:52:56 -0500 Subject: [PATCH 07/19] compile extension modules in conda build --- .github/workflows/test.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d440c160f..4f971c22f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -49,6 +49,10 @@ jobs: python -V numba -s + - name: Compile extension modules + run: | + just build-inplace + - name: Test LKPY run: | python -m pytest --cov=lenskit --verbose --log-file=test.log @@ -108,6 +112,10 @@ jobs: !data/*.zip key: test-mldata-000 + - name: Compile extension modules + run: | + just build-inplace + - name: Download ML data run: | python -m lenskit.datasets.fetch ml-100k From 27d2795063c70b1527525ea134b3a1aacb20a731 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sun, 3 Mar 2024 19:00:23 -0500 Subject: [PATCH 08/19] use full dev deps for demo build --- .github/workflows/test.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4f971c22f..818a091cc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -84,15 +84,11 @@ jobs: with: fetch-depth: 0 - - name: Create Conda environment file - run: | - pipx run pyproject2conda yaml -p 3.10 -e test -e demo -e dev -o environment.yml - - name: 👢 Set up Conda environment uses: mamba-org/setup-micromamba@v1 id: setup with: - environment-file: environment.yml + environment-file: envs/lenskit-py3.11-dev.yaml environment-name: lkpy cache-environment: true init-shell: bash From 0c4f5ce1cbd43625e5e65c16cd53b7d77b182e68 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sun, 3 Mar 2024 19:09:21 -0500 Subject: [PATCH 09/19] update package.yml to build wheels --- .github/workflows/package.yml | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml index 06a70841a..0304e19a3 100644 --- a/.github/workflows/package.yml +++ b/.github/workflows/package.yml @@ -8,8 +8,8 @@ on: types: [published] jobs: - dist: - name: Build Packages + build_sdist: + name: Build Source Package runs-on: ubuntu-latest steps: @@ -28,22 +28,42 @@ jobs: - name: Install Python deps run: pip install -U build - - name: Build distribution - run: python -m build + - name: Build source distribution + run: python -m build -s - name: Save archive uses: actions/upload-artifact@v4 with: - name: pypi-pkgs + name: dist-src path: dist - name: List dist dir run: ls -R dist + build_wheels: + name: Build wheels on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-20.04, windows-2019, macos-11] + + steps: + - uses: actions/checkout@v4 + + - name: Build wheels + uses: pypa/cibuildwheel@v2.16.5 + with: + output-dir: dist + + - uses: actions/upload-artifact@v4 + with: + name: dist-wheels-${{ matrix.os }} + path: ./dist/*.whl + pypi-publish: name: Publish to PyPI runs-on: ubuntu-latest - needs: [dist] + needs: [build_sdist, build_wheels] if: github.event_name == 'release' environment: release @@ -54,8 +74,9 @@ jobs: - name: Fetch compiled package distributions uses: actions/download-artifact@v4 with: - name: pypi-pkgs + pattern: dist-* path: dist + merge-multiple: true - name: Publish package distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 From 8d31d149058fc8d1e1837af627993be3995ae835 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sun, 3 Mar 2024 19:11:57 -0500 Subject: [PATCH 10/19] run packaging workflow on pull requests --- .github/workflows/package.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml index 0304e19a3..9a31d895c 100644 --- a/.github/workflows/package.yml +++ b/.github/workflows/package.yml @@ -6,6 +6,7 @@ on: - main release: types: [published] + pull_request: jobs: build_sdist: From 3b3da7eaa00382d186ecdd66f76b9f070ea890de Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sun, 3 Mar 2024 19:22:42 -0500 Subject: [PATCH 11/19] archs & cibuildwheel fixes --- .github/workflows/package.yml | 2 ++ pyproject.toml | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml index 9a31d895c..986b41142 100644 --- a/.github/workflows/package.yml +++ b/.github/workflows/package.yml @@ -50,6 +50,8 @@ jobs: steps: - uses: actions/checkout@v4 + - name: Fetch Git tags + run: git fetch --tags - name: Build wheels uses: pypa/cibuildwheel@v2.16.5 diff --git a/pyproject.toml b/pyproject.toml index 0f3112e28..942f25caa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,6 +96,13 @@ include = ["lenskit*"] [tool.setuptools_scm] version_scheme = "release-branch-semver" +[tool.cibuildwheel] +build = "cp310* cp311*" +skip = "pp*" + +[tool.cibuildwheel.macos] +archs = "all" + # settings for generating conda environments for dev & CI, when needed [tool.pyproject2conda] channels = ["conda-forge"] From 75a1b68283486dee2f0cfd408d393acbbb5f64e9 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Wed, 6 Mar 2024 10:11:14 -0500 Subject: [PATCH 12/19] rerender dev with just spec --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 942f25caa..58dce1d5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,11 +114,11 @@ deps = ["tbb", "just"] [tool.pyproject2conda.envs.dev] extras = ["dev", "test", "doc", "demo", "sklearn"] -deps = ["just==1.*", "tbb"] +deps = ["just==1.*", "c-compiler", "tbb"] [tool.pyproject2conda.envs.ci] extras = ["test", "sklearn", "dev"] -deps = ["just==1.*", "tbb"] +deps = ["just==1.*", "c-compiler", "tbb"] [tool.ruff] line-length = 100 From 492c181d09c28d45b1961c2ab3ac1e3f7588cd10 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Sun, 3 Mar 2024 19:30:12 -0500 Subject: [PATCH 13/19] don't build universal wheels --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 58dce1d5d..4f805cba8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,7 +101,7 @@ build = "cp310* cp311*" skip = "pp*" [tool.cibuildwheel.macos] -archs = "all" +archs = "x86_64 arm64" # settings for generating conda environments for dev & CI, when needed [tool.pyproject2conda] From d387fc66c0f95604cb38f4ad09aecf9c41cd6e4a Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Mon, 4 Mar 2024 08:43:05 -0500 Subject: [PATCH 14/19] Build binary wheels from the sdist. --- .github/workflows/package.yml | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/.github/workflows/package.yml b/.github/workflows/package.yml index 986b41142..eaabe8aa1 100644 --- a/.github/workflows/package.yml +++ b/.github/workflows/package.yml @@ -12,6 +12,8 @@ jobs: build_sdist: name: Build Source Package runs-on: ubuntu-latest + outputs: + filename: ${{steps.build.outputs.filename}} steps: - uses: actions/checkout@v4 @@ -30,7 +32,11 @@ jobs: run: pip install -U build - name: Build source distribution - run: python -m build -s + id: build + run: | + python -m build -s + basename dist/*.tar.gz |sed -e 's/^/filename=/' >> "$GITHUB_OUTPUT" + cat "$GITHUB_OUTPUT" - name: Save archive uses: actions/upload-artifact@v4 @@ -44,18 +50,22 @@ jobs: build_wheels: name: Build wheels on ${{ matrix.os }} runs-on: ${{ matrix.os }} + needs: [build_sdist] strategy: matrix: os: [ubuntu-20.04, windows-2019, macos-11] steps: - - uses: actions/checkout@v4 - - name: Fetch Git tags - run: git fetch --tags + - name: Fetch compiled package distributions + uses: actions/download-artifact@v4 + with: + name: dist-src + path: dist - name: Build wheels uses: pypa/cibuildwheel@v2.16.5 with: + package-dir: dist/${{needs.build_sdist.outputs.filename}} output-dir: dist - uses: actions/upload-artifact@v4 From 868b9eceb8243bfb445ea9ec655325a1573949e4 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Mon, 4 Mar 2024 09:36:23 -0500 Subject: [PATCH 15/19] Support coverage of Cython code --- .coveragerc | 0 .github/workflows/test.yml | 35 +++++++++++++++++++++++++++++++++++ envs/lenskit-py3.10-test.yaml | 1 + envs/lenskit-py3.11-test.yaml | 1 + pyproject.toml | 4 ++++ setup.py | 16 ++++++++++++++-- 6 files changed, 55 insertions(+), 2 deletions(-) delete mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index e69de29bb..000000000 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 818a091cc..b2c42f642 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -141,6 +141,40 @@ jobs: with: artifact-name: test-check-docs + cython-cover: + name: Measure Cython Coverage + timeout-minutes: 30 + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: 👢 Set up Conda environment + uses: mamba-org/setup-micromamba@v1 + id: setup + with: + environment-file: envs/lenskit-py3.11-dev.yaml + environment-name: lkpy + cache-environment: true + init-shell: bash + + - name: Compile extension modules + run: | + just build-inplace + env: + BUILD_FOR_COVER: 1 + + - name: Run Eval Tests + run: | + python -m pytest --cov=lenskit -m 'not slow' --log-file test-cython-cover.log + + - name: Process test results + uses: lenskit/lkbuild/actions/save-test-results@main + with: + artifact-name: test-cython-cover + vanilla: name: Vanilla Python ${{matrix.python}} on ${{matrix.platform}} runs-on: ${{matrix.platform}} @@ -241,6 +275,7 @@ jobs: - vanilla - check-docs - mindep + - cython-cover steps: - name: Check out source diff --git a/envs/lenskit-py3.10-test.yaml b/envs/lenskit-py3.10-test.yaml index 643453fa1..e1fe397f6 100644 --- a/envs/lenskit-py3.10-test.yaml +++ b/envs/lenskit-py3.10-test.yaml @@ -15,6 +15,7 @@ dependencies: - cffi>=1.15.0 - coverage>=5 - csr>=0.5 + - cython==3.* - hypothesis>=6 - numba<0.59,>=0.56 - numpy>=1.23 diff --git a/envs/lenskit-py3.11-test.yaml b/envs/lenskit-py3.11-test.yaml index c47f53d8d..8cdfb62d7 100644 --- a/envs/lenskit-py3.11-test.yaml +++ b/envs/lenskit-py3.11-test.yaml @@ -15,6 +15,7 @@ dependencies: - cffi>=1.15.0 - coverage>=5 - csr>=0.5 + - cython==3.* - hypothesis>=6 - numba<0.59,>=0.56 - numpy>=1.23 diff --git a/pyproject.toml b/pyproject.toml index 4f805cba8..df917af2a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ test = [ "pytest-doctestplus ==1.*", "pytest-cov >= 2.12", "coverage >= 5", + "cython ==3.*", "hypothesis >= 6", ] doc = [ @@ -156,3 +157,6 @@ exclude = [ ] reportMissingImports = true reportMissingTypeStubs = false + +[tool.coverage.run] +plugins = ["Cython.Coverage"] diff --git a/setup.py b/setup.py index c62f019b3..8b53a6d34 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,5 @@ +import os + from setuptools import Extension, setup try: @@ -7,8 +9,17 @@ except ImportError: USE_CYTHON = False + +COVERAGE = os.environ.get("BUILD_FOR_COVER", None) EXT_SPECS = {"lenskit.util.kvp": None} +CYTHON_OPTIONS = {} +C_DEFINES = [] +if COVERAGE: + print("enabling tracing") + CYTHON_OPTIONS["linetrace"] = True + C_DEFINES.append(("CYTHON_TRACE_NOGIL", "1")) + def _make_extension(name: str, opts: None) -> Extension: path = name.replace(".", "/") @@ -16,10 +27,11 @@ def _make_extension(name: str, opts: None) -> Extension: path += ".pyx" else: path += ".c" - return Extension(name, [path]) + return Extension(name, [path], define_macros=C_DEFINES) EXTENSIONS = [_make_extension(ext, opts) for (ext, opts) in EXT_SPECS.items()] if USE_CYTHON: - EXTENSIONS = cythonize(EXTENSIONS) + EXTENSIONS = cythonize(EXTENSIONS, compiler_directives=CYTHON_OPTIONS) +print(EXTENSIONS[0].__dict__) setup(ext_modules=EXTENSIONS) From 6b25ce559f66353bcf9b99666fe78fd59637719f Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Mon, 4 Mar 2024 21:02:53 -0500 Subject: [PATCH 16/19] benchmark the KVP accumulator --- envs/lenskit-py3.10-ci.yaml | 1 + envs/lenskit-py3.10-dev.yaml | 1 + envs/lenskit-py3.10-test.yaml | 1 + envs/lenskit-py3.11-ci.yaml | 1 + envs/lenskit-py3.11-dev.yaml | 1 + envs/lenskit-py3.11-test.yaml | 1 + pyproject.toml | 1 + tests/test_util_accum.py | 33 ++++++++++++++++++++++++++++----- tests/test_util_kvp.py | 19 +++++++++++++++++++ 9 files changed, 54 insertions(+), 5 deletions(-) diff --git a/envs/lenskit-py3.10-ci.yaml b/envs/lenskit-py3.10-ci.yaml index 2f13afe72..d88eabce3 100644 --- a/envs/lenskit-py3.10-ci.yaml +++ b/envs/lenskit-py3.10-ci.yaml @@ -26,6 +26,7 @@ dependencies: - numpy>=1.23 - pandas<3,>=1.5 - pyproject2conda~=0.11 + - pytest-benchmark==4.* - pytest-cov>=2.12 - pytest-doctestplus==1.* - pytest==7.* diff --git a/envs/lenskit-py3.10-dev.yaml b/envs/lenskit-py3.10-dev.yaml index 069e4feae..8fd38e608 100644 --- a/envs/lenskit-py3.10-dev.yaml +++ b/envs/lenskit-py3.10-dev.yaml @@ -31,6 +31,7 @@ dependencies: - numpy>=1.23 - pandas<3,>=1.5 - pyproject2conda~=0.11 + - pytest-benchmark==4.* - pytest-cov>=2.12 - pytest-doctestplus==1.* - pytest==7.* diff --git a/envs/lenskit-py3.10-test.yaml b/envs/lenskit-py3.10-test.yaml index e1fe397f6..d0f792a1b 100644 --- a/envs/lenskit-py3.10-test.yaml +++ b/envs/lenskit-py3.10-test.yaml @@ -20,6 +20,7 @@ dependencies: - numba<0.59,>=0.56 - numpy>=1.23 - pandas<3,>=1.5 + - pytest-benchmark==4.* - pytest-cov>=2.12 - pytest-doctestplus==1.* - pytest==7.* diff --git a/envs/lenskit-py3.11-ci.yaml b/envs/lenskit-py3.11-ci.yaml index 1998dba79..6d37a929b 100644 --- a/envs/lenskit-py3.11-ci.yaml +++ b/envs/lenskit-py3.11-ci.yaml @@ -26,6 +26,7 @@ dependencies: - numpy>=1.23 - pandas<3,>=1.5 - pyproject2conda~=0.11 + - pytest-benchmark==4.* - pytest-cov>=2.12 - pytest-doctestplus==1.* - pytest==7.* diff --git a/envs/lenskit-py3.11-dev.yaml b/envs/lenskit-py3.11-dev.yaml index 53aea92eb..ff5412016 100644 --- a/envs/lenskit-py3.11-dev.yaml +++ b/envs/lenskit-py3.11-dev.yaml @@ -31,6 +31,7 @@ dependencies: - numpy>=1.23 - pandas<3,>=1.5 - pyproject2conda~=0.11 + - pytest-benchmark==4.* - pytest-cov>=2.12 - pytest-doctestplus==1.* - pytest==7.* diff --git a/envs/lenskit-py3.11-test.yaml b/envs/lenskit-py3.11-test.yaml index 8cdfb62d7..4ee3e3b77 100644 --- a/envs/lenskit-py3.11-test.yaml +++ b/envs/lenskit-py3.11-test.yaml @@ -20,6 +20,7 @@ dependencies: - numba<0.59,>=0.56 - numpy>=1.23 - pandas<3,>=1.5 + - pytest-benchmark==4.* - pytest-cov>=2.12 - pytest-doctestplus==1.* - pytest==7.* diff --git a/pyproject.toml b/pyproject.toml index df917af2a..21bcc6000 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ test = [ "pytest-doctestplus ==1.*", "pytest-cov >= 2.12", "coverage >= 5", + "pytest-benchmark >=4.0.0", "cython ==3.*", "hypothesis >= 6", ] diff --git a/tests/test_util_accum.py b/tests/test_util_accum.py index 8e6cd4f35..055aef33e 100644 --- a/tests/test_util_accum.py +++ b/tests/test_util_accum.py @@ -5,13 +5,14 @@ # SPDX-License-Identifier: MIT import numpy as np +from numba import njit -from lenskit.util.accum import kvp_minheap_insert, kvp_minheap_sort - - -from hypothesis import given, assume, settings -import hypothesis.strategies as st import hypothesis.extra.numpy as nph +import hypothesis.strategies as st +from hypothesis import assume, given, settings +from pytest import mark + +from lenskit.util.accum import kvp_minheap_insert, kvp_minheap_sort def test_kvp_add_to_empty(): @@ -185,3 +186,25 @@ def test_kvp_sort(values): assert vs[-1] == np.min(ovs) assert all(ks == oks[ord]) assert all(vs == ovs[ord]) + + +@mark.benchmark(group="KVPSort") +def test_kvp_sort_numba(rng, benchmark): + N = 10000 + K = 500 + in_keys = np.arange(N) + in_vals = rng.uniform(size=N) + + def op(): + ks = np.zeros(K, np.int32) + vs = np.zeros(K, np.float64) + ep = 0 + for i in range(N): + ep = kvp_minheap_insert(0, ep, K, in_keys[i], in_vals[i], ks, vs) + + kvp_minheap_sort(0, ep, ks, vs) + + # dry run to compile + op() + + benchmark(op) diff --git a/tests/test_util_kvp.py b/tests/test_util_kvp.py index ee28fe04d..4bc8d644d 100644 --- a/tests/test_util_kvp.py +++ b/tests/test_util_kvp.py @@ -9,6 +9,7 @@ import hypothesis.extra.numpy as nph import hypothesis.strategies as st from hypothesis import assume, given, settings +from pytest import mark from lenskit.util.kvp import KVPHeap @@ -195,3 +196,21 @@ def test_kvp_sort(values): assert vs[-1] == np.min(ovs) assert all(ks == oks[ord]) assert all(vs == ovs[ord]) + + +@mark.benchmark(group="KVPSort") +def test_kvp_sort_cython(rng, benchmark): + N = 10000 + K = 500 + in_keys = np.arange(N) + in_vals = rng.uniform(size=N) + + def op(): + ks = np.zeros(K, np.int32) + vs = np.zeros(K, np.float64) + kvp = KVPHeap(0, 0, K, ks, vs) + for i in range(N): + kvp.insert(in_keys[i], in_vals[i]) + kvp.sort() + + benchmark(op) From b0cd488e98343ae5e82720f26d3d1a975a5ed20c Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Wed, 6 Mar 2024 10:12:28 -0500 Subject: [PATCH 17/19] add header to setup.py --- setup.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/setup.py b/setup.py index 8b53a6d34..55250c94e 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,9 @@ +# This file is part of LensKit. +# Copyright (C) 2018-2023 Boise State University +# Copyright (C) 2023-2024 Drexel University +# Licensed under the MIT license, see LICENSE.md for details. +# SPDX-License-Identifier: MIT + import os from setuptools import Extension, setup From a00aa58949407998bfb18fea9541640e08083656 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Tue, 26 Mar 2024 17:10:28 +0000 Subject: [PATCH 18/19] Initial CS matrix class --- lenskit/util/csmatrix.pyi | 21 ++++++++++++++++ lenskit/util/csmatrix.pyx | 21 ++++++++++++++++ setup.py | 2 +- tests/test_csmatrix.py | 53 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 96 insertions(+), 1 deletion(-) create mode 100644 lenskit/util/csmatrix.pyi create mode 100644 lenskit/util/csmatrix.pyx create mode 100644 tests/test_csmatrix.py diff --git a/lenskit/util/csmatrix.pyi b/lenskit/util/csmatrix.pyi new file mode 100644 index 000000000..7fbabcc80 --- /dev/null +++ b/lenskit/util/csmatrix.pyi @@ -0,0 +1,21 @@ +import numpy as np +import numpy.typing as npt + +class CSMatrix: + nrows: int + ncols: int + nnz: int + + rowptr: npt.NDArray[np.int32] + colind: npt.NDArray[np.int32] + values: npt.NDArray[np.float64] + + def __init__( + self, + nr: int, + nc: int, + rps: npt.NDArray[np.int32], + cis: npt.NDArray[np.int32], + vs: npt.NDArray[np.float64], + ): ... + def row_ep(self, row: int) -> tuple[int, int]: ... diff --git a/lenskit/util/csmatrix.pyx b/lenskit/util/csmatrix.pyx new file mode 100644 index 000000000..3e236cdf4 --- /dev/null +++ b/lenskit/util/csmatrix.pyx @@ -0,0 +1,21 @@ +# cython: language_level=3str + +cdef class CSMatrix: + cdef readonly int nrows, ncols, nnz + cdef readonly int[:] rowptr + cdef readonly int[:] colind + cdef readonly double[:] values + + def __cinit__(self, int nr, int nc, int[:] rps, int[:] cis, double[:] vs): + self.nrows = nr + self.ncols = nc + self.rowptr = rps + self.colind = cis + self.values = vs + self.nnz = self.rowptr[nr] + + cpdef (int,int) row_ep(self, row): + if row < 0 or row >= self.nrows: + raise IndexError(f"invalid row {row} for {self.nrows}x{self.ncols} matrix") + + return self.rowptr[row], self.rowptr[row+1] diff --git a/setup.py b/setup.py index 55250c94e..13901024d 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ COVERAGE = os.environ.get("BUILD_FOR_COVER", None) -EXT_SPECS = {"lenskit.util.kvp": None} +EXT_SPECS = {"lenskit.util.kvp": None, "lenskit.util.csmatrix": None} CYTHON_OPTIONS = {} C_DEFINES = [] diff --git a/tests/test_csmatrix.py b/tests/test_csmatrix.py new file mode 100644 index 000000000..1a25a124e --- /dev/null +++ b/tests/test_csmatrix.py @@ -0,0 +1,53 @@ +import numpy as np +import scipy.sparse as sps +from numba import njit + +import hypothesis.extra.numpy as nph +import hypothesis.strategies as st +from hypothesis import assume, given, settings +from pytest import mark + +from lenskit.util.csmatrix import CSMatrix + + +@st.composite +def sparse_matrices(draw, max_shape=(50, 50), density=st.floats(0, 1), format="csr"): + ubr, ubc = max_shape + + rows = draw(st.integers(1, ubr)) + cols = draw(st.integers(1, ubc)) + dens = draw(density) + prod = rows * cols + nnz = int(prod * dens) + + points = draw(nph.arrays("int32", nnz, elements=st.integers(0, prod - 1), unique=True)) + values = draw(nph.arrays("float64", nnz)) + rvs = points % rows + cvs = points // rows + assert np.all(rvs < rows) + assert np.all(cvs < cols) + + return sps.csr_array((values, (rvs, cvs)), shape=(rows, cols)) + + +@given(sparse_matrices()) +def test_init_matrix(m: sps.csr_array): + print(m.shape, m.nnz, m.indptr.dtype, m.indices.dtype) + nr, nc = m.shape + + m2 = CSMatrix(nr, nc, m.indptr, m.indices, m.data) + + assert m2.nrows == nr + assert m2.ncols == nc + assert m2.nnz == m.nnz + + +@given(sparse_matrices()) +def test_csm_row_ep(m: sps.csr_array): + nr, nc = m.shape + m2 = CSMatrix(nr, nc, m.indptr, m.indices, m.data) + + for i in range(nr): + sp, ep = m2.row_ep(i) + assert sp == m2.rowptr[i] + assert ep == m2.rowptr[i + 1] From a9b39a111d111aadfd4c00b6f055645fba3f1cd5 Mon Sep 17 00:00:00 2001 From: Michael Ekstrand Date: Tue, 26 Mar 2024 17:15:25 +0000 Subject: [PATCH 19/19] add scipy converstion --- lenskit/util/csmatrix.pyi | 3 +++ lenskit/util/csmatrix.pyx | 6 ++++++ tests/test_csmatrix.py | 15 ++++++++++++--- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/lenskit/util/csmatrix.pyi b/lenskit/util/csmatrix.pyi index 7fbabcc80..33c4b702e 100644 --- a/lenskit/util/csmatrix.pyi +++ b/lenskit/util/csmatrix.pyi @@ -1,5 +1,6 @@ import numpy as np import numpy.typing as npt +from scipy.sparse import csr_array class CSMatrix: nrows: int @@ -19,3 +20,5 @@ class CSMatrix: vs: npt.NDArray[np.float64], ): ... def row_ep(self, row: int) -> tuple[int, int]: ... + @staticmethod + def from_scipy(matrix: csr_array) -> CSMatrix: ... diff --git a/lenskit/util/csmatrix.pyx b/lenskit/util/csmatrix.pyx index 3e236cdf4..a0ac84ad4 100644 --- a/lenskit/util/csmatrix.pyx +++ b/lenskit/util/csmatrix.pyx @@ -14,6 +14,12 @@ cdef class CSMatrix: self.values = vs self.nnz = self.rowptr[nr] + @staticmethod + def from_scipy(m): + nr, nc = m.shape + + return CSMatrix(nr, nc, m.indptr, m.indices, m.data) + cpdef (int,int) row_ep(self, row): if row < 0 or row >= self.nrows: raise IndexError(f"invalid row {row} for {self.nrows}x{self.ncols} matrix") diff --git a/tests/test_csmatrix.py b/tests/test_csmatrix.py index 1a25a124e..3de548382 100644 --- a/tests/test_csmatrix.py +++ b/tests/test_csmatrix.py @@ -42,12 +42,21 @@ def test_init_matrix(m: sps.csr_array): assert m2.nnz == m.nnz +@given(sparse_matrices()) +def test_from_scipy(m: sps.csr_array): + print(m.shape, m.nnz, m.indptr.dtype, m.indices.dtype) + m2 = CSMatrix.from_scipy(m) + + assert m2.nrows == m.shape[0] + assert m2.ncols == m.shape[1] + assert m2.nnz == m.nnz + + @given(sparse_matrices()) def test_csm_row_ep(m: sps.csr_array): - nr, nc = m.shape - m2 = CSMatrix(nr, nc, m.indptr, m.indices, m.data) + m2 = CSMatrix.from_scipy(m) - for i in range(nr): + for i in range(m2.nrows): sp, ep = m2.row_ep(i) assert sp == m2.rowptr[i] assert ep == m2.rowptr[i + 1]