Apply standard template and formatting #349

Merged: 11 commits on Nov 11, 2023
Changes from 8 commits
7 changes: 7 additions & 0 deletions .copier-answers.yml
@@ -0,0 +1,7 @@
# Changes here will be overwritten by Copier
_commit: 28184e6
_src_path: https://github.com/lenskit/lk-project-template
package_name: lenskit
project_name: lenskit
require_lint: true
start_year: 2018
10 changes: 8 additions & 2 deletions .editorconfig
@@ -1,17 +1,23 @@
root = true

[*]
insert_final_newline = true
charset = utf-8
insert_final_newline = true
trim_trailing_whitespace = true
indent_size = 4
indent_style = space

[*.{yml,yaml}]
[{*.json,*.yml,*.yaml,*.yml.jinja}]
indent_size = 2

[*.toml]
indent_size = 2

[*.sh]
end_of_line = lf

[*.{bat,cmd}]
end_of_line = crlf

[*.md]
trim_trailing_whitespace = false
1 change: 1 addition & 0 deletions .gitattributes
@@ -1,3 +1,4 @@
* text=auto
*.sh text eol=lf
*.bat text eol=crlf
*.cmd text eol=crlf
74 changes: 74 additions & 0 deletions .github/workflows/check-sources.yml
@@ -0,0 +1,74 @@
name: Validate Source Rules
on:
  push:
    branches:
      - main
  pull_request:

concurrency:
  group: check-${{github.ref}}
  cancel-in-progress: true

jobs:
  lint:
    name: Check Source Style
    runs-on: ubuntu-latest

    steps:
      - name: 📥 Check out source code
        uses: actions/checkout@v2
        with:
          fetch-depth: 0

      - name: 🐍 Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
          cache: 'pip'

      - name: 🛠️ Install tools
        run: |
          pip install ruff

      - name: 🪮 Check source code formatting
        id: format
        run: |
          if pipx run ruff format --diff $PKG_DIR; then
            echo passed=yes >>"$GITHUB_OUTPUT"
          else
            echo passed=no >>"$GITHUB_OUTPUT"
            echo "::error::source code not formatted"
          fi
        env:
          PKG_DIR: lenskit

      - name: 🐜 Check source code lint rules
        id: lint
        run: |
          if pipx run ruff check --output-format=github $PKG_DIR; then
            echo passed=yes >>"$GITHUB_OUTPUT"
          else
            echo passed=no >>"$GITHUB_OUTPUT"
            echo "::error::source code lint check failed"
          fi
        env:
          PKG_DIR: lenskit

      - name: 🧾 Checking results
        run: |
          if [ "$FMT_PASSED" = no ]; then
            echo "::error::format failed, failing build"
            exit 1
          fi
          if [ "$LINT_PASSED" = no ]; then
            if [ "$LINT_REQUIRED" = true ]; then
              echo "::error::lint failed, failing build"
              exit 2
            else
              echo "::error::lint failed but non-mandatory"
            fi
          fi
        env:
          FMT_PASSED: ${{ steps.format.outputs.passed }}
          LINT_PASSED: ${{ steps.lint.outputs.passed }}
          LINT_REQUIRED: true
52 changes: 26 additions & 26 deletions .gitignore
@@ -1,49 +1,49 @@
# log and debug outputs
*.log
*.pdb
*.prof
*.lprof
emissions.csv
intel_power_gadget_log.csv
.coverage*
coverage.xml
cov-reports/
test-logs/
htmlcov/

# caches and working directories
__pycache__/
*.pyc
*.prof
*.lprof
.ipynb_checkpoints/
dask-worker-space/
.idea/
.vs/

.eggs/
.*_cache/
.vscode/
*.egg-info/
.hypothesis/
.tox/
.vagrant/
.venv/
scratch/

# build outputs
build/
dist/
.coverage*
coverage.xml
cov-reports/
test-logs/
htmlcov/
my-eval/
doc/data/
*.pyd
*.so
*.dll
*.exp
*.lib
*.o
*.obj

# environment locks that aren't committed
/*env*.yml
conda-lock.yml
*.lock
*.lock.yml
*.tar.bz2

dask-worker-space/
.hypothesis/

build-env/
.tox/
pythonenv*/
.vagrant/
scratch/
emissions.csv
intel_power_gadget_log.csv

# Editor and OS cruft
.DS_Store
._.DS_Store
*~
*.tmp
.vs/
6 changes: 6 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,6 @@
{
"[python]": {
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.formatOnSave": true,
},
}
7 changes: 4 additions & 3 deletions LICENSE.md
@@ -1,4 +1,5 @@
Copyright (c) 2018–2022 Boise State University
Copyright (c) 2018–2023 Boise State University
Copyright (c) 2023 Michael Ekstrand

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
@@ -7,8 +8,8 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
> The above copyright notice and this permission notice shall be included in
> all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 changes: 14 additions & 9 deletions docs/performance.rst
@@ -14,19 +14,24 @@ Quick Tips
----------

* Use Conda-based Python, with ``tbb`` installed.
* Set the ``MKL_THREADING_LAYER`` environment variable to ``tbb``, so both MKL and LensKit
will use TBB and can coordinate their thread pools.
* When using MKL, set the ``MKL_THREADING_LAYER`` environment variable to ``tbb``, so both
MKL and LensKit will use TBB and can coordinate their thread pools.
* Use ``LK_NUM_PROCS`` if you want to control LensKit's batch prediction and recommendation
parallelism, and ``NUMBA_NUM_THREADS`` to control its model training parallelism.
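
The tips above all come down to environment variables. As a minimal sketch (the specific values below are illustrative only, and the variables can just as easily be set in your shell or job script), configuring them from Python before LensKit and its dependencies load might look like this:

import os

# Sketch: these must be set *before* importing LensKit, Numba, or your
# BLAS-backed libraries, because thread pools are sized at import time.
os.environ["MKL_THREADING_LAYER"] = "tbb"  # only meaningful when MKL is the BLAS
os.environ["LK_NUM_PROCS"] = "4"           # LensKit batch prediction/recommendation workers
os.environ["NUMBA_NUM_THREADS"] = "8"      # Numba threads used for model training

import lenskit.batch  # noqa: E402  -- imported only after the environment is configured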

We generally find the best performance using MKL with TBB throughout the stack. If both
LensKit's Numba-accelerated code and MKL are using TBB, they will coordinate their
thread pools to coordinate threading levels.
We generally find the best performance using MKL with TBB throughout the stack on Intel
processors. If both LensKit's Numba-accelerated code and MKL are using TBB, they will
coordinate their thread pools, keeping the overall threading level under control.

If you are **not** using MKL with TBB, we recommend setting ``MKL_NUM_THREADS=1`` and/or
``OPENBLAS_NUM_THREADS=1`` (depending on your BLAS implementation) to turn off
BLAS threading. When LensKit starts (usually at model training time), it will
check your runtime environment and log warning messages if it detects problems.
If you are **not** using MKL (Apple Silicon, maybe also AMD processors), we recommend
controlling your BLAS parallelism. For OpenBLAS, how you control this depends on how
OpenBLAS was built, whether Numba is using OpenMP or TBB, and whether you are training
or evaluating the model.

When LensKit starts (usually at model training time), it will check your runtime environment
and log warning messages if it detects problems. During evaluation, it also makes a
best-effort attempt, through `threadpoolctl`_, to disable nested parallelism when running
a parallel evaluation.
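
LensKit's use of threadpoolctl is automatic, but the same mechanism is available directly
if you want to cap BLAS threading yourself. A hedged sketch follows; the limit of one
thread is an illustrative choice, not a recommendation for every workload:

from threadpoolctl import threadpool_limits

# Sketch: temporarily cap BLAS thread pools, mirroring what LensKit attempts
# automatically when it runs a parallel evaluation.
with threadpool_limits(limits=1, user_api="blas"):
    ...  # train or score models here without nested BLAS parallelism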

Controlling Parallelism
-----------------------
4 changes: 3 additions & 1 deletion lenskit/__init__.py
@@ -5,18 +5,20 @@

from lenskit.algorithms import * # noqa: F401,F403

__version__ = '0.15.0'
__version__ = "0.15.0"


class DataWarning(UserWarning):
    """
    Warning raised for detectable problems with input data.
    """

    pass


class ConfigWarning(UserWarning):
    """
    Warning raised for detectable problems with algorithm configurations.
    """

    pass
22 changes: 12 additions & 10 deletions lenskit/algorithms/__init__.py
@@ -10,7 +10,7 @@
from abc import ABCMeta, abstractmethod
import inspect

__all__ = ['Algorithm', 'Recommender', 'Predictor', 'CandidateSelector']
__all__ = ["Algorithm", "Recommender", "Predictor", "CandidateSelector"]


class Algorithm(metaclass=ABCMeta):
@@ -68,10 +68,10 @@ def get_params(self, deep=True):
        if hasattr(self, name) and name not in self.IGNORED_PARAMS:
            value = getattr(self, name)
            params[name] = value
            if deep and hasattr(value, 'get_params'):
            if deep and hasattr(value, "get_params"):
                sps = value.get_params(deep)
                for k, sv in sps.items():
                    params[name + '__' + k] = sv
                    params[name + "__" + k] = sv

        return params

@@ -101,16 +101,16 @@ def predict(self, pairs, ratings=None):
            raise NotImplementedError()

        def upred(df):
            user, = df['user'].unique()
            items = df['item']
            (user,) = df["user"].unique()
            items = df["item"]
            preds = self.predict_for_user(user, items)
            preds.name = 'prediction'
            res = df.join(preds, on='item', how='left')
            preds.name = "prediction"
            res = df.join(preds, on="item", how="left")
            return res.prediction

        res = pairs.loc[:, ['user', 'item']].groupby('user', sort=False).apply(upred)
        res.reset_index(level='user', inplace=True, drop=True)
        res.name = 'prediction'
        res = pairs.loc[:, ["user", "item"]].groupby("user", sort=False).apply(upred)
        res.reset_index(level="user", inplace=True, drop=True)
        res.name = "prediction"
        return res.loc[pairs.index.values]

@abstractmethod
@@ -173,6 +173,7 @@ def adapt(cls, algo):
            algo(Predictor): the underlying rating predictor.
        """
        from .basic import TopN

        if isinstance(algo, Recommender):
            return algo
        else:
@@ -212,6 +213,7 @@ def rated_items(ratings):
"""
import pandas as pd
import numpy as np

if isinstance(ratings, pd.Series):
return ratings.index.values
elif isinstance(ratings, np.ndarray):