diff --git a/.copier-answers.yml b/.copier-answers.yml new file mode 100644 index 000000000..c22d41c9e --- /dev/null +++ b/.copier-answers.yml @@ -0,0 +1,7 @@ +# Changes here will be overwritten by Copier +_commit: 28184e6 +_src_path: https://github.com/lenskit/lk-project-template +package_name: lenskit +project_name: lenskit +require_lint: false +start_year: 2018 diff --git a/.editorconfig b/.editorconfig index 4d2c0c2b6..f558a411b 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,17 +1,23 @@ root = true [*] -insert_final_newline = true charset = utf-8 +insert_final_newline = true trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{yml,yaml}] +[{*.json,*.yml,*.yaml,*.yml.jinja}] +indent_size = 2 + +[*.toml] indent_size = 2 [*.sh] end_of_line = lf +[*.{bat,cmd}] +end_of_line = crlf + [*.md] trim_trailing_whitespace = false diff --git a/.gitattributes b/.gitattributes index e2673ec6b..d4234f2e5 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ * text=auto *.sh text eol=lf *.bat text eol=crlf +*.cmd text eol=crlf diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml deleted file mode 100644 index 4ab28ba69..000000000 --- a/.github/release-drafter.yml +++ /dev/null @@ -1,25 +0,0 @@ -categories: -- title: ⚠ Breaking Changes - labels: - - breaking -- title: 🐜 Bug Fixes - labels: - - bug -- title: 📏 Evaluation Support - labels: - - evaluation - - batch -- title: 🧩 Algorithms - labels: - - algorithms -- title: 🧱 Internals - labels: - - internals -- title: 🔧 Maintenance - labels: - - build - - dependencies -template: | - ## What’s Changed - - $CHANGES diff --git a/.github/workflows/check-sources.yml b/.github/workflows/check-sources.yml new file mode 100644 index 000000000..7d231df6b --- /dev/null +++ b/.github/workflows/check-sources.yml @@ -0,0 +1,74 @@ +name: Validate Source Rules +on: + push: + branches: + - main + pull_request: + +concurrency: + group: check-${{github.ref}} + cancel-in-progress: true + +jobs: + lint: + name: Check Source Style + runs-on: ubuntu-latest + + steps: + - name: 📥 Check out source code + uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: 🐍 Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.11" + cache: 'pip' + + - name: 🛠️ Install tools + run: | + pip install ruff + + - name: 🪮 Check source code formatting + id: format + run: | + if pipx run ruff format --diff $PKG_DIR; then + echo passed=yes >>"$GITHUB_OUTPUT" + else + echo passed=no >>"$GITHUB_OUTPUT" + echo "::error::source code not formatted" + fi + env: + PKG_DIR: lenskit + + - name: 🐜 Check source code lint rules + id: lint + run: | + if pipx run ruff check --output-format=github $PKG_DIR; then + echo passed=yes >>"$GITHUB_OUTPUT" + else + echo passed=no >>"$GITHUB_OUTPUT" + echo "::error::source code lint check failed" + fi + env: + PKG_DIR: lenskit + + - name: 🧾 Checking results + run: | + if [ "$FMT_PASSED" = no ]; then + echo "::error::format failed, failing build" + exit 1 + fi + if [ "$LINT_PASSED" = no ]; then + if [ "$LINT_REQUIRED" = true ]; then + echo "::error::lint failed, failing build" + exit 2 + else + echo "::error::lint failed but non-mandatory" + fi + fi + env: + FMT_PASSED: ${{ steps.format.outputs.passed }} + LINT_PASSED: ${{ steps.lint.outputs.passed }} + LINT_REQUIRED: False diff --git a/.github/workflows/build-packages.yml b/.github/workflows/package.yml similarity index 97% rename from .github/workflows/build-packages.yml rename to
.github/workflows/package.yml index 05086060b..1767f10b4 100644 --- a/.github/workflows/build-packages.yml +++ b/.github/workflows/package.yml @@ -3,14 +3,13 @@ on: push: branches: - main - tag: release: types: [published] jobs: sdist: name: Build and upload packages runs-on: ubuntu-latest - + steps: - uses: actions/checkout@v2 @@ -33,7 +32,7 @@ jobs: with: name: pypi-pkgs path: dist - + - name: List dist dir run: ls -R dist diff --git a/.github/workflows/release-drafter.yml b/.github/workflows/release-drafter.yml deleted file mode 100644 index 678007f91..000000000 --- a/.github/workflows/release-drafter.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: Release Drafter - -on: - push: - # branches to consider in the event; optional, defaults to all - branches: - - main - # pull_request event is required only for autolabeler - pull_request: - # Only following types are handled by the action, but one can default to all as well - types: [opened, reopened, synchronize] - -jobs: - update_release_draft: - runs-on: ubuntu-latest - steps: - # (Optional) GitHub Enterprise requires GHE_HOST variable set - #- name: Set GHE_HOST - # run: | - # echo "GHE_HOST=${GITHUB_SERVER_URL##https:\/\/}" >> $GITHUB_ENV - - # Drafts your next Release notes as Pull Requests are merged into "master" - - uses: release-drafter/release-drafter@v5 - # (Optional) specify config name to use, relative to .github/. Default: release-drafter.yml - # with: - # config-name: my-config.yml - # disable-autolabeler: true - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test.yml similarity index 100% rename from .github/workflows/test-suite.yml rename to .github/workflows/test.yml diff --git a/.gitignore b/.gitignore index 3d485b409..6472891ef 100644 --- a/.gitignore +++ b/.gitignore @@ -1,27 +1,32 @@ +# log and debug outputs *.log *.pdb +*.prof +*.lprof +emissions.csv +intel_power_gadget_log.csv +.coverage* +coverage.xml +cov-reports/ +test-logs/ +htmlcov/ +# caches and working directories __pycache__/ *.pyc -*.prof -*.lprof .ipynb_checkpoints/ +dask-worker-space/ .idea/ -.vs/ - -.eggs/ .*_cache/ -.vscode/ -*.egg-info/ +.hypothesis/ +.tox/ +.vagrant/ +.venv/ +scratch/ + +# build outputs build/ dist/ -.coverage* -coverage.xml -cov-reports/ -test-logs/ -htmlcov/ -my-eval/ -doc/data/ *.pyd *.so *.dll @@ -29,21 +34,16 @@ doc/data/ *.lib *.o *.obj + +# environment locks that aren't committed /*env*.yml conda-lock.yml *.lock *.lock.yml -*.tar.bz2 - -dask-worker-space/ -.hypothesis/ - -build-env/ -.tox/ -pythonenv*/ -.vagrant/ -scratch/ -emissions.csv -intel_power_gadget_log.csv +# Editor and OS cruft .DS_Store +._.DS_Store +*~ +*.tmp +.vs/ diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..b608d943d --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "[python]": { + "editor.defaultFormatter": "charliermarsh.ruff", + "editor.formatOnSave": true, + }, +} diff --git a/LICENSE.md b/LICENSE.md index 77939bd32..eea0cdd33 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,4 +1,5 @@ -Copyright (c) 2018–2022 Boise State University +Copyright (c) 2018–2023 Boise State University +Copyright (c) 2023 Michael Ekstrand Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -7,8 +8,8 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished
to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. +> The above copyright notice and this permission notice shall be included in +> all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, diff --git a/docs/performance.rst b/docs/performance.rst index 1ec7130f1..2f2e933fa 100644 --- a/docs/performance.rst +++ b/docs/performance.rst @@ -14,19 +14,24 @@ Quick Tips ---------- * Use Conda-based Python, with ``tbb`` installed. -* Set the ``MKL_THREADING_LAYER`` environment variable to ``tbb``, so both MKL and LensKit - will use TBB and can coordinate their thread pools. +* When using MKL, set the ``MKL_THREADING_LAYER`` environment variable to ``tbb``, so both + MKL and LensKit will use TBB and can coordinate their thread pools. * Use ``LK_NUM_PROCS`` if you want to control LensKit's batch prediction and recommendation parallelism, and ``NUMBA_NUM_THREADS`` to control its model training parallelism. -We generally find the best performance using MKL with TBB throughout the stack. If both -LensKit's Numba-accelerated code and MKL are using TBB, they will coordinate their -thread pools to coordinate threading levels. +We generally find the best performance using MKL with TBB throughout the stack on Intel +processors. If both LensKit's Numba-accelerated code and MKL are using TBB, they will +coordinate their thread pools to coordinate threading levels. -If you are **not** using MKL with TBB, we recommend setting ``MKL_NUM_THREADS=1`` and/or -``OPENBLAS_NUM_THREADS=1`` (depending on your BLAS implementation) to turn off -BLAS threading. When LensKit starts (usually at model training time), it will -check your runtime environment and log warning messages if it detects problems. +If you are **not** using MKL (Apple Silicon, maybe also AMD processors), we recommend +controlling your BLAS parallelism. For OpenBLAS, how you control this depends on how +OpenBLAS was built, whether Numba is using OpenMP or TBB, and whether you are training +or evaluating the model. + +When LensKit starts (usually at model training time), it will check your runtime environment +and log warning messages if it detects problems. During evaluation, it also makes a +best-effort attempt, through `threadpoolctl`_, to disable nested parallelism when running +a parallel evaluation. Controlling Parallelism ----------------------- diff --git a/lenskit/__init__.py b/lenskit/__init__.py index 9eedb9d0d..19c34f19c 100644 --- a/lenskit/__init__.py +++ b/lenskit/__init__.py @@ -5,13 +5,14 @@ from lenskit.algorithms import * # noqa: F401,F403 -__version__ = '0.15.0' +__version__ = "0.15.0" class DataWarning(UserWarning): """ Warning raised for detectable problems with input data. """ + pass @@ -19,4 +20,5 @@ class ConfigWarning(UserWarning): """ Warning raised for detectable problems with algorithm configurations. 
""" + pass diff --git a/lenskit/algorithms/__init__.py b/lenskit/algorithms/__init__.py index dbec96cff..5c57cbb66 100644 --- a/lenskit/algorithms/__init__.py +++ b/lenskit/algorithms/__init__.py @@ -10,7 +10,7 @@ from abc import ABCMeta, abstractmethod import inspect -__all__ = ['Algorithm', 'Recommender', 'Predictor', 'CandidateSelector'] +__all__ = ["Algorithm", "Recommender", "Predictor", "CandidateSelector"] class Algorithm(metaclass=ABCMeta): @@ -68,10 +68,10 @@ def get_params(self, deep=True): if hasattr(self, name) and name not in self.IGNORED_PARAMS: value = getattr(self, name) params[name] = value - if deep and hasattr(value, 'get_params'): + if deep and hasattr(value, "get_params"): sps = value.get_params(deep) for k, sv in sps.items(): - params[name + '__' + k] = sv + params[name + "__" + k] = sv return params @@ -101,16 +101,16 @@ def predict(self, pairs, ratings=None): raise NotImplementedError() def upred(df): - user, = df['user'].unique() - items = df['item'] + (user,) = df["user"].unique() + items = df["item"] preds = self.predict_for_user(user, items) - preds.name = 'prediction' - res = df.join(preds, on='item', how='left') + preds.name = "prediction" + res = df.join(preds, on="item", how="left") return res.prediction - res = pairs.loc[:, ['user', 'item']].groupby('user', sort=False).apply(upred) - res.reset_index(level='user', inplace=True, drop=True) - res.name = 'prediction' + res = pairs.loc[:, ["user", "item"]].groupby("user", sort=False).apply(upred) + res.reset_index(level="user", inplace=True, drop=True) + res.name = "prediction" return res.loc[pairs.index.values] @abstractmethod @@ -173,6 +173,7 @@ def adapt(cls, algo): algo(Predictor): the underlying rating predictor. """ from .basic import TopN + if isinstance(algo, Recommender): return algo else: @@ -212,6 +213,7 @@ def rated_items(ratings): """ import pandas as pd import numpy as np + if isinstance(ratings, pd.Series): return ratings.index.values elif isinstance(ratings, np.ndarray): diff --git a/lenskit/algorithms/als.py b/lenskit/algorithms/als.py index c418ab0f7..156dc41a3 100644 --- a/lenskit/algorithms/als.py +++ b/lenskit/algorithms/als.py @@ -15,10 +15,7 @@ _logger = logging.getLogger(__name__) -PartialModel = namedtuple('PartialModel', [ - 'users', 'items', - 'user_matrix', 'item_matrix' -]) +PartialModel = namedtuple("PartialModel", ["users", "items", "user_matrix", "item_matrix"]) @njit @@ -343,10 +340,22 @@ class BiasedMF(MFPredictor): Random number generator or state (see :func:`lenskit.util.random.rng`). 
progress: a :func:`tqdm.tqdm`-compatible progress bar function """ + timer = None - def __init__(self, features, *, iterations=20, reg=0.1, damping=5, bias=True, method='cd', - rng_spec=None, progress=None, save_user_features=True): + def __init__( + self, + features, + *, + iterations=20, + reg=0.1, + damping=5, + bias=True, + method="cd", + rng_spec=None, + progress=None, + save_user_features=True, + ): self.features = features self.iterations = iterations self.regularization = reg @@ -377,12 +386,18 @@ def fit(self, ratings, **kwargs): pass # we just need to do the iterations if self.user_features_ is not None: - _logger.info('trained model in %s (|P|=%f, |Q|=%f)', self.timer, - np.linalg.norm(self.user_features_, 'fro'), - np.linalg.norm(self.item_features_, 'fro')) + _logger.info( + "trained model in %s (|P|=%f, |Q|=%f)", + self.timer, + np.linalg.norm(self.user_features_, "fro"), + np.linalg.norm(self.item_features_, "fro"), + ) else: - _logger.info('trained model in %s (|Q|=%f)', self.timer, - np.linalg.norm(self.item_features_, 'fro')) + _logger.info( + "trained model in %s (|Q|=%f)", + self.timer, + np.linalg.norm(self.item_features_, "fro"), + ) del self.timer return self @@ -399,13 +414,14 @@ def fit_iters(self, ratings, **kwargs): """ if self.bias: - _logger.info('[%s] fitting bias model', self.timer) + _logger.info("[%s] fitting bias model", self.timer) self.bias.fit(ratings) current, uctx, ictx = self._initial_model(ratings) - _logger.info('[%s] training biased MF model with ALS for %d features', - self.timer, self.features) + _logger.info( + "[%s] training biased MF model with ALS for %d features", self.timer, self.features + ) for epoch, model in enumerate(self._train_iters(current, uctx, ictx)): self._save_params(model) yield self @@ -423,7 +439,7 @@ def _save_params(self, model): def _initial_model(self, ratings): # transform ratings using offsets if self.bias: - _logger.info('[%s] normalizing ratings', self.timer) + _logger.info("[%s] normalizing ratings", self.timer) ratings = self.bias.transform(ratings) "Initialize a model and build contexts." 
@@ -431,17 +447,17 @@ def _initial_model(self, ratings): n_users = len(users) n_items = len(items) - _logger.debug('setting up contexts') + _logger.debug("setting up contexts") trmat = rmat.transpose() - _logger.debug('initializing item matrix') + _logger.debug("initializing item matrix") imat = self.rng.standard_normal((n_items, self.features)) imat /= np.linalg.norm(imat, axis=1).reshape((n_items, 1)) - _logger.debug('|Q|: %f', np.linalg.norm(imat, 'fro')) - _logger.debug('initializing user matrix') + _logger.debug("|Q|: %f", np.linalg.norm(imat, "fro")) + _logger.debug("initializing user matrix") umat = self.rng.standard_normal((n_users, self.features)) umat /= np.linalg.norm(umat, axis=1).reshape((n_users, 1)) - _logger.debug('|P|: %f', np.linalg.norm(umat, 'fro')) + _logger.debug("|P|: %f", np.linalg.norm(umat, "fro")) return PartialModel(users, items, umat, imat), rmat, trmat @@ -461,24 +477,24 @@ def _train_iters(self, current, uctx, ictx): assert ictx.nrows == n_items assert ictx.ncols == n_users - if self.method == 'cd': + if self.method == "cd": train = _train_matrix_cd - elif self.method == 'lu': + elif self.method == "lu": train = _train_matrix_lu else: - raise ValueError('invalid training method ' + self.method) + raise ValueError("invalid training method " + self.method) if isinstance(self.regularization, tuple): ureg, ireg = self.regularization else: ureg = ireg = self.regularization - for epoch in self.progress(range(self.iterations), desc='BiasedMF', leave=False): + for epoch in self.progress(range(self.iterations), desc="BiasedMF", leave=False): du = train(uctx, current.user_matrix, current.item_matrix, ureg) - _logger.debug('[%s] finished user epoch %d', self.timer, epoch) + _logger.debug("[%s] finished user epoch %d", self.timer, epoch) di = train(ictx, current.item_matrix, current.user_matrix, ireg) - _logger.debug('[%s] finished item epoch %d', self.timer, epoch) - _logger.info('[%s] finished epoch %d (|ฮ”P|=%.3f, |ฮ”Q|=%.3f)', self.timer, epoch, du, di) + _logger.debug("[%s] finished item epoch %d", self.timer, epoch) + _logger.info("[%s] finished epoch %d (|ฮ”P|=%.3f, |ฮ”Q|=%.3f)", self.timer, epoch, du, di) yield current def predict_for_user(self, user, items, ratings=None): @@ -513,8 +529,9 @@ def predict_for_user(self, user, items, ratings=None): return scores def __str__(self): - return 'als.BiasedMF(features={}, regularization={})'.\ - format(self.features, self.regularization) + return "als.BiasedMF(features={}, regularization={})".format( + self.features, self.regularization + ) class ImplicitMF(MFPredictor): @@ -561,10 +578,22 @@ class ImplicitMF(MFPredictor): Random number generator or state (see :func:`lenskit.util.random.rng`). 
progress: a :func:`tqdm.tqdm`-compatible progress bar function """ + timer = None - def __init__(self, features, *, iterations=20, reg=0.1, weight=40, use_ratings=False, - method='cg', rng_spec=None, progress=None, save_user_features=True): + def __init__( + self, + features, + *, + iterations=20, + reg=0.1, + weight=40, + use_ratings=False, + method="cg", + rng_spec=None, + progress=None, + save_user_features=True, + ): self.features = features self.iterations = iterations self.reg = reg @@ -582,14 +611,20 @@ def fit(self, ratings, **kwargs): pass if self.user_features_ is not None: - _logger.info('[%s] finished training model with %d features (|P|=%f, |Q|=%f)', - self.timer, self.features, - np.linalg.norm(self.user_features_, 'fro'), - np.linalg.norm(self.item_features_, 'fro')) + _logger.info( + "[%s] finished training model with %d features (|P|=%f, |Q|=%f)", + self.timer, + self.features, + np.linalg.norm(self.user_features_, "fro"), + np.linalg.norm(self.item_features_, "fro"), + ) else: - _logger.info('[%s] finished training model with %d features (|Q|=%f)', - self.timer, self.features, - np.linalg.norm(self.item_features_, 'fro')) + _logger.info( + "[%s] finished training model with %d features (|Q|=%f)", + self.timer, + self.features, + np.linalg.norm(self.item_features_, "fro"), + ) # unpack the regularization if isinstance(self.reg, tuple): @@ -605,10 +640,12 @@ def fit(self, ratings, **kwargs): def fit_iters(self, ratings, **kwargs): current, uctx, ictx = self._initial_model(ratings) - _logger.info('[%s] training implicit MF model with ALS for %d features', - self.timer, self.features) - _logger.info('have %d observations for %d users and %d items', - uctx.nnz, uctx.nrows, ictx.nrows) + _logger.info( + "[%s] training implicit MF model with ALS for %d features", self.timer, self.features + ) + _logger.info( + "have %d observations for %d users and %d items", uctx.nnz, uctx.nrows, ictx.nrows + ) for model in self._train_iters(current, uctx, ictx): self._save_model(model) yield self @@ -624,37 +661,37 @@ def _save_model(self, model): def _train_iters(self, current, uctx, ictx): "Generator of training iterations." - if self.method == 'lu': + if self.method == "lu": train = _train_implicit_lu - elif self.method == 'cg': + elif self.method == "cg": train = _train_implicit_cg else: - raise ValueError('unknown solver ' + self.method) + raise ValueError("unknown solver " + self.method) if isinstance(self.reg, tuple): ureg, ireg = self.reg else: ureg = ireg = self.reg - for epoch in self.progress(range(self.iterations), desc='ImplicitMF', leave=False): + for epoch in self.progress(range(self.iterations), desc="ImplicitMF", leave=False): du = train(uctx, current.user_matrix, current.item_matrix, ureg) - _logger.debug('[%s] finished user epoch %d', self.timer, epoch) + _logger.debug("[%s] finished user epoch %d", self.timer, epoch) di = train(ictx, current.item_matrix, current.user_matrix, ireg) - _logger.debug('[%s] finished item epoch %d', self.timer, epoch) - _logger.info('[%s] finished epoch %d (|ฮ”P|=%.3f, |ฮ”Q|=%.3f)', self.timer, epoch, du, di) + _logger.debug("[%s] finished item epoch %d", self.timer, epoch) + _logger.info("[%s] finished epoch %d (|ฮ”P|=%.3f, |ฮ”Q|=%.3f)", self.timer, epoch, du, di) yield current def _initial_model(self, ratings): "Initialize a model and build contexts." 
if not self.use_ratings: - ratings = ratings[['user', 'item']] + ratings = ratings[["user", "item"]] rmat, users, items = sparse_ratings(ratings) n_users = len(users) n_items = len(items) - _logger.debug('setting up contexts') + _logger.debug("setting up contexts") # force values to exist if rmat.values is None: rmat.values = np.ones(rmat.nnz) @@ -685,5 +722,6 @@ def predict_for_user(self, user, items, ratings=None): return self.score_by_ids(user, items) def __str__(self): - return 'als.ImplicitMF(features={}, reg={}, w={})'.\ - format(self.features, self.reg, self.weight) + return "als.ImplicitMF(features={}, reg={}, w={})".format( + self.features, self.reg, self.weight + ) diff --git a/lenskit/algorithms/basic.py b/lenskit/algorithms/basic.py index d76ceeec0..0e7147e4a 100644 --- a/lenskit/algorithms/basic.py +++ b/lenskit/algorithms/basic.py @@ -38,9 +38,9 @@ def __init__(self, selector=None): self.selector = selector def fit(self, ratings, **kwargs): - pop = ratings.groupby('item').user.count() - pop.name = 'score' - self.item_pop_ = pop.astype('float64') + pop = ratings.groupby("item").user.count() + pop.name = "score" + self.item_pop_ = pop.astype("float64") if self.selector is None: self.selector = UnratedItemCandidateSelector() @@ -63,7 +63,7 @@ def recommend(self, user, n=None, candidates=None, ratings=None): return scores.nlargest(n).reset_index() def __str__(self): - return 'Popular' + return "Popular" class PopScore(Predictor): @@ -84,26 +84,26 @@ class PopScore(Predictor): Item popularity scores. """ - def __init__(self, score_method='quantile'): + def __init__(self, score_method="quantile"): self.score_method = score_method def fit(self, ratings, **kwargs): - _logger.info('counting item popularity') - scores = ratings['item'].value_counts() - if self.score_method == 'rank': - _logger.info('ranking %d items', len(scores)) + _logger.info("counting item popularity") + scores = ratings["item"].value_counts() + if self.score_method == "rank": + _logger.info("ranking %d items", len(scores)) scores = scores.rank().sort_index() - elif self.score_method == 'quantile': - _logger.info('computing quantiles for %d items', len(scores)) + elif self.score_method == "quantile": + _logger.info("computing quantiles for %d items", len(scores)) cmass = scores.sort_values() cmass = cmass.cumsum() cdens = cmass / scores.sum() scores = cdens.sort_index() - elif self.score_method == 'count': - _logger.info('scoring items with their rating counts') + elif self.score_method == "count": + _logger.info("scoring items with their rating counts") scores = scores.sort_index() else: - raise ValueError('invalid scoring method ' + repr(self.score_method)) + raise ValueError("invalid scoring method " + repr(self.score_method)) self.item_scores_ = scores @@ -113,7 +113,7 @@ def predict_for_user(self, user, items, ratings=None): return self.item_scores_.reindex(items) def __str__(self): - return 'PopScore({})'.format(self.score_method) + return "PopScore({})".format(self.score_method) class Memorized(Predictor): @@ -134,7 +134,7 @@ def fit(self, *args, **kwargs): def predict_for_user(self, user, items, ratings=None): uscores = self.scores[self.scores.user == user] - urates = uscores.set_index('item').rating + urates = uscores.set_index("item").rating return urates.reindex(items) @@ -170,7 +170,7 @@ def predict_for_user(self, user, items, ratings=None): preds = None for algo in self.algorithms: - _logger.debug('predicting for %d items for user %s', len(remaining), user) + _logger.debug("predicting for %d items for 
user %s", len(remaining), user) aps = algo.predict_for_user(user, remaining, ratings=ratings) aps = aps[aps.notna()] if preds is None: @@ -185,7 +185,7 @@ def predict_for_user(self, user, items, ratings=None): def __str__(self): str_algos = [str(algo) for algo in self.algorithms] - return 'Fallback([{}])'.format(', '.join(str_algos)) + return "Fallback([{}])".format(", ".join(str_algos)) class EmptyCandidateSelector(CandidateSelector): @@ -196,7 +196,7 @@ class EmptyCandidateSelector(CandidateSelector): dtype_ = np.int64 def fit(self, ratings, **kwarsg): - self.dtype_ = ratings['item'].dtype + self.dtype_ = ratings["item"].dtype def candidates(self, user, ratings=None): return np.array([], dtype=self.dtype_) @@ -213,14 +213,15 @@ class UnratedItemCandidateSelector(CandidateSelector): user_items_(CSR): Items rated by each known user, as positions in the ``items`` index. """ + items_ = None users_ = None user_items_ = None def fit(self, ratings, **kwargs): - r2 = ratings[['user', 'item']] + r2 = ratings[["user", "item"]] sparse = sparse_ratings(r2) - _logger.info('trained unrated candidate selector for %d ratings', sparse.matrix.nnz) + _logger.info("trained unrated candidate selector for %d ratings", sparse.matrix.nnz) self.items_ = sparse.items self.users_ = sparse.users self.user_items_ = sparse.matrix @@ -255,10 +256,11 @@ class AllItemsCandidateSelector(CandidateSelector): Attributes: items_(numpy.ndarray): All known items. """ + items_ = None def fit(self, ratings, **kwargs): - self.items_ = ratings['item'].unique() + self.items_ = ratings["item"].unique() return self def candidates(self, user, ratings=None): @@ -290,7 +292,7 @@ def __init__(self, selector=None, rng_spec=None): def fit(self, ratings, **kwargs): self.selector.fit(ratings, **kwargs) - items = pd.DataFrame(ratings['item'].unique(), columns=['item']) + items = pd.DataFrame(ratings["item"].unique(), columns=["item"]) self.items = items return self @@ -301,12 +303,12 @@ def recommend(self, user, n=None, candidates=None, ratings=None): n = len(candidates) rng = self.rng_source(user) - c_df = pd.DataFrame(candidates, columns=['item']) + c_df = pd.DataFrame(candidates, columns=["item"]) recs = c_df.sample(n, random_state=rng) return recs.reset_index(drop=True) def __str__(self): - return 'Random' + return "Random" class KnownRating(Predictor): @@ -315,9 +317,9 @@ class KnownRating(Predictor): """ def fit(self, ratings, **kwargs): - self.ratings = ratings.set_index(['user', 'item']).sort_index() + self.ratings = ratings.set_index(["user", "item"]).sort_index() return self def predict_for_user(self, user, items, ratings=None): - uscores = self.ratings.xs(user, level='user', drop_level=True) + uscores = self.ratings.xs(user, level="user", drop_level=True) return uscores.rating.reindex(items) diff --git a/lenskit/algorithms/bias.py b/lenskit/algorithms/bias.py index 2fa7e7b5f..09233a6a3 100644 --- a/lenskit/algorithms/bias.py +++ b/lenskit/algorithms/bias.py @@ -72,27 +72,27 @@ def fit(self, ratings, **kwargs): Returns: Bias: the fit bias object. 
""" - _logger.info('building bias model for %d ratings', len(ratings)) + _logger.info("building bias model for %d ratings", len(ratings)) self.mean_ = ratings.rating.mean() - _logger.info('global mean: %.3f', self.mean_) + _logger.info("global mean: %.3f", self.mean_) nrates = ratings.assign(rating=lambda df: df.rating - self.mean_) if self.items: - group = nrates.groupby('item').rating + group = nrates.groupby("item").rating self.item_offsets_ = self._mean(group, self.item_damping) - self.item_offsets_.name = 'i_off' - _logger.info('computed means for %d items', len(self.item_offsets_)) + self.item_offsets_.name = "i_off" + _logger.info("computed means for %d items", len(self.item_offsets_)) else: self.item_offsets_ = None if self.users: if self.item_offsets_ is not None: - nrates = nrates.join(pd.DataFrame(self.item_offsets_), on='item', how='inner') + nrates = nrates.join(pd.DataFrame(self.item_offsets_), on="item", how="inner") nrates = nrates.assign(rating=lambda df: df.rating - df.i_off) - self.user_offsets_ = self._mean(nrates.groupby('user').rating, self.user_damping) - self.user_offsets_.name = 'u_off' - _logger.info('computed means for %d users', len(self.user_offsets_)) + self.user_offsets_ = self._mean(nrates.groupby("user").rating, self.user_damping) + self.user_offsets_.name = "u_off" + _logger.info("computed means for %d users", len(self.user_offsets_)) else: self.user_offsets_ = None @@ -117,38 +117,35 @@ def transform(self, ratings, *, indexes=False): A data frame with ``rating`` transformed by subtracting user-item bias prediction. """ - rvps = ratings[['user', 'item']].copy() - rvps['rating'] = ratings['rating'] - self.mean_ + rvps = ratings[["user", "item"]].copy() + rvps["rating"] = ratings["rating"] - self.mean_ if self.item_offsets_ is not None: - rvps = rvps.join(self.item_offsets_, on='item', how='left') - rvps['rating'] -= rvps['i_off'].fillna(0) - rvps = rvps.drop(columns='i_off') + rvps = rvps.join(self.item_offsets_, on="item", how="left") + rvps["rating"] -= rvps["i_off"].fillna(0) + rvps = rvps.drop(columns="i_off") if self.user_offsets_ is not None: - rvps = rvps.join(self.user_offsets_, on='user', how='left') - rvps['rating'] -= rvps['u_off'].fillna(0) - rvps = rvps.drop(columns='u_off') + rvps = rvps.join(self.user_offsets_, on="user", how="left") + rvps["rating"] -= rvps["u_off"].fillna(0) + rvps = rvps.drop(columns="u_off") if indexes: - rvps['uidx'] = self.user_offsets_.index.get_indexer(rvps['user']) - rvps['iidx'] = self.item_offsets_.index.get_indexer(rvps['item']) + rvps["uidx"] = self.user_offsets_.index.get_indexer(rvps["user"]) + rvps["iidx"] = self.item_offsets_.index.get_indexer(rvps["item"]) return rvps def inverse_transform(self, ratings): """ Transform ratings by removing the bias term. 
""" - rvps = pd.DataFrame({ - 'user': ratings['user'], - 'item': ratings['item'] - }) - rvps['rating'] = ratings['rating'] + self.mean_ + rvps = pd.DataFrame({"user": ratings["user"], "item": ratings["item"]}) + rvps["rating"] = ratings["rating"] + self.mean_ if self.item_offsets_ is not None: - rvps = rvps.join(self.item_offsets_, on='item', how='left') - rvps['rating'] += rvps['i_off'].fillna(0) - del rvps['i_off'] + rvps = rvps.join(self.item_offsets_, on="item", how="left") + rvps["rating"] += rvps["i_off"].fillna(0) + del rvps["i_off"] if self.user_offsets_ is not None: - rvps = rvps.join(self.user_offsets_, on='user', how='left') - rvps['rating'] += rvps['u_off'].fillna(0) - del rvps['u_off'] + rvps = rvps.join(self.user_offsets_, on="user", how="left") + rvps["rating"] += rvps["u_off"].fillna(0) + del rvps["u_off"] return rvps def transform_user(self, ratings): @@ -236,7 +233,7 @@ def predict_for_user(self, user, items, ratings=None): preds = preds + umean elif self.user_offsets_ is not None: umean = self.user_offsets_.get(user, 0.0) - _logger.debug('using mean(user %s) = %.3f', user, umean) + _logger.debug("using mean(user %s) = %.3f", user, umean) preds = preds + umean return preds @@ -258,4 +255,4 @@ def _mean(self, series, damping): return series.mean() def __str__(self): - return 'Bias(ud={}, id={})'.format(self.user_damping, self.item_damping) + return "Bias(ud={}, id={})".format(self.user_damping, self.item_damping) diff --git a/lenskit/algorithms/funksvd.py b/lenskit/algorithms/funksvd.py index 990453480..0ffd3a493 100644 --- a/lenskit/algorithms/funksvd.py +++ b/lenskit/algorithms/funksvd.py @@ -9,6 +9,7 @@ import numpy as np import numba as n from pandas.core.series import Series + try: from numba.experimental import jitclass except ImportError: @@ -21,16 +22,19 @@ _logger = logging.getLogger(__name__) -@jitclass([ - ('user_features', n.double[:, :]), - ('item_features', n.double[:, :]), - ('feature_count', n.int32), - ('user_count', n.int32), - ('item_count', n.int32), - ('initial_value', n.double) -]) +@jitclass( + [ + ("user_features", n.double[:, :]), + ("item_features", n.double[:, :]), + ("feature_count", n.int32), + ("user_count", n.int32), + ("item_count", n.int32), + ("initial_value", n.double), + ] +) class Model: "Internal model class for training SGD MF." 
+ def __init__(self, umat, imat): self.user_features = umat self.item_features = imat @@ -52,13 +56,15 @@ def _fresh_model(nfeatures, nusers, nitems, init=0.1): return model -@jitclass([ - ('iter_count', n.int32), - ('lrate', n.double), - ('reg_term', n.double), - ('rmin', n.double), - ('rmax', n.double) -]) +@jitclass( + [ + ("iter_count", n.int32), + ("lrate", n.double), + ("reg_term", n.double), + ("rmin", n.double), + ("rmax", n.double), + ] +) class _Params: def __init__(self, niters, lrate, reg, rmin, rmax): self.iter_count = niters @@ -78,11 +84,7 @@ def make_params(niters, lrate, reg, range): return _Params(niters, lrate, reg, rmin, rmax) -@jitclass([ - ('est', n.double[:]), - ('feature', n.int32), - ('trail', n.double) -]) +@jitclass([("est", n.double[:]), ("feature", n.int32), ("trail", n.double)]) class _FeatContext: def __init__(self, est, feature, trail): self.est = est @@ -90,13 +92,15 @@ def __init__(self, est, feature, trail): self.trail = trail -@jitclass([ - ('users', n.int32[:]), - ('items', n.int32[:]), - ('ratings', n.double[:]), - ('bias', n.double[:]), - ('n_samples', n.uint64) -]) +@jitclass( + [ + ("users", n.int32[:]), + ("items", n.int32[:]), + ("ratings", n.double[:]), + ("bias", n.double[:]), + ("n_samples", n.uint64), + ] +) class Context: def __init__(self, users, items, ratings, bias): self.users = users @@ -169,8 +173,7 @@ def train(ctx: Context, params: _Params, model: Model, timer): fc = _FeatContext(est, f, trail) rmse = _train_feature(ctx, params, model, fc) end = time.perf_counter() - _logger.info('[%s] finished feature %d (RMSE=%f) in %.2fs', - timer, f, rmse, end - start) + _logger.info("[%s] finished feature %d (RMSE=%f) in %.2fs", timer, f, rmse, end - start) est = est + model.user_features[ctx.users, f] * model.item_features[ctx.items, f] est = np.maximum(est, params.rmin) @@ -215,8 +218,18 @@ class FunkSVD(MFPredictor): The random state for shuffling the data prior to training. 
""" - def __init__(self, features, iterations=100, *, lrate=0.001, reg=0.015, - damping=5, range=None, bias=True, random_state=None): + def __init__( + self, + features, + iterations=100, + *, + lrate=0.001, + reg=0.015, + damping=5, + range=None, + bias=True, + random_state=None, + ): self.features = features self.iterations = iterations self.lrate = lrate @@ -240,21 +253,21 @@ def fit(self, ratings, **kwargs): """ util.check_env() timer = util.Stopwatch() - if 'rating' not in ratings: - _logger.warning('no rating column found, assuming rating values of 1.0') + if "rating" not in ratings: + _logger.warning("no rating column found, assuming rating values of 1.0") ratings = ratings.assign(rating=1.0) if self.bias: - _logger.info('[%s] fitting bias model', timer) + _logger.info("[%s] fitting bias model", timer) self.bias.fit(ratings) - _logger.info('[%s] preparing rating data for %d samples', timer, len(ratings)) - _logger.debug('shuffling rating data') + _logger.info("[%s] preparing rating data for %d samples", timer, len(ratings)) + _logger.debug("shuffling rating data") shuf = np.arange(len(ratings), dtype=np.int_) self.random.shuffle(shuf) ratings = ratings.iloc[shuf, :] - _logger.debug('[%s] indexing users and items', timer) + _logger.debug("[%s] indexing users and items", timer) uidx = pd.Index(ratings.user.unique()) iidx = pd.Index(ratings.item.unique()) @@ -263,7 +276,7 @@ def fit(self, ratings, **kwargs): items = iidx.get_indexer(ratings.item).astype(np.int32) assert np.all(items >= 0) - _logger.debug('[%s] computing initial estimates', timer) + _logger.debug("[%s] computing initial estimates", timer) if self.bias: initial = pd.Series(self.bias.mean_, index=ratings.index, dtype=np.float_) ibias, initial = _align_add_bias(self.bias.item_offsets_, iidx, ratings.item, initial) @@ -271,19 +284,18 @@ def fit(self, ratings, **kwargs): else: initial = pd.Series(0.0, index=ratings.index) - _logger.debug('have %d estimates for %d ratings', len(initial), len(ratings)) + _logger.debug("have %d estimates for %d ratings", len(initial), len(ratings)) assert len(initial) == len(ratings) - _logger.debug('[%s] initializing data structures', timer) - context = Context(users, items, ratings.rating.astype(np.float_).values, - initial.values) + _logger.debug("[%s] initializing data structures", timer) + context = Context(users, items, ratings.rating.astype(np.float_).values, initial.values) params = make_params(self.iterations, self.lrate, self.reg, self.range) model = _fresh_model(self.features, len(uidx), len(iidx)) - _logger.info('[%s] training biased MF model with %d features', timer, self.features) + _logger.info("[%s] training biased MF model with %d features", timer, self.features) train(context, params, model, timer) - _logger.info('finished model training in %s', timer) + _logger.info("finished model training in %s", timer) self.user_index_ = uidx self.item_index_ = iidx @@ -307,5 +319,4 @@ def predict_for_user(self, user, items, ratings=None): return preds def __str__(self): - return 'FunkSVD(features={}, reg={})'.\ - format(self.features, self.reg) + return "FunkSVD(features={}, reg={})".format(self.features, self.reg) diff --git a/lenskit/algorithms/item_knn.py b/lenskit/algorithms/item_knn.py index 23bb309ca..bca7616bb 100644 --- a/lenskit/algorithms/item_knn.py +++ b/lenskit/algorithms/item_knn.py @@ -71,8 +71,9 @@ def _trim_sim_block(nitems, bsp, bitems, block, min_sim, max_nbrs): sp, lep = block_csr.row_extent(r) lim = lep - sp if c != bsp + r and v >= min_sim: - eps[r] = 
kvp_minheap_insert(sp, eps[r], lim, c, v, - block_csr.colinds, block_csr.values) + eps[r] = kvp_minheap_insert( + sp, eps[r], lim, c, v, block_csr.colinds, block_csr.values + ) # we're done! return block_csr @@ -195,10 +196,7 @@ def _predict_sum(model, nitems, nrange, ratings, rated, targets): return scores -_predictors = { - 'weighted-average': _predict_weighted_average, - 'sum': _predict_sum -} +_predictors = {"weighted-average": _predict_weighted_average, "sum": _predict_sum} class ItemItem(Predictor): @@ -256,15 +254,17 @@ class ItemItem(Predictor): user_index_(pandas.Index): the index of known user IDs for the rating matrix. rating_matrix_(matrix.CSR): the user-item rating matrix for looking up users' ratings. """ - IGNORED_PARAMS = ['feedback'] - EXTRA_PARAMS = ['center', 'aggregate', 'use_ratings'] - AGG_SUM = intern('sum') - AGG_WA = intern('weighted-average') + IGNORED_PARAMS = ["feedback"] + EXTRA_PARAMS = ["center", "aggregate", "use_ratings"] + + AGG_SUM = intern("sum") + AGG_WA = intern("weighted-average") RATING_AGGS = [AGG_WA] # the aggregates that use rating values - def __init__(self, nnbrs, min_nbrs=1, min_sim=1.0e-6, save_nbrs=None, feedback='explicit', - **kwargs): + def __init__( + self, nnbrs, min_nbrs=1, min_sim=1.0e-6, save_nbrs=None, feedback="explicit", **kwargs + ): self.nnbrs = nnbrs if self.nnbrs is not None and self.nnbrs < 1: self.nnbrs = -1 @@ -274,42 +274,48 @@ def __init__(self, nnbrs, min_nbrs=1, min_sim=1.0e-6, save_nbrs=None, feedback=' self.min_sim = min_sim self.save_nbrs = save_nbrs - if feedback == 'explicit': - defaults = { - 'center': True, - 'aggregate': self.AGG_WA, - 'use_ratings': True - } - elif feedback == 'implicit': - defaults = { - 'center': False, - 'aggregate': self.AGG_SUM, - 'use_ratings': False - } + if feedback == "explicit": + defaults = {"center": True, "aggregate": self.AGG_WA, "use_ratings": True} + elif feedback == "implicit": + defaults = {"center": False, "aggregate": self.AGG_SUM, "use_ratings": False} else: - raise ValueError(f'invalid feedback mode: {feedback}') + raise ValueError(f"invalid feedback mode: {feedback}") defaults.update(kwargs) - self.center = defaults['center'] - self.aggregate = intern(defaults['aggregate']) - self.use_ratings = defaults['use_ratings'] + self.center = defaults["center"] + self.aggregate = intern(defaults["aggregate"]) + self.use_ratings = defaults["use_ratings"] self._check_setup() def _check_setup(self): if not self.use_ratings: if self.center: - _logger.warning('item-item configured to ignore ratings, but ``center=True`` - likely bug') - warnings.warn(util.clean_str(''' + _logger.warning( + "item-item configured to ignore ratings, but ``center=True`` - likely bug" + ) + warnings.warn( + util.clean_str( + """ item-item configured to ignore ratings, but ``center=True``. This configuration is unlikely to work well. - '''), ConfigWarning) - if self.aggregate == 'weighted-average': - _logger.warning('item-item configured to ignore ratings, but using weighted averages - likely bug') - warnings.warn(util.clean_str(''' + """ + ), + ConfigWarning, + ) + if self.aggregate == "weighted-average": + _logger.warning( + "item-item configured to ignore ratings, but using weighted averages - likely bug" + ) + warnings.warn( + util.clean_str( + """ item-item configured to ignore ratings, but use weighted averages. This configuration is unlikely to work well. 
- '''), ConfigWarning) + """ + ), + ConfigWarning, + ) def fit(self, ratings, **kwargs): """ @@ -328,29 +334,38 @@ def fit(self, ratings, **kwargs): # 2. Compute similarities with pairwise dot products self._timer = util.Stopwatch() - _logger.debug('[%s] beginning fit, memory use %s', self._timer, util.max_memory()) - _logger.debug('[%s] using CSR kernel %s', self._timer, csrk.name) + _logger.debug("[%s] beginning fit, memory use %s", self._timer, util.max_memory()) + _logger.debug("[%s] using CSR kernel %s", self._timer, csrk.name) init_rmat, users, items = sparse_ratings(ratings) n_items = len(items) - _logger.info('[%s] made sparse matrix for %d items (%d ratings from %d users)', - self._timer, len(items), init_rmat.nnz, len(users)) - _logger.debug('[%s] made matrix, memory use %s', self._timer, util.max_memory()) + _logger.info( + "[%s] made sparse matrix for %d items (%d ratings from %d users)", + self._timer, + len(items), + init_rmat.nnz, + len(users), + ) + _logger.debug("[%s] made matrix, memory use %s", self._timer, util.max_memory()) rmat, item_means = self._mean_center(ratings, init_rmat, items) - _logger.debug('[%s] centered, memory use %s', self._timer, util.max_memory()) + _logger.debug("[%s] centered, memory use %s", self._timer, util.max_memory()) rmat = self._normalize(rmat) - _logger.debug('[%s] normalized, memory use %s', self._timer, util.max_memory()) + _logger.debug("[%s] normalized, memory use %s", self._timer, util.max_memory()) - _logger.info('[%s] computing similarity matrix', self._timer) + _logger.info("[%s] computing similarity matrix", self._timer) smat = self._compute_similarities(rmat) - _logger.debug('[%s] computed, memory use %s', self._timer, util.max_memory()) + _logger.debug("[%s] computed, memory use %s", self._timer, util.max_memory()) - _logger.info('[%s] got neighborhoods for %d of %d items', - self._timer, np.sum(np.diff(smat.rowptrs) > 0), n_items) + _logger.info( + "[%s] got neighborhoods for %d of %d items", + self._timer, + np.sum(np.diff(smat.rowptrs) > 0), + n_items, + ) - _logger.info('[%s] computed %d neighbor pairs', self._timer, smat.nnz) + _logger.info("[%s] computed %d neighbor pairs", self._timer, smat.nnz) self.item_index_ = items self.item_means_ = item_means @@ -360,8 +375,8 @@ def fit(self, ratings, **kwargs): self.rating_matrix_ = init_rmat # create an inverted similarity matrix for efficient scanning self._sim_inv_ = smat.transpose() - _logger.info('[%s] transposed matrix for optimization', self._timer) - _logger.debug('[%s] done, memory use %s', self._timer, util.max_memory()) + _logger.info("[%s] transposed matrix for optimization", self._timer) + _logger.debug("[%s] done, memory use %s", self._timer, util.max_memory()) return self @@ -369,16 +384,17 @@ def _mean_center(self, ratings, rmat, items): if not self.center: return rmat, None - item_means = ratings.groupby('item').rating.mean() + item_means = ratings.groupby("item").rating.mean() item_means = item_means.reindex(items).values mcvals = rmat.values - item_means[rmat.colinds] nmat = rmat.copy(False) nmat.values = mcvals if np.allclose(nmat.values, 0): - _logger.warn('normalized ratings are zero, centering is not recommended') - warnings.warn("Ratings seem to have the same value, centering is not recommended.", - DataWarning) - _logger.info('[%s] computed means for %d items', self._timer, len(item_means)) + _logger.warn("normalized ratings are zero, centering is not recommended") + warnings.warn( + "Ratings seem to have the same value, centering is not recommended.", 
DataWarning + ) + _logger.info("[%s] computed means for %d items", self._timer, len(item_means)) return nmat, item_means def _normalize(self, rmat): @@ -393,7 +409,7 @@ def _normalize(self, rmat): assert norm_mat.shape[1] == rmat.shape[1] # and reset NaN norm_mat.data[np.isnan(norm_mat.data)] = 0 - _logger.info('[%s] normalized rating matrix columns', self._timer) + _logger.info("[%s] normalized rating matrix columns", self._timer) return CSR.from_scipy(norm_mat, False) def _compute_similarities(self, rmat): @@ -404,11 +420,16 @@ def _compute_similarities(self, rmat): m_nbrs = 0 bounds = _make_blocks(nitems, 1000) - _logger.info('[%s] splitting %d items (%d ratings) into %d blocks', - self._timer, nitems, trmat.nnz, len(bounds)) + _logger.info( + "[%s] splitting %d items (%d ratings) into %d blocks", + self._timer, + nitems, + trmat.nnz, + len(bounds), + ) blocks = [trmat.subset_rows(sp, ep) for (sp, ep) in bounds] - _logger.info('[%s] computing similarities', self._timer) + _logger.info("[%s] computing similarities", self._timer) ptrs = List(bounds) nbs = List(blocks) if not nbs: @@ -420,11 +441,17 @@ def _compute_similarities(self, rmat): nnz = sum(b.nnz for b in s_blocks) tot_rows = sum(b.nrows for b in s_blocks) - _logger.info('[%s] computed %d similarities for %d items in %d blocks', - self._timer, nnz, tot_rows, len(s_blocks)) + _logger.info( + "[%s] computed %d similarities for %d items in %d blocks", + self._timer, + nnz, + tot_rows, + len(s_blocks), + ) row_nnzs = np.concatenate([b.row_nnzs() for b in s_blocks]) - assert len(row_nnzs) == nitems, \ - 'only have {} rows for {} items'.format(len(row_nnzs), nitems) + assert len(row_nnzs) == nitems, "only have {} rows for {} items".format( + len(row_nnzs), nitems + ) smat = CSR.empty(nitems, nitems, row_nnzs) start = 0 @@ -433,29 +460,38 @@ def _compute_similarities(self, rmat): end = start + bnr v_sp = smat.rowptrs[start] v_ep = smat.rowptrs[end] - _logger.debug('block %d (%d:%d) has %d entries, storing in %d:%d', - bi, start, end, b.nnz, v_sp, v_ep) + _logger.debug( + "block %d (%d:%d) has %d entries, storing in %d:%d", + bi, + start, + end, + b.nnz, + v_sp, + v_ep, + ) smat.colinds[v_sp:v_ep] = b.colinds smat.values[v_sp:v_ep] = b.values start = end - _logger.info('[%s] sorting similarity matrix with %d entries', self._timer, smat.nnz) + _logger.info("[%s] sorting similarity matrix with %d entries", self._timer, smat.nnz) _sort_nbrs(smat) return smat def predict_for_user(self, user, items, ratings=None): - _logger.debug('predicting %d items for user %s', len(items), user) + _logger.debug("predicting %d items for user %s", len(items), user) if ratings is None: if user not in self.user_index_: - _logger.debug('user %s missing, returning empty predictions', user) + _logger.debug("user %s missing, returning empty predictions", user) return pd.Series(np.nan, index=items) upos = self.user_index_.get_loc(user) - ratings = pd.Series(self.rating_matrix_.row_vs(upos), - index=pd.Index(self.item_index_[self.rating_matrix_.row_cs(upos)])) + ratings = pd.Series( + self.rating_matrix_.row_vs(upos), + index=pd.Index(self.item_index_[self.rating_matrix_.row_cs(upos)]), + ) if not ratings.index.is_unique: - wmsg = 'user {} has duplicate ratings, this is likely to cause problems'.format(user) + wmsg = "user {} has duplicate ratings, this is likely to cause problems".format(user) warnings.warn(wmsg, DataWarning) # set up rating array @@ -465,7 +501,7 @@ def predict_for_user(self, user, items, ratings=None): m_rates = ratings[ri_pos >= 0] ri_pos = 
ri_pos[ri_pos >= 0] rate_v = np.full(n_items, np.nan, dtype=np.float_) - rated = np.zeros(n_items, dtype='bool') + rated = np.zeros(n_items, dtype="bool") # mean-center the rating array if self.center: rate_v[ri_pos] = m_rates.values - self.item_means_[ri_pos] @@ -473,7 +509,7 @@ def predict_for_user(self, user, items, ratings=None): rate_v[ri_pos] = m_rates.values rated[ri_pos] = True - _logger.debug('user %s: %d of %d rated items in model', user, len(ri_pos), len(ratings)) + _logger.debug("user %s: %d of %d rated items in model", user, len(ri_pos), len(ratings)) assert np.sum(np.logical_not(np.isnan(rate_v))) == len(ri_pos) assert np.all(np.isnan(rate_v) == np.logical_not(rated)) @@ -481,7 +517,7 @@ def predict_for_user(self, user, items, ratings=None): # ipos will be an array of item indices i_pos = self.item_index_.get_indexer(items) i_pos = i_pos[i_pos >= 0] - _logger.debug('user %s: %d of %d requested items in model', user, len(i_pos), len(items)) + _logger.debug("user %s: %d of %d requested items in model", user, len(i_pos), len(items)) # now we take a first pass through the data to count _viable_ targets # This computes the number of neighbors (and their weight sum) for @@ -493,8 +529,9 @@ def predict_for_user(self, user, items, ratings=None): i_cts = i_cts[viable] i_sums = i_sums[viable] i_nbrs = i_nbrs[viable] - _logger.debug('user %s: %d of %d requested items possibly reachable', - user, len(i_pos), len(items)) + _logger.debug( + "user %s: %d of %d requested items possibly reachable", user, len(i_pos), len(items) + ) # look for some fast paths if self.aggregate == self.AGG_SUM and self.min_sim >= 0: @@ -507,15 +544,21 @@ def predict_for_user(self, user, items, ratings=None): else: fast_items = i_pos fast_scores = i_sums - slow_items = np.array([], dtype='i4') + slow_items = np.array([], dtype="i4") - _logger.debug('user %s: using fast-path similarity sum for %d items', - user, len(fast_items)) + _logger.debug( + "user %s: using fast-path similarity sum for %d items", user, len(fast_items) + ) if len(slow_items): - iscores = _predict_sum(self.sim_matrix_, len(self.item_index_), - (self.min_nbrs, self.nnbrs), - rate_v, rated, slow_items) + iscores = _predict_sum( + self.sim_matrix_, + len(self.item_index_), + (self.min_nbrs, self.nnbrs), + rate_v, + rated, + slow_items, + ) else: iscores = np.full(len(self.item_index_), np.nan) iscores[fast_items] = fast_scores @@ -527,19 +570,30 @@ def predict_for_user(self, user, items, ratings=None): fast_scores = rate_v[i_nbrs[fast_mask]] if self.min_sim < 0: fast_scores *= np.sign(i_sums[fast_mask]) - _logger.debug('user %s: fast-pathed %d scores', user, len(fast_scores)) + _logger.debug("user %s: fast-pathed %d scores", user, len(fast_scores)) slow_items = i_pos[i_cts > 1] - iscores = _predict_weighted_average(self.sim_matrix_, len(self.item_index_), - (self.min_nbrs, self.nnbrs), - rate_v, rated, slow_items) + iscores = _predict_weighted_average( + self.sim_matrix_, + len(self.item_index_), + (self.min_nbrs, self.nnbrs), + rate_v, + rated, + slow_items, + ) iscores[fast_items] = fast_scores else: # now compute the predictions - _logger.debug('user %s: taking the slow path', user) + _logger.debug("user %s: taking the slow path", user) agg = _predictors[self.aggregate] - iscores = agg(self.sim_matrix_, len(self.item_index_), (self.min_nbrs, self.nnbrs), - rate_v, rated, i_pos) + iscores = agg( + self.sim_matrix_, + len(self.item_index_), + (self.min_nbrs, self.nnbrs), + rate_v, + rated, + i_pos, + ) if self.center and self.aggregate in 
self.RATING_AGGS: iscores += self.item_means_ @@ -547,8 +601,9 @@ def predict_for_user(self, user, items, ratings=None): results = pd.Series(iscores, index=self.item_index_) results = results.reindex(items, fill_value=np.nan) - _logger.debug('user %s: predicted for %d of %d items', - user, results.notna().sum(), len(items)) + _logger.debug( + "user %s: predicted for %d of %d items", user, results.notna().sum(), len(items) + ) return results @@ -557,7 +612,7 @@ def _count_viable_targets(self, targets, rated): # initialize counts to zero counts = np.zeros(len(self.item_index_), dtype=np.int32) sums = np.zeros(len(self.item_index_)) - last_nbrs = np.full(len(self.item_index_), -1, 'i4') + last_nbrs = np.full(len(self.item_index_), -1, "i4") # count the number of times each item is reachable from the neighborhood for ri in rated: nbrs = self._sim_inv_.row_cs(ri) @@ -570,14 +625,14 @@ def _count_viable_targets(self, targets, rated): def __getstate__(self): state = dict(self.__dict__) - if '_sim_inv_' in state and not in_share_context(): - del state['_sim_inv_'] + if "_sim_inv_" in state and not in_share_context(): + del state["_sim_inv_"] return state def __setstate__(self, state): self.__dict__.update(state) - if hasattr(self, 'sim_matrix_') and not hasattr(self, '_sim_inv_'): + if hasattr(self, "sim_matrix_") and not hasattr(self, "_sim_inv_"): self._sim_inv_ = self.sim_matrix_.transpose() def __str__(self): - return 'ItemItem(nnbrs={}, msize={})'.format(self.nnbrs, self.save_nbrs) + return "ItemItem(nnbrs={}, msize={})".format(self.nnbrs, self.save_nbrs) diff --git a/lenskit/algorithms/mf_common.py b/lenskit/algorithms/mf_common.py index 59addf891..d7dcec646 100644 --- a/lenskit/algorithms/mf_common.py +++ b/lenskit/algorithms/mf_common.py @@ -93,7 +93,7 @@ def score_by_ids(self, user, items, u_features=None): if u_features is None: uidx = self.lookup_user(user) if uidx < 0: - _logger.debug('user %s not in model', user) + _logger.debug("user %s not in model", user) return pd.Series(np.nan, index=items) else: uidx = None @@ -106,7 +106,7 @@ def score_by_ids(self, user, items, u_features=None): good_iidx = iidx[good] # multiply - _logger.debug('scoring %d items for user %s', len(good_items), user) + _logger.debug("scoring %d items for user %s", len(good_items), user) rv = self.score(uidx, good_iidx, u_features) res = pd.Series(rv, index=good_items) diff --git a/lenskit/algorithms/ranking.py b/lenskit/algorithms/ranking.py index 91a75b614..7d8b3e515 100644 --- a/lenskit/algorithms/ranking.py +++ b/lenskit/algorithms/ranking.py @@ -40,6 +40,7 @@ class TopN(Recommender, Predictor): def __init__(self, predictor, selector=None): from .basic import UnratedItemCandidateSelector + self.predictor = predictor self.selector = selector if selector is not None else UnratedItemCandidateSelector() @@ -59,8 +60,8 @@ def fit(self, ratings, **kwargs): return self def fit_iters(self, ratings, **kwargs): - if not hasattr(self.predictor, 'fit_iters'): - raise AttributeError('predictor has no method fit_iters') + if not hasattr(self.predictor, "fit_iters"): + raise AttributeError("predictor has no method fit_iters") self.selector.fit(ratings, **kwargs) pred = self.predictor @@ -80,8 +81,8 @@ def recommend(self, user, n=None, candidates=None, ratings=None): scores = scores.nlargest(n) else: scores = scores.sort_values(ascending=False) - scores.name = 'score' - scores.index.name = 'item' + scores.name = "score" + scores.index.name = "item" return scores.reset_index() def predict(self, pairs, ratings=None): @@ -91,7 
+92,7 @@ def predict_for_user(self, user, items, ratings=None): return self.predictor.predict_for_user(user, items, ratings) def __str__(self): - return 'TopN/' + str(self.predictor) + return "TopN/" + str(self.predictor) class PlackettLuce(Recommender): @@ -112,10 +113,11 @@ class PlackettLuce(Recommender): def __init__(self, predictor, selector=None, *, rng_spec=None): from .basic import UnratedItemCandidateSelector, Popular + if isinstance(predictor, TopN): - _log.warn('wrapping Top-N in PlackettLuce, candidate selector probably redundant') + _log.warn("wrapping Top-N in PlackettLuce, candidate selector probably redundant") elif isinstance(predictor, Popular): - _log.warn('wrapping Popular in Plackett-Luce, consider PopScore') + _log.warn("wrapping Popular in Plackett-Luce, consider PopScore") self.predictor = predictor self.selector = selector if selector is not None else UnratedItemCandidateSelector() @@ -142,6 +144,6 @@ def recommend(self, user, n=None, candidates=None, ratings=None): scores = scores.nlargest(n) else: scores = scores.sort_values(ascending=False) - scores.name = 'score' - scores.index.name = 'item' + scores.name = "score" + scores.index.name = "item" return scores.reset_index() diff --git a/lenskit/algorithms/svd.py b/lenskit/algorithms/svd.py index 7253ab1a0..26ca725d7 100644 --- a/lenskit/algorithms/svd.py +++ b/lenskit/algorithms/svd.py @@ -5,6 +5,7 @@ try: from sklearn.decomposition import TruncatedSVD + SKL_AVAILABLE = True except ImportError: TruncatedSVD = None @@ -29,9 +30,9 @@ class BiasedSVD(Predictor): example and for cases where you want to evaluate a pure SVD implementation. """ - def __init__(self, features, *, damping=5, bias=True, algorithm='randomized'): + def __init__(self, features, *, damping=5, bias=True, algorithm="randomized"): if TruncatedSVD is None: - raise ImportError('sklearn.decomposition') + raise ImportError("sklearn.decomposition") if bias is True: self.bias = Bias(damping=damping) else: @@ -40,14 +41,14 @@ def __init__(self, features, *, damping=5, bias=True, algorithm='randomized'): def fit(self, ratings, **kwargs): timer = Stopwatch() - _log.info('[%s] computing bias', timer) + _log.info("[%s] computing bias", timer) self.bias.fit(ratings) g_bias = self.bias.mean_ u_bias = self.bias.user_offsets_ i_bias = self.bias.item_offsets_ - _log.info('[%s] sparsifying and normalizing matrix', timer) + _log.info("[%s] sparsifying and normalizing matrix", timer) r_mat, users, items = sparse_ratings(ratings, users=u_bias.index, items=i_bias.index) # global r_mat.values -= g_bias @@ -56,7 +57,7 @@ def fit(self, ratings, **kwargs): r_mat = r_mat.to_scipy() assert r_mat.shape == (len(u_bias), len(i_bias)) - _log.info('[%s] training SVD (k=%d)', timer, self.factorization.n_components) + _log.info("[%s] training SVD (k=%d)", timer, self.factorization.n_components) Xt = self.factorization.fit_transform(r_mat) self.user_components_ = Xt _log.info("finished model training in %s", timer) @@ -83,15 +84,15 @@ def predict_for_user(self, user, items, ratings=None): def get_params(self, deep=True): params = { - 'features': self.factorization.n_components, - 'algorithm': self.factorization.algorithm + "features": self.factorization.n_components, + "algorithm": self.factorization.algorithm, } if deep and self.bias: for k, v in self.bias.get_params(True).items(): - params['bias__' + k] = v + params["bias__" + k] = v else: - params['bias'] = self.bias + params["bias"] = self.bias return params def __str__(self): - return f'BiasedSVD({self.factorization})' + return 
f"BiasedSVD({self.factorization})" diff --git a/lenskit/algorithms/user_knn.py b/lenskit/algorithms/user_knn.py index 17c66b8a7..2793ab06f 100644 --- a/lenskit/algorithms/user_knn.py +++ b/lenskit/algorithms/user_knn.py @@ -130,36 +130,29 @@ class UserUser(Predictor): rating_matrix_(matrix.CSR): Normalized user-item rating matrix. transpose_matrix_(matrix.CSR): Transposed un-normalized rating matrix. """ - IGNORED_PARAMS = ['feedback'] - EXTRA_PARAMS = ['center', 'aggregate', 'use_ratings'] - AGG_SUM = intern('sum') - AGG_WA = intern('weighted-average') + + IGNORED_PARAMS = ["feedback"] + EXTRA_PARAMS = ["center", "aggregate", "use_ratings"] + AGG_SUM = intern("sum") + AGG_WA = intern("weighted-average") RATING_AGGS = [AGG_WA] - def __init__(self, nnbrs, min_nbrs=1, min_sim=0, feedback='explicit', **kwargs): + def __init__(self, nnbrs, min_nbrs=1, min_sim=0, feedback="explicit", **kwargs): self.nnbrs = nnbrs self.min_nbrs = min_nbrs self.min_sim = min_sim - if feedback == 'explicit': - defaults = { - 'center': True, - 'aggregate': self.AGG_WA, - 'use_ratings': True - } - elif feedback == 'implicit': - defaults = { - 'center': False, - 'aggregate': self.AGG_SUM, - 'use_ratings': False - } + if feedback == "explicit": + defaults = {"center": True, "aggregate": self.AGG_WA, "use_ratings": True} + elif feedback == "implicit": + defaults = {"center": False, "aggregate": self.AGG_SUM, "use_ratings": False} else: - raise ValueError(f'invalid feedback mode: {feedback}') + raise ValueError(f"invalid feedback mode: {feedback}") defaults.update(kwargs) - self.center = defaults['center'] - self.aggregate = intern(defaults['aggregate']) - self.use_ratings = defaults['use_ratings'] + self.center = defaults["center"] + self.aggregate = intern(defaults["aggregate"]) + self.use_ratings = defaults["use_ratings"] def fit(self, ratings, **kwargs): """ @@ -174,7 +167,7 @@ def fit(self, ratings, **kwargs): # mean-center ratings if self.center: - umeans = uir.normalize_rows('center') + umeans = uir.normalize_rows("center") else: umeans = None @@ -184,7 +177,7 @@ def fit(self, ratings, **kwargs): # L2-normalize ratings so dot product is cosine if uir.values is None or not self.use_ratings: uir.values = np.full(uir.nnz, 1.0) - uir.normalize_rows('unit') + uir.normalize_rows("unit") self.rating_matrix_ = uir self.user_index_ = users @@ -210,11 +203,11 @@ def predict_for_user(self, user, items, ratings=None): """ watch = util.Stopwatch() - items = pd.Index(items, name='item') + items = pd.Index(items, name="item") ratings, umean = self._get_user_data(user, ratings) if ratings is None: - return pd.Series(index=items, dtype='float64') + return pd.Series(index=items, dtype="float64") assert len(ratings) == len(self.item_index_) # ratings is a dense vector # now ratings is normalized to be a mean-centered unit vector @@ -225,7 +218,7 @@ def predict_for_user(self, user, items, ratings=None): if user in self.user_index_: nsims[self.user_index_.get_loc(user)] = 0 - _logger.debug('computed user similarities') + _logger.debug("computed user similarities") results = np.full(len(items), np.nan, dtype=np.float_) ri_pos = self.item_index_.get_indexer(items.values) @@ -234,17 +227,26 @@ def predict_for_user(self, user, items, ratings=None): elif self.aggregate == self.AGG_SUM: agg = _agg_sum else: - raise ValueError('invalid aggregate ' + self.aggregate) - - _score(ri_pos, results, self.transpose_matrix_, nsims, - self.nnbrs, self.min_sim, self.min_nbrs, agg) + raise ValueError("invalid aggregate " + self.aggregate) + + _score( + 
ri_pos, + results, + self.transpose_matrix_, + nsims, + self.nnbrs, + self.min_sim, + self.min_nbrs, + agg, + ) if self.aggregate in self.RATING_AGGS: results += umean - results = pd.Series(results, index=items, name='prediction') + results = pd.Series(results, index=items, name="prediction") - _logger.debug('scored %d of %d items for %s in %s', - results.notna().sum(), len(items), user, watch) + _logger.debug( + "scored %d of %d items for %s in %s", results.notna().sum(), len(items), user, watch + ) return results def _get_user_data(self, user, ratings): @@ -257,10 +259,10 @@ def _get_user_data(self, user, ratings): ratings = rmat.row(upos) umean = self.user_means_[upos] if self.user_means_ is not None else 0 except KeyError: - _logger.warning('user %d has no ratings and none provided', user) + _logger.warning("user %d has no ratings and none provided", user) return None, 0 else: - _logger.debug('using provided ratings for user %d', user) + _logger.debug("using provided ratings for user %d", user) if self.center: umean = ratings.mean() ratings = ratings - umean @@ -281,4 +283,4 @@ def __setstate__(self, state): self.aggregate = intern(self.aggregate) def __str__(self): - return 'UserUser(nnbrs={}, min_sim={})'.format(self.nnbrs, self.min_sim) + return "UserUser(nnbrs={}, min_sim={})".format(self.nnbrs, self.min_sim) diff --git a/lenskit/batch/_predict.py b/lenskit/batch/_predict.py index 405a0dbd7..e0e93cae6 100644 --- a/lenskit/batch/_predict.py +++ b/lenskit/batch/_predict.py @@ -12,10 +12,16 @@ def _predict_user(model, req): user, udf = req watch = util.Stopwatch() - res = model.predict_for_user(user, udf['item']) - res = pd.DataFrame({'user': user, 'item': res.index, 'prediction': res.values}) - _logger.debug('%s produced %f/%d predictions for %s in %s', - model, res.prediction.notna().sum(), len(udf), user, watch) + res = model.predict_for_user(user, udf["item"]) + res = pd.DataFrame({"user": user, "item": res.index, "prediction": res.values}) + _logger.debug( + "%s produced %f/%d predictions for %s in %s", + model, + res.prediction.notna().sum(), + len(udf), + user, + watch, + ) return res @@ -62,24 +68,24 @@ def predict(algo, pairs, *, n_jobs=None, **kwargs): the prediction results. If ``pairs`` contains a `rating` column, this result will also contain a `rating` column. 
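A minimal sketch of how this batch prediction entry point is typically driven; the data path, seed, neighborhood size, and n_jobs value are illustrative choices, not part of this change:

    from lenskit import batch, crossfold as xf
    from lenskit.algorithms import user_knn
    from lenskit.datasets import MovieLens

    ratings = MovieLens("data/ml-latest-small").ratings
    # hold out 20% of each test user's ratings; take the first train/test pair
    train, test = next(xf.partition_users(ratings, 5, xf.SampleFrac(0.2), rng_spec=42))

    algo = user_knn.UserUser(30)
    algo.fit(train)

    # test has user/item/rating columns, so the result is joined back to the ratings
    preds = batch.predict(algo, test, n_jobs=1)
    print(preds.head())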
""" - if n_jobs is None and 'nprocs' in kwargs: - n_jobs = kwargs['nprocs'] - warnings.warn('nprocs is deprecated, use n_jobs', DeprecationWarning) + if n_jobs is None and "nprocs" in kwargs: + n_jobs = kwargs["nprocs"] + warnings.warn("nprocs is deprecated, use n_jobs", DeprecationWarning) - nusers = pairs['user'].nunique() + nusers = pairs["user"].nunique() timer = util.Stopwatch() with util.parallel.invoker(algo, _predict_user, n_jobs=n_jobs) as worker: del algo # maybe free some memory - _logger.info('generating %d predictions for %d users (setup took %s)', - len(pairs), nusers, timer) + _logger.info( + "generating %d predictions for %d users (setup took %s)", len(pairs), nusers, timer + ) timer = util.Stopwatch() - results = worker.map((user, udf.copy()) for (user, udf) in pairs.groupby('user')) + results = worker.map((user, udf.copy()) for (user, udf) in pairs.groupby("user")) results = pd.concat(results) - _logger.info('generated %d predictions for %d users in %s', - len(pairs), nusers, timer) + _logger.info("generated %d predictions for %d users in %s", len(pairs), nusers, timer) - if 'rating' in pairs: - return pairs.join(results.set_index(['user', 'item']), on=('user', 'item')) + if "rating" in pairs: + return pairs.join(results.set_index(["user", "item"]), on=("user", "item")) return results diff --git a/lenskit/batch/_recommend.py b/lenskit/batch/_recommend.py index 5de6089c5..4d2667346 100644 --- a/lenskit/batch/_recommend.py +++ b/lenskit/batch/_recommend.py @@ -14,14 +14,13 @@ def _recommend_user(algo, req): user, n, candidates = req - _logger.debug('generating recommendations for %s', user) + _logger.debug("generating recommendations for %s", user) watch = util.Stopwatch() res = algo.recommend(user, n, candidates) - _logger.debug('%s recommended %d/%s items for %s in %s', - str(algo), len(res), n, user, watch) + _logger.debug("%s recommended %d/%s items for %s in %s", str(algo), len(res), n, user, watch) - res['user'] = user - res['rank'] = np.arange(1, len(res) + 1) + res["user"] = user + res["rank"] = np.arange(1, len(res) + 1) return res.reset_index(drop=True) @@ -63,29 +62,28 @@ def recommend(algo, users, n, candidates=None, *, n_jobs=None, **kwargs): ``score``, and any other columns returned by the recommender. 
""" - if n_jobs is None and 'nprocs' in kwargs: - n_jobs = kwargs['nprocs'] - warnings.warn('nprocs is deprecated, use n_jobs', DeprecationWarning) + if n_jobs is None and "nprocs" in kwargs: + n_jobs = kwargs["nprocs"] + warnings.warn("nprocs is deprecated, use n_jobs", DeprecationWarning) if not isinstance(algo, PersistedModel): rec_algo = Recommender.adapt(algo) if candidates is None and rec_algo is not algo: - warnings.warn('no candidates provided and algo is not a recommender, unlikely to work') + warnings.warn("no candidates provided and algo is not a recommender, unlikely to work") algo = rec_algo del rec_algo - if 'ratings' in kwargs: - warnings.warn('Providing ratings to recommend is not supported', DeprecationWarning) + if "ratings" in kwargs: + warnings.warn("Providing ratings to recommend is not supported", DeprecationWarning) candidates = __standard_cand_fun(candidates) with util.parallel.invoker(algo, _recommend_user, n_jobs=n_jobs) as worker: - _logger.info('recommending with %s for %d users (n_jobs=%s)', - str(algo), len(users), n_jobs) + _logger.info("recommending with %s for %d users (n_jobs=%s)", str(algo), len(users), n_jobs) del algo timer = util.Stopwatch() results = worker.map((user, n, candidates(user)) for user in users) results = pd.concat(results, ignore_index=True, copy=False) - _logger.info('recommended for %d users in %s', len(users), timer) + _logger.info("recommended for %d users in %s", len(users), timer) return results diff --git a/lenskit/batch/_train.py b/lenskit/batch/_train.py index 89dd8fcf5..df6c78b40 100644 --- a/lenskit/batch/_train.py +++ b/lenskit/batch/_train.py @@ -9,10 +9,10 @@ def _train_and_save(algo, file, ratings, kwargs): "Worker for subprocess model training" - _log.info('training %s on %d ratings', algo, len(ratings)) + _log.info("training %s on %d ratings", algo, len(ratings)) timer = Stopwatch() algo.fit(ratings, **kwargs) - _log.info('trained %s in %s', algo, timer) + _log.info("trained %s in %s", algo, timer) if file is None: return persist_binpickle(algo).transfer() else: diff --git a/lenskit/crossfold.py b/lenskit/crossfold.py index 3945731dc..8e9ece320 100644 --- a/lenskit/crossfold.py +++ b/lenskit/crossfold.py @@ -10,10 +10,10 @@ import pandas as pd from . import util -TTPair = namedtuple('TTPair', ['train', 'test']) -TTPair.__doc__ = 'Train-test pair (named tuple).' -TTPair.train.__doc__ = 'Train data for this pair.' -TTPair.test.__doc__ = 'Test data for this pair.' +TTPair = namedtuple("TTPair", ["train", "test"]) +TTPair.__doc__ = "Train-test pair (named tuple)." +TTPair.train.__doc__ = "Train data for this pair." +TTPair.test.__doc__ = "Test data for this pair." 
_logger = logging.getLogger(__name__) @@ -36,7 +36,7 @@ def partition_rows(data, partitions, *, rng_spec=None): """ confirm_unique_index(data) - _logger.info('partitioning %d ratings into %d partitions', len(data), partitions) + _logger.info("partitioning %d ratings into %d partitions", len(data), partitions) # create an array of indexes rows = np.arange(len(data)) @@ -48,7 +48,7 @@ def partition_rows(data, partitions, *, rng_spec=None): # convert each partition into a split for i, ts in enumerate(test_sets): test = data.iloc[ts, :] - trains = test_sets[:i] + test_sets[(i + 1):] + trains = test_sets[:i] + test_sets[(i + 1) :] train_idx = np.concatenate(trains) train = data.iloc[train_idx, :] yield TTPair(train, test) @@ -105,8 +105,12 @@ def sample_rows(data, partitions, size, disjoint=True, *, rng_spec=None): return TTPair(train, test) if disjoint and partitions * size >= len(data): - _logger.warning('wanted %d disjoint splits of %d each, but only have %d rows; partitioning', - partitions, size, len(data)) + _logger.warning( + "wanted %d disjoint splits of %d each, but only have %d rows; partitioning", + partitions, + size, + len(data), + ) return partition_rows(data, partitions) # create an array of indexes @@ -115,11 +119,11 @@ def sample_rows(data, partitions, size, disjoint=True, *, rng_spec=None): rng = util.rng(rng_spec) if disjoint: - _logger.info('creating %d disjoint samples of size %d', partitions, size) + _logger.info("creating %d disjoint samples of size %d", partitions, size) ips = _disjoint_sample(rows, partitions, size, rng) else: - _logger.info('taking %d samples of size %d', partitions, size) + _logger.info("taking %d samples of size %d", partitions, size) ips = _n_samples(rows, partitions, size, rng) return (TTPair(data.iloc[ip.train, :], data.iloc[ip.test, :]) for ip in ips) @@ -132,8 +136,8 @@ def _disjoint_sample(xs, n, size, rng): # convert each partition into a split for i in range(n): start = i * size - test = xs[start:start + size] - train = np.concatenate((xs[:start], xs[start + size:])) + test = xs[start : start + size] + train = np.concatenate((xs[:start], xs[start + size :])) yield TTPair(train, test) @@ -190,6 +194,7 @@ class SampleFrac(PartitionMethod): Args: frac(float): the fraction items to select for testing. """ + def __init__(self, frac, rng_spec=None): self.fraction = frac self.rng = util.rng(rng_spec, legacy=True) @@ -207,12 +212,12 @@ class LastN(PartitionMethod): n(int): The number of test items to select. """ - def __init__(self, n, col='timestamp'): + def __init__(self, n, col="timestamp"): self.n = n self.column = col def __call__(self, udf): - return udf.sort_values(self.column).iloc[-self.n:] + return udf.sort_values(self.column).iloc[-self.n :] class LastFrac(PartitionMethod): @@ -222,7 +227,8 @@ class LastFrac(PartitionMethod): Args: frac(double): the fraction of items to select for testing. 
""" - def __init__(self, frac, col='timestamp'): + + def __init__(self, frac, col="timestamp"): self.fraction = frac self.column = col @@ -248,10 +254,11 @@ def partition_users(data, partitions: int, method: PartitionMethod, *, rng_spec= """ confirm_unique_index(data) - user_col = data['user'] + user_col = data["user"] users = user_col.unique() - _logger.info('partitioning %d rows for %d users into %d partitions', - len(data), len(users), partitions) + _logger.info( + "partitioning %d rows for %d users into %d partitions", len(data), len(users), partitions + ) # create an array of indexes into user row rows = np.arange(len(users)) @@ -265,21 +272,22 @@ def partition_users(data, partitions: int, method: PartitionMethod, *, rng_spec= # get our users! test_us = users[ts] # sample the data frame - _logger.info('fold %d: selecting test ratings', i) - ugf = data[data.user.isin(test_us)].groupby('user') + _logger.info("fold %d: selecting test ratings", i) + ugf = data[data.user.isin(test_us)].groupby("user") test = ugf.apply(method) # get rid of the group index test = test.reset_index(0, drop=True) # now test is indexed on the data frame! so we can get the rest - _logger.info('fold %d: partitioning training data', i) + _logger.info("fold %d: partitioning training data", i) mask = pd.Series(True, index=data.index) mask[test.index] = False train = data[mask] yield TTPair(train, test) -def sample_users(data, partitions: int, size: int, method: PartitionMethod, disjoint=True, *, - rng_spec=None): +def sample_users( + data, partitions: int, size: int, method: PartitionMethod, disjoint=True, *, rng_spec=None +): """ Create train-test partitions by sampling users. This function does not care what kind of data is in `data`, so long as it is @@ -304,16 +312,16 @@ def sample_users(data, partitions: int, size: int, method: PartitionMethod, disj confirm_unique_index(data) rng = util.rng(rng_spec, legacy=True) - user_col = data['user'] + user_col = data["user"] users = user_col.unique() if disjoint and partitions * size >= len(users): - _logger.warning('cannot take %d disjoint samples of size %d from %d users', - partitions, size, len(users)) + _logger.warning( + "cannot take %d disjoint samples of size %d from %d users", partitions, size, len(users) + ) yield from partition_users(data, partitions, method) return - _logger.info('sampling %d users into %d partitions (n=%d)', - len(users), partitions, size) + _logger.info("sampling %d users into %d partitions (n=%d)", len(users), partitions, size) if disjoint: rng.shuffle(users) @@ -322,12 +330,12 @@ def sample_users(data, partitions: int, size: int, method: PartitionMethod, disj for i in range(partitions): # get our test users! if disjoint: - test_us = users[i*size:(i+1)*size] + test_us = users[i * size : (i + 1) * size] else: test_us = rng.choice(users, size, False) # sample the data frame - test = data[data.user.isin(test_us)].groupby('user').apply(method) + test = data[data.user.isin(test_us)].groupby("user").apply(method) # get rid of the group index test = test.reset_index(0, drop=True) # now test is indexed on the data frame! 
so we can get the rest @@ -354,10 +362,12 @@ def simple_test_pair(ratings, n_users=1000, n_rates=5, f_rates=None): def confirm_unique_index(data): """Confirms dataframe has unique index values, and if not, - throws ValueError with helpful log message""" + throws ValueError with helpful log message""" if not data.index.is_unique: _logger.error("Index has duplicate values") - _logger.info("If index values do not matter, consider running " + - ".reset_index() on the dataframe before partitioning") - raise ValueError('Index is not uniquely valued') + _logger.info( + "If index values do not matter, consider running " + + ".reset_index() on the dataframe before partitioning" + ) + raise ValueError("Index is not uniquely valued") diff --git a/lenskit/data/matrix.py b/lenskit/data/matrix.py index 21b26de6d..04567ffed 100644 --- a/lenskit/data/matrix.py +++ b/lenskit/data/matrix.py @@ -12,7 +12,7 @@ _log = logging.getLogger(__name__) -RatingMatrix = namedtuple('RatingMatrix', ['matrix', 'users', 'items']) +RatingMatrix = namedtuple("RatingMatrix", ["matrix", "users", "items"]) RatingMatrix.__doc__ = """ A rating matrix with associated indices. @@ -41,30 +41,32 @@ def sparse_ratings(ratings, scipy=False, *, users=None, items=None): a named tuple containing the sparse matrix, user index, and item index. """ if users is None: - users = pd.Index(np.unique(ratings.user), name='user') + users = pd.Index(np.unique(ratings.user), name="user") if items is None: - items = pd.Index(np.unique(ratings.item), name='item') + items = pd.Index(np.unique(ratings.item), name="item") - _log.debug('creating matrix with %d ratings for %d items by %d users', - len(ratings), len(items), len(users)) + _log.debug( + "creating matrix with %d ratings for %d items by %d users", + len(ratings), + len(items), + len(users), + ) row_ind = users.get_indexer(ratings.user).astype(np.intc) if np.any(row_ind < 0): - raise ValueError('provided user index does not cover all users') + raise ValueError("provided user index does not cover all users") col_ind = items.get_indexer(ratings.item).astype(np.intc) if np.any(col_ind < 0): - raise ValueError('provided item index does not cover all users') + raise ValueError("provided item index does not cover all users") - if 'rating' in ratings.columns: + if "rating" in ratings.columns: vals = np.require(ratings.rating.values, np.float64) else: vals = None - if scipy == 'coo': - matrix = sps.coo_matrix( - (vals, (row_ind, col_ind)), shape=(len(users), len(items)) - ) + if scipy == "coo": + matrix = sps.coo_matrix((vals, (row_ind, col_ind)), shape=(len(users), len(items))) else: matrix = CSR.from_coo(row_ind, col_ind, vals, (len(users), len(items))) if scipy: diff --git a/lenskit/datasets/__init__.py b/lenskit/datasets/__init__.py index 262a14652..f2fb2a9dc 100644 --- a/lenskit/datasets/__init__.py +++ b/lenskit/datasets/__init__.py @@ -1 +1 @@ -from .movielens import * # noqa: F403 +from .movielens import * # noqa: F403 diff --git a/lenskit/datasets/fetch.py b/lenskit/datasets/fetch.py index 0be178a2e..ed8299bc8 100644 --- a/lenskit/datasets/fetch.py +++ b/lenskit/datasets/fetch.py @@ -5,17 +5,17 @@ from pathlib import Path import logging -_log = logging.getLogger('lenskit.datasets.fetch') +_log = logging.getLogger("lenskit.datasets.fetch") ML_LOC = "http://files.grouplens.org/datasets/movielens/" ML_DATASETS = { - 'ml-100k': 'ml-100k/u.data', - 'ml-1m': 'ml-1m/ratings.dat', - 'ml-10m': 'ml-10M100K/ratings.dat', - 'ml-20m': 'ml-20m/ratings.csv', - 'ml-25m': 'ml-25m/ratings.csv', - 'ml-latest': 
'ml-latest/ratings.csv', - 'ml-latest-small': 'ml-latest-small/ratings.csv', + "ml-100k": "ml-100k/u.data", + "ml-1m": "ml-1m/ratings.dat", + "ml-10m": "ml-10M100K/ratings.dat", + "ml-20m": "ml-20m/ratings.csv", + "ml-25m": "ml-25m/ratings.csv", + "ml-latest": "ml-latest/ratings.csv", + "ml-latest-small": "ml-latest-small/ratings.csv", } @@ -37,46 +37,48 @@ def fetch_ml(name: str, base_dir: Path): base_dir: The base directory into which data should be extracted. """ - zipname = f'{name}.zip' + zipname = f"{name}.zip" zipfile = base_dir / zipname zipurl = ML_LOC + zipname test_file = base_dir / ML_DATASETS[name] if test_file.exists(): - _log.info(test_file, 'already exists') + _log.info(test_file, "already exists") return - _log.info('downloading data set %s', name) - with zipfile.open('wb') as zf: + _log.info("downloading data set %s", name) + with zipfile.open("wb") as zf: res = urlopen(zipurl) block = res.read(8 * 1024 * 1024) while len(block): - _log.debug('received %d bytes', len(block)) + _log.debug("received %d bytes", len(block)) zf.write(block) block = res.read(8 * 1024 * 1024) - _log.info('unpacking data set') - with ZipFile(zipfile, 'r') as zf: + _log.info("unpacking data set") + with ZipFile(zipfile, "r") as zf: zf.extractall(base_dir) def _fetch_main(): logging.basicConfig(stream=sys.stderr, level=logging.INFO) parser = argparse.ArgumentParser() - parser.add_argument('name', help='the name of the dataset to fetch') - parser.add_argument('--data-dir', metavar='DIR', help='save extracted data to DIR', default='data') + parser.add_argument("name", help="the name of the dataset to fetch") + parser.add_argument( + "--data-dir", metavar="DIR", help="save extracted data to DIR", default="data" + ) args = parser.parse_args() name = args.name - _log.info('fetching data set %s', name) + _log.info("fetching data set %s", name) dir = Path(args.data_dir) - _log.info('extracting data to %s', dir) - if name.startswith('ml-'): + _log.info("extracting data to %s", dir) + if name.startswith("ml-"): fetch_ml(name, dir) else: - _log.error('unknown data set %s', name) - raise ValueError('invalid data set') + _log.error("unknown data set %s", name) + raise ValueError("invalid data set") -if __name__ == '__main__': +if __name__ == "__main__": _fetch_main() diff --git a/lenskit/datasets/movielens.py b/lenskit/datasets/movielens.py index 01bca6ba0..5ba34556e 100644 --- a/lenskit/datasets/movielens.py +++ b/lenskit/datasets/movielens.py @@ -13,17 +13,17 @@ _log = logging.getLogger(__name__) __doctest_skip__ = [] -if not os.path.exists('data/ml-100k'): - __doctest_skip__.append('ML100K.*') -if not os.path.exists('data/ml-20m'): - __doctest_skip__.append('MovieLens.tag_genome') -if not os.path.exists('data/ml-1m.*'): - __doctest_skip__.append('ML1M.*') -if not os.path.exists('data/ml-10M100K'): - __doctest_skip__.append('ML10M.*') - __doctest_skip__.append('MLM.*') +if not os.path.exists("data/ml-100k"): + __doctest_skip__.append("ML100K.*") +if not os.path.exists("data/ml-20m"): + __doctest_skip__.append("MovieLens.tag_genome") +if not os.path.exists("data/ml-1m.*"): + __doctest_skip__.append("ML1M.*") +if not os.path.exists("data/ml-10M100K"): + __doctest_skip__.append("ML10M.*") + __doctest_skip__.append("MLM.*") -__all__ = ['MovieLens', 'ML100K', 'ML1M', 'ML10M'] +__all__ = ["MovieLens", "ML100K", "ML1M", "ML10M"] class MovieLens: @@ -35,7 +35,7 @@ class MovieLens: path(str or pathlib.Path): Path to the directory containing the data set. 
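A short sketch of how this loader is used once a data set has been downloaded; the directory name is illustrative and should point at wherever the MovieLens files were extracted:

    from lenskit.datasets import MovieLens

    ml = MovieLens("data/ml-latest-small")
    ratings = ml.ratings        # user, item, rating, timestamp
    movies = ml.movies          # indexed by item, with title and genres
    print(ratings.head())
    print(movies.join(ratings.groupby("item")["rating"].mean(), how="inner").head())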
""" - def __init__(self, path='data/ml-20m'): + def __init__(self, path="data/ml-20m"): self.path = Path(path) @cached @@ -55,15 +55,18 @@ def ratings(self): [100004 rows x 4 columns] """ - fn = self.path / 'ratings.csv' - ratings = pd.read_csv(fn, dtype={ - 'movieId': np.int32, - 'userId': np.int32, - 'rating': np.float64, - 'timestamp': np.int32 - }) - ratings.rename(columns={'userId': 'user', 'movieId': 'item'}, inplace=True) - _log.debug('loaded %s, takes %d bytes', fn, ratings.memory_usage().sum()) + fn = self.path / "ratings.csv" + ratings = pd.read_csv( + fn, + dtype={ + "movieId": np.int32, + "userId": np.int32, + "rating": np.float64, + "timestamp": np.int32, + }, + ) + ratings.rename(columns={"userId": "user", "movieId": "item"}, inplace=True) + _log.debug("loaded %s, takes %d bytes", fn, ratings.memory_usage().sum()) return ratings @cached @@ -84,15 +87,18 @@ def movies(self): [9125 rows x 2 columns] """ - fn = self.path / 'movies.csv' - movies = pd.read_csv(fn, dtype={ - 'movieId': np.int32, - 'title': object, - 'genres': object, - }) - movies.rename(columns={'movieId': 'item'}, inplace=True) - movies.set_index('item', inplace=True) - _log.debug('loaded %s, takes %d bytes', fn, movies.memory_usage().sum()) + fn = self.path / "movies.csv" + movies = pd.read_csv( + fn, + dtype={ + "movieId": np.int32, + "title": object, + "genres": object, + }, + ) + movies.rename(columns={"movieId": "item"}, inplace=True) + movies.set_index("item", inplace=True) + _log.debug("loaded %s, takes %d bytes", fn, movies.memory_usage().sum()) return movies @cached @@ -114,15 +120,13 @@ def links(self): [9125 rows x 2 columns] """ - fn = self.path / 'links.csv' - links = pd.read_csv(fn, dtype={ - 'movieId': np.int32, - 'imdbId': np.int64, - 'tmdbId': pd.Int64Dtype() - }) - links.rename(columns={'movieId': 'item'}, inplace=True) - links.set_index('item', inplace=True) - _log.debug('loaded %s, takes %d bytes', fn, links.memory_usage().sum()) + fn = self.path / "links.csv" + links = pd.read_csv( + fn, dtype={"movieId": np.int32, "imdbId": np.int64, "tmdbId": pd.Int64Dtype()} + ) + links.rename(columns={"movieId": "item"}, inplace=True) + links.set_index("item", inplace=True) + _log.debug("loaded %s, takes %d bytes", fn, links.memory_usage().sum()) return links @cached @@ -143,15 +147,18 @@ def tags(self): [1296 rows x 4 columns] """ - fn = self.path / 'tags.csv' - tags = pd.read_csv(fn, dtype={ - 'movieId': np.int32, - 'userId': np.int32, - 'tag': object, - 'timestamp': np.int32, - }) - tags.rename(columns={'userId': 'user', 'movieId': 'item'}, inplace=True) - _log.debug('loaded %s, takes %d bytes', fn, tags.memory_usage().sum()) + fn = self.path / "tags.csv" + tags = pd.read_csv( + fn, + dtype={ + "movieId": np.int32, + "userId": np.int32, + "tag": object, + "timestamp": np.int32, + }, + ) + tags.rename(columns={"userId": "user", "movieId": "item"}, inplace=True) + _log.debug("loaded %s, takes %d bytes", fn, tags.memory_usage().sum()) return tags @cached @@ -173,19 +180,22 @@ def tag_genome(self): [10381 rows x 1128 columns] """ - fn = self.path / 'genome-scores.csv' - tags = pd.read_csv(self.path / 'genome-tags.csv') - tags = tags.set_index('tagId') - tags = tags['tag'].astype('category') - genome = pd.read_csv(fn, dtype={ - 'movieId': np.int32, - 'tagId': np.int32, - 'relevance': np.float64, - }) - genome.rename(columns={'userId': 'user', 'movieId': 'item'}, inplace=True) - genome = genome.join(tags, on='tagId') - genome = genome.pivot(index='item', columns='tag', values='relevance') - 
_log.debug('loaded %s, takes %d bytes', fn, genome.memory_usage().sum()) + fn = self.path / "genome-scores.csv" + tags = pd.read_csv(self.path / "genome-tags.csv") + tags = tags.set_index("tagId") + tags = tags["tag"].astype("category") + genome = pd.read_csv( + fn, + dtype={ + "movieId": np.int32, + "tagId": np.int32, + "relevance": np.float64, + }, + ) + genome.rename(columns={"userId": "user", "movieId": "item"}, inplace=True) + genome = genome.join(tags, on="tagId") + genome = genome.pivot(index="item", columns="tag", values="relevance") + _log.debug("loaded %s, takes %d bytes", fn, genome.memory_usage().sum()) return genome @@ -195,13 +205,13 @@ class ML100K: the more current data sets loaded by :class:`MovieLens`. """ - def __init__(self, path='data/ml-100k'): + def __init__(self, path="data/ml-100k"): self.path = Path(path) @property def available(self): "Query whether the data set exists." - return (self.path / 'u.data').exists() + return (self.path / "u.data").exists() @cached def ratings(self): @@ -219,12 +229,15 @@ def ratings(self): ... [100000 rows x 4 columns] """ - fn = self.path / 'u.data' - ratings = pd.read_csv(fn, sep='\t', header=None, - names=['user', 'item', 'rating', 'timestamp'], - dtype={'user': np.int32, 'item': np.int32, - 'rating': np.float32, 'timestamp': np.int32}) - _log.debug('loaded %s', fn) + fn = self.path / "u.data" + ratings = pd.read_csv( + fn, + sep="\t", + header=None, + names=["user", "item", "rating", "timestamp"], + dtype={"user": np.int32, "item": np.int32, "rating": np.float32, "timestamp": np.int32}, + ) + _log.debug("loaded %s", fn) return ratings @cached @@ -244,13 +257,16 @@ def users(self): ... [943 rows x 4 columns] """ - fn = self.path / 'u.user' - users = pd.read_csv(fn, sep='|', header=None, - names=['user', 'age', 'gender', 'occupation', 'zip'], - dtype={'user': np.int32, 'age': np.int8, - 'occupation': 'category'}) - _log.debug('loaded %s', fn) - return users.set_index('user') + fn = self.path / "u.user" + users = pd.read_csv( + fn, + sep="|", + header=None, + names=["user", "age", "gender", "occupation", "zip"], + dtype={"user": np.int32, "age": np.int8, "occupation": "category"}, + ) + _log.debug("loaded %s", fn) + return users.set_index("user") @cached def movies(self): @@ -269,17 +285,37 @@ def movies(self): ... [1682 rows x 23 columns] """ - fn = self.path / 'u.item' + fn = self.path / "u.item" genres = [ - 'unknown', 'Action', 'Adventure', 'Animation', - "Children's", 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', - 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', - 'Thriller', 'War', 'Western' + "unknown", + "Action", + "Adventure", + "Animation", + "Children's", + "Comedy", + "Crime", + "Documentary", + "Drama", + "Fantasy", + "Film-Noir", + "Horror", + "Musical", + "Mystery", + "Romance", + "Sci-Fi", + "Thriller", + "War", + "Western", ] - items = pd.read_csv(fn, sep='|', header=None, encoding='latin1', - names=['item', 'title', 'release', 'vidrelease', 'imdb'] + genres) - _log.debug('loaded %s', fn) - return items.set_index('item') + items = pd.read_csv( + fn, + sep="|", + header=None, + encoding="latin1", + names=["item", "title", "release", "vidrelease", "imdb"] + genres, + ) + _log.debug("loaded %s", fn) + return items.set_index("item") class MLM: @@ -306,13 +342,16 @@ def ratings(self): ... 
[10000054 rows x 4 columns] """ - fn = self.path / 'ratings.dat' - ratings = pd.read_csv(fn, sep=':', header=None, - names=['user', '_ui', 'item', '_ir', 'rating', '_rt', 'timestamp'], - usecols=[0, 2, 4, 6], - dtype={'user': np.int32, 'item': np.int32, - 'rating': np.float32, 'timestamp': np.int32}) - _log.debug('loaded %s', fn) + fn = self.path / "ratings.dat" + ratings = pd.read_csv( + fn, + sep=":", + header=None, + names=["user", "_ui", "item", "_ir", "rating", "_rt", "timestamp"], + usecols=[0, 2, 4, 6], + dtype={"user": np.int32, "item": np.int32, "rating": np.float32, "timestamp": np.int32}, + ) + _log.debug("loaded %s", fn) return ratings @cached @@ -332,13 +371,17 @@ def movies(self): ... [10681 rows x 2 columns] """ - fn = self.path / 'movies.dat' - movies = pd.read_csv(fn, sep=':', header=None, - names=['item', '_ir', 'title', '_tg', 'genres'], - usecols=[0, 2, 4], - dtype={'item': np.int32}) - movies.set_index('item', inplace=True) - _log.debug('loaded %s', fn) + fn = self.path / "movies.dat" + movies = pd.read_csv( + fn, + sep=":", + header=None, + names=["item", "_ir", "title", "_tg", "genres"], + usecols=[0, 2, 4], + dtype={"item": np.int32}, + ) + movies.set_index("item", inplace=True) + _log.debug("loaded %s", fn) return movies @@ -346,7 +389,8 @@ class ML10M(MLM): """ MovieLens 10M100K data set. """ - def __init__(self, path='data/ml-10M100K'): + + def __init__(self, path="data/ml-10M100K"): super().__init__(path) @@ -359,7 +403,7 @@ class ML1M(MLM): with the 10M data set. """ - def __init__(self, path='data/ml-1m'): + def __init__(self, path="data/ml-1m"): super().__init__(path) @cached @@ -379,13 +423,15 @@ def users(self): ... [6040 rows x 3 columns] """ - fn = self.path / 'users.dat' - users = pd.read_csv(fn, sep=':', header=None, - names=['user', '_ug', 'gender', '_ga', 'age', - '_ao', 'occupation', '_oz', 'zip'], - usecols=[0, 2, 4, 8], - dtype={'user': np.int32, 'gender': 'category', 'age': np.int8, - 'timestamp': np.int32}) - users.set_index('user', inplace=True) - _log.debug('loaded %s', fn) + fn = self.path / "users.dat" + users = pd.read_csv( + fn, + sep=":", + header=None, + names=["user", "_ug", "gender", "_ga", "age", "_ao", "occupation", "_oz", "zip"], + usecols=[0, 2, 4, 8], + dtype={"user": np.int32, "gender": "category", "age": np.int8, "timestamp": np.int32}, + ) + users.set_index("user", inplace=True) + _log.debug("loaded %s", fn) return users diff --git a/lenskit/math/solve.py b/lenskit/math/solve.py index dfbf966bd..8f12ec364 100644 --- a/lenskit/math/solve.py +++ b/lenskit/math/solve.py @@ -11,19 +11,23 @@ __ffi = cffi.FFI() -__uplo_U = np.array([ord('U')], dtype=np.int8) -__uplo_L = np.array([ord('L')], dtype=np.int8) -__trans_N = np.array([ord('N')], dtype=np.int8) -__trans_T = np.array([ord('T')], dtype=np.int8) -__trans_C = np.array([ord('C')], dtype=np.int8) -__diag_U = np.array([ord('U')], dtype=np.int8) -__diag_N = np.array([ord('N')], dtype=np.int8) +__uplo_U = np.array([ord("U")], dtype=np.int8) +__uplo_L = np.array([ord("L")], dtype=np.int8) +__trans_N = np.array([ord("N")], dtype=np.int8) +__trans_T = np.array([ord("T")], dtype=np.int8) +__trans_C = np.array([ord("C")], dtype=np.int8) +__diag_U = np.array([ord("U")], dtype=np.int8) +__diag_N = np.array([ord("N")], dtype=np.int8) __inc_1 = np.ones(1, dtype=np.int32) -__dtrsv = __ffi.cast("void (*) (char*, char*, char*, int*, double*, int*, double*, int*)", - get_cython_function_address("scipy.linalg.cython_blas", "dtrsv")) -__dposv = __ffi.cast("void (*) (char*, int*, int*, double*, int*, 
double*, int*, int*)", - get_cython_function_address("scipy.linalg.cython_lapack", "dposv")) +__dtrsv = __ffi.cast( + "void (*) (char*, char*, char*, int*, double*, int*, double*, int*)", + get_cython_function_address("scipy.linalg.cython_blas", "dtrsv"), +) +__dposv = __ffi.cast( + "void (*) (char*, int*, int*, double*, int*, double*, int*, int*)", + get_cython_function_address("scipy.linalg.cython_lapack", "dposv"), +) @n.njit @@ -49,10 +53,16 @@ def _dposv(A, b, lower): info = np.zeros(1, dtype=np.intc) info_p = __ffi.from_buffer(info) - __dposv(__ffi.from_buffer(uplo), n_p, nrhs_p, - __ffi.from_buffer(A), n_p, - __ffi.from_buffer(b), n_p, - info_p) + __dposv( + __ffi.from_buffer(uplo), + n_p, + nrhs_p, + __ffi.from_buffer(A), + n_p, + __ffi.from_buffer(b), + n_p, + info_p, + ) _ref_sink(narr, n_p, nrhs, nrhs_p, info, info_p) @@ -66,6 +76,6 @@ def dposv(A, b, lower=False): """ info = _dposv(A, b, lower) if info < 0: - raise ValueError('invalid args to dposv, code ' + str(info)) + raise ValueError("invalid args to dposv, code " + str(info)) elif info > 0: - raise RuntimeError('error in dposv, code ' + str(info)) + raise RuntimeError("error in dposv, code " + str(info)) diff --git a/lenskit/metrics/predict.py b/lenskit/metrics/predict.py index a6e662f69..9f245b6dc 100644 --- a/lenskit/metrics/predict.py +++ b/lenskit/metrics/predict.py @@ -14,12 +14,12 @@ def _check_missing(truth, missing): truth: the series of truth values missing: what to do with missing values """ - if missing == 'error' and truth.isna().any(): + if missing == "error" and truth.isna().any(): nmissing = truth.isna().sum() - raise ValueError('missing truth for {} predictions'.format(nmissing)) + raise ValueError("missing truth for {} predictions".format(nmissing)) -def rmse(predictions, truth, missing='error'): +def rmse(predictions, truth, missing="error"): """ Compute RMSE (root mean squared error). This is computed as: @@ -48,7 +48,7 @@ def rmse(predictions, truth, missing='error'): truth = pd.Series(truth) # realign - predictions, truth = predictions.align(truth, join='left') + predictions, truth = predictions.align(truth, join="left") _check_missing(truth, missing) diff = predictions - truth @@ -58,7 +58,7 @@ def rmse(predictions, truth, missing='error'): return np.sqrt(msq) -def mae(predictions, truth, missing='error'): +def mae(predictions, truth, missing="error"): """ Compute MAE (mean absolute error). This is computed as: @@ -86,7 +86,7 @@ def mae(predictions, truth, missing='error'): predictions = pd.Series(predictions) truth = pd.Series(truth) - predictions, truth = predictions.align(truth, join='left') + predictions, truth = predictions.align(truth, join="left") _check_missing(truth, missing) diff = predictions - truth @@ -95,7 +95,7 @@ def mae(predictions, truth, missing='error'): return adiff.mean() -def global_metric(predictions, *, score_column='prediction', metric=rmse, **kwargs): +def global_metric(predictions, *, score_column="prediction", metric=rmse, **kwargs): """ Compute a global prediction accuracy metric for a set of predictions. 
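A self-contained sketch of the aggregation helpers defined here; the tiny frame stands in for the output of batch.predict, which carries the same prediction and rating columns:

    import pandas as pd
    from lenskit.metrics.predict import rmse, global_metric, user_metric

    preds = pd.DataFrame({
        "user": [1, 1, 2, 2],
        "item": [10, 11, 10, 12],
        "prediction": [3.5, 4.0, 2.0, 5.0],
        "rating": [4.0, 3.5, 2.5, 4.5],
    })
    print(rmse(preds["prediction"], preds["rating"]))  # pooled RMSE over all rows
    print(global_metric(preds))                        # same computation via the column conventions
    print(user_metric(preds))                          # mean of the per-user RMSE values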
@@ -114,11 +114,11 @@ def global_metric(predictions, *, score_column='prediction', metric=rmse, **kwar """ scores = predictions[score_column] - truth = predictions['rating'] + truth = predictions["rating"] return metric(scores, truth, **kwargs) -def user_metric(predictions, *, score_column='prediction', metric=rmse, **kwargs): +def user_metric(predictions, *, score_column="prediction", metric=rmse, **kwargs): """ Compute a mean per-user prediction accuracy metric for a set of predictions. @@ -138,7 +138,7 @@ def user_metric(predictions, *, score_column='prediction', metric=rmse, **kwargs """ def score(df): - return metric(df[score_column], df['rating']) + return metric(df[score_column], df["rating"]) - u_scores = predictions.groupby('user').apply(score) + u_scores = predictions.groupby("user").apply(score) return u_scores.mean() diff --git a/lenskit/metrics/topn.py b/lenskit/metrics/topn.py index d89d1351a..5f66380de 100644 --- a/lenskit/metrics/topn.py +++ b/lenskit/metrics/topn.py @@ -14,6 +14,7 @@ def bulk_impl(metric): """ Decorator to register a bulk implementation for a metric. """ + def wrap(impl): metric.bulk_score = impl return impl @@ -40,23 +41,23 @@ def precision(recs, truth, k=None): if nrecs == 0: return None - ngood = recs['item'].isin(truth.index).sum() + ngood = recs["item"].isin(truth.index).sum() return ngood / nrecs @bulk_impl(precision) def _bulk_precision(recs, truth, k=None): if k is not None: - recs = recs[recs['rank'] <= k] - lcounts = pd.Series(k, index=recs['LKRecID'].unique()) - lcounts.index.name = 'LKRecID' + recs = recs[recs["rank"] <= k] + lcounts = pd.Series(k, index=recs["LKRecID"].unique()) + lcounts.index.name = "LKRecID" else: - lcounts = recs.groupby(['LKRecID'])['item'].count() + lcounts = recs.groupby(["LKRecID"])["item"].count() - good = recs.join(truth, on=['LKTruthID', 'item'], how='inner') - gcounts = good.groupby(['LKRecID'])['item'].count() + good = recs.join(truth, on=["LKTruthID", "item"], how="inner") + gcounts = good.groupby(["LKRecID"])["item"].count() - lcounts, gcounts = lcounts.align(gcounts, join='left', fill_value=0) + lcounts, gcounts = lcounts.align(gcounts, join="left", fill_value=0) return gcounts / lcounts @@ -78,30 +79,30 @@ def recall(recs, truth, k=None): nrel = min(nrel, k) recs = recs.iloc[:k] - ngood = recs['item'].isin(truth.index).sum() + ngood = recs["item"].isin(truth.index).sum() return ngood / nrel @bulk_impl(recall) def _bulk_recall(recs, truth, k=None): - tcounts = truth.reset_index().groupby('LKTruthID')['item'].count() + tcounts = truth.reset_index().groupby("LKTruthID")["item"].count() if k is not None: - _log.debug('truncating to k for recall') + _log.debug("truncating to k for recall") tcounts = np.minimum(tcounts, k) - recs = recs[recs['rank'] <= k] + recs = recs[recs["rank"] <= k] - good = recs.join(truth, on=['LKTruthID', 'item'], how='inner') - gcounts = good.groupby('LKRecID')['item'].count() + good = recs.join(truth, on=["LKTruthID", "item"], how="inner") + gcounts = good.groupby("LKRecID")["item"].count() # we need all lists, because some might have no truth (oops), some no recs (also oops) - lists = recs[['LKRecID', 'LKTruthID']].drop_duplicates() + lists = recs[["LKRecID", "LKTruthID"]].drop_duplicates() - scores = lists.join(gcounts.to_frame('ngood'), on='LKRecID', how='left') - scores['ngood'].fillna(0, inplace=True) - scores = scores.join(tcounts.to_frame('nrel'), on='LKTruthID', how='left') - scores = scores.set_index('LKRecID') - return scores['ngood'] / scores['nrel'] + scores = 
lists.join(gcounts.to_frame("ngood"), on="LKRecID", how="left") + scores["ngood"].fillna(0, inplace=True) + scores = scores.join(tcounts.to_frame("nrel"), on="LKTruthID", how="left") + scores = scores.set_index("LKRecID") + return scores["ngood"] / scores["nrel"] def hit(recs, truth, k=None): @@ -124,7 +125,7 @@ def hit(recs, truth, k=None): nrel = min(nrel, k) recs = recs.iloc[:k] - good = recs['item'].isin(truth.index) + good = recs["item"].isin(truth.index) if np.any(good): return 1 else: @@ -133,28 +134,28 @@ def hit(recs, truth, k=None): @bulk_impl(hit) def _bulk_hit(recs, truth, k=None): - tcounts = truth.reset_index().groupby('LKTruthID')['item'].count() + tcounts = truth.reset_index().groupby("LKTruthID")["item"].count() if k is not None: - _log.debug('truncating to k for recall') + _log.debug("truncating to k for recall") tcounts = np.minimum(tcounts, k) - recs = recs[recs['rank'] <= k] + recs = recs[recs["rank"] <= k] - good = recs.join(truth, on=['LKTruthID', 'item'], how='inner') - gcounts = good.groupby('LKRecID')['item'].count() + good = recs.join(truth, on=["LKTruthID", "item"], how="inner") + gcounts = good.groupby("LKRecID")["item"].count() # we need all lists, because some might have no truth (oops), some no recs (also oops) - lists = recs[['LKRecID', 'LKTruthID']].drop_duplicates() + lists = recs[["LKRecID", "LKTruthID"]].drop_duplicates() - scores = lists.join(gcounts.to_frame('ngood'), on='LKRecID', how='left') - scores['ngood'].fillna(0, inplace=True) + scores = lists.join(gcounts.to_frame("ngood"), on="LKRecID", how="left") + scores["ngood"].fillna(0, inplace=True) - scores = scores.join(tcounts.to_frame('nrel'), on='LKTruthID', how='left') - scores = scores.set_index('LKRecID') + scores = scores.join(tcounts.to_frame("nrel"), on="LKTruthID", how="left") + scores = scores.set_index("LKRecID") - good = scores['ngood'] > 0 - good = good.astype('f4') - good[scores['nrel'] == 0] = np.nan + good = scores["ngood"] > 0 + good = good.astype("f4") + good[scores["nrel"] == 0] = np.nan return good @@ -173,8 +174,8 @@ def recip_rank(recs, truth, k=None): if k is not None: recs = recs.iloc[:k] - good = recs['item'].isin(truth.index) - npz, = np.nonzero(good.to_numpy()) + good = recs["item"].isin(truth.index) + (npz,) = np.nonzero(good.to_numpy()) if len(npz): return 1.0 / (npz[0] + 1.0) else: @@ -185,17 +186,17 @@ def recip_rank(recs, truth, k=None): def _bulk_rr(recs, truth, k=None): # find everything with truth if k is not None: - recs = recs[recs['rank'] <= k] - joined = recs.join(truth, on=['LKTruthID', 'item'], how='inner') + recs = recs[recs["rank"] <= k] + joined = recs.join(truth, on=["LKTruthID", "item"], how="inner") # compute min ranks - ranks = joined.groupby('LKRecID')['rank'].agg('min') + ranks = joined.groupby("LKRecID")["rank"].agg("min") # reciprocal ranks scores = 1.0 / ranks - _log.debug('have %d scores with MRR %.3f', len(scores), scores.mean()) + _log.debug("have %d scores with MRR %.3f", len(scores), scores.mean()) # fill with zeros - rec_ids = recs['LKRecID'].unique() + rec_ids = recs["LKRecID"].unique() scores = scores.reindex(rec_ids, fill_value=0.0) - _log.debug('filled to get %s scores w/ MRR %.3f', len(scores), scores.mean()) + _log.debug("filled to get %s scores w/ MRR %.3f", len(scores), scores.mean()) # and we're done return scores @@ -226,7 +227,7 @@ def _dcg(scores, discount=np.log2): def _fixed_dcg(n, discount=np.log2): - ranks = np.arange(1, n+1) + ranks = np.arange(1, n + 1) disc = discount(ranks) disc = np.maximum(disc, 1) disc = 
np.reciprocal(disc) @@ -255,11 +256,11 @@ def dcg(recs, truth, discount=np.log2): if the discount is greater than 1. """ - tpos = truth.index.get_indexer(recs['item']) + tpos = truth.index.get_indexer(recs["item"]) tgood = tpos >= 0 - if 'rating' in truth.columns: + if "rating" in truth.columns: # make an array of ratings for this rec list - r_rates = truth['rating'].values[tpos] + r_rates = truth["rating"].values[tpos] r_rates[tpos < 0] = 0 achieved = _dcg(r_rates, discount) else: @@ -304,15 +305,15 @@ def ndcg(recs, truth, discount=np.log2, k=None): if k is not None: recs = recs.iloc[:k] - tpos = truth.index.get_indexer(recs['item']) + tpos = truth.index.get_indexer(recs["item"]) - if 'rating' in truth.columns: + if "rating" in truth.columns: i_rates = np.sort(truth.rating.values)[::-1] if k is not None: i_rates = i_rates[:k] ideal = _dcg(i_rates, discount) # make an array of ratings for this rec list - r_rates = truth['rating'].values[tpos] + r_rates = truth["rating"].values[tpos] r_rates[tpos < 0] = 0 achieved = _dcg(r_rates, discount) else: @@ -328,36 +329,36 @@ def ndcg(recs, truth, discount=np.log2, k=None): @bulk_impl(ndcg) def _bulk_ndcg(recs, truth, discount=np.log2, k=None): - if 'rating' not in truth.columns: + if "rating" not in truth.columns: truth = truth.assign(rating=np.ones(len(truth), dtype=np.float32)) - ideal = truth.groupby(level='LKTruthID')['rating'].rank(method='first', ascending=False) + ideal = truth.groupby(level="LKTruthID")["rating"].rank(method="first", ascending=False) if k is not None: ideal = ideal[ideal <= k] ideal = discount(ideal) ideal = np.maximum(ideal, 1) - ideal = truth['rating'] / ideal - ideal = ideal.groupby(level='LKTruthID').sum() - ideal.name = 'ideal' + ideal = truth["rating"] / ideal + ideal = ideal.groupby(level="LKTruthID").sum() + ideal.name = "ideal" - list_ideal = recs[['LKRecID', 'LKTruthID']].drop_duplicates() - list_ideal = list_ideal.join(ideal, on='LKTruthID', how='left') - list_ideal = list_ideal.set_index('LKRecID') + list_ideal = recs[["LKRecID", "LKTruthID"]].drop_duplicates() + list_ideal = list_ideal.join(ideal, on="LKTruthID", how="left") + list_ideal = list_ideal.set_index("LKRecID") if k is not None: - recs = recs[recs['rank'] <= k] - rated = recs.join(truth, on=['LKTruthID', 'item'], how='inner') - rd = discount(rated['rank']) + recs = recs[recs["rank"] <= k] + rated = recs.join(truth, on=["LKTruthID", "item"], how="inner") + rd = discount(rated["rank"]) rd = np.maximum(rd, 1) - rd = rated['rating'] / rd - rd = rated[['LKRecID']].assign(util=rd) - dcg = rd.groupby(['LKRecID'])['util'].sum().reset_index(name='dcg') - dcg = dcg.set_index('LKRecID') + rd = rated["rating"] / rd + rd = rated[["LKRecID"]].assign(util=rd) + dcg = rd.groupby(["LKRecID"])["util"].sum().reset_index(name="dcg") + dcg = dcg.set_index("LKRecID") - dcg = dcg.join(list_ideal, how='outer') - dcg['ndcg'] = dcg['dcg'].fillna(0) / dcg['ideal'] + dcg = dcg.join(list_ideal, how="outer") + dcg["ndcg"] = dcg["dcg"].fillna(0) / dcg["ideal"] - return dcg['ndcg'] + return dcg["ndcg"] def rbp(recs, truth, k=None, patience=0.5, normalize=False): @@ -396,18 +397,18 @@ def rbp(recs, truth, k=None, patience=0.5, normalize=False): else: k = len(recs) - if 'rank' not in recs.columns: - recs = recs.assign(rank=np.arange(1, len(recs)+1)) + if "rank" not in recs.columns: + recs = recs.assign(rank=np.arange(1, len(recs) + 1)) - if np.min(recs['rank']) != 1: - warnings.warn('rank should start with 1') + if np.min(recs["rank"]) != 1: + warnings.warn("rank should start with 
1") nrel = len(truth) if nrel == 0: return None - good = recs['item'].isin(truth.index) - ranks = recs['rank'][good] + good = recs["item"].isin(truth.index) + ranks = recs["rank"][good] disc = patience ** (ranks - 1) rbp = np.sum(disc) if normalize: @@ -423,36 +424,36 @@ def rbp(recs, truth, k=None, patience=0.5, normalize=False): @bulk_impl(rbp) def _bulk_rbp(recs, truth, k=None, patience=0.5, normalize=False): if k is not None: - recs = recs[recs['rank'] <= k] + recs = recs[recs["rank"] <= k] - good = recs.join(truth, on=['LKTruthID', 'item'], how='inner') - good['rbp_disc'] = patience ** (good['rank'] - 1) - scores = good.groupby('LKRecID')['rbp_disc'].sum() + good = recs.join(truth, on=["LKTruthID", "item"], how="inner") + good["rbp_disc"] = patience ** (good["rank"] - 1) + scores = good.groupby("LKRecID")["rbp_disc"].sum() if normalize: - tns = truth.reset_index().groupby('LKTruthID')['item'].count() + tns = truth.reset_index().groupby("LKTruthID")["item"].count() if k is not None: tns[tns > k] = k max_nrel = np.max(tns) # compute 0...k-1 (the powers of k-1 for 1..k) kseq = np.arange(max_nrel) # compute the discounts at each k-1 - nd = patience ** kseq + nd = patience**kseq # convert to a series of the sums, up through each k max_rbps = pd.Series(np.cumsum(nd), index=kseq + 1) # get a rec/truth mapping - map = recs[['LKRecID', 'LKTruthID']].drop_duplicates() - map.set_index('LKRecID', inplace=True) + map = recs[["LKRecID", "LKTruthID"]].drop_duplicates() + map.set_index("LKRecID", inplace=True) map = map.reindex(scores.index) # map to nrel, and then to the max RBPs - map = map.join(tns.to_frame('nrel'), on='LKTruthID', how='left') - map = map.join(max_rbps.to_frame('rbp_max'), on='nrel', how='left') + map = map.join(tns.to_frame("nrel"), on="LKTruthID", how="left") + map = map.join(max_rbps.to_frame("rbp_max"), on="nrel", how="left") # divide each score by max RBP - scores /= map['rbp_max'] + scores /= map["rbp_max"] else: - scores *= (1 - patience) + scores *= 1 - patience - scores = scores.reindex(recs['LKRecID'].unique(), fill_value=0) + scores = scores.reindex(recs["LKRecID"].unique(), fill_value=0) return scores diff --git a/lenskit/sharing/__init__.py b/lenskit/sharing/__init__.py index 7ee142c43..eb17ac946 100644 --- a/lenskit/sharing/__init__.py +++ b/lenskit/sharing/__init__.py @@ -15,7 +15,7 @@ def _save_mode(): - return getattr(_store_state, 'mode', 'save') + return getattr(_store_state, "mode", "save") @contextmanager @@ -25,7 +25,7 @@ def sharing_mode(): sharing, not model persistence. """ old = _save_mode() - _store_state.mode = 'share' + _store_state.mode = "share" try: yield finally: @@ -38,7 +38,7 @@ def in_share_context(): :func:`sharing_mode` context, which means model pickling will be used for cross-process sharing. """ - return _save_mode() == 'share' + return _save_mode() == "share" class PersistedModel(ABC): @@ -80,9 +80,9 @@ def transfer(self): ``self`` (for convenience) """ if not self.is_owner: - warnings.warning('non-owning objects should not be transferred', stacklevel=1) + warnings.warning("non-owning objects should not be transferred", stacklevel=1) else: - self.is_owner = 'transfer' + self.is_owner = "transfer" return self @@ -113,15 +113,15 @@ def persist(model, *, method=None): PersistedModel: The persisted object. 
""" if method is not None: - if method == 'binpickle': + if method == "binpickle": method = persist_binpickle - elif method == 'shm': + elif method == "shm": method = persist_shm - elif not hasattr(method, '__call__'): - raise ValueError('invalid method %s: must be one of binpickle, shm, or a funciton') + elif not hasattr(method, "__call__"): + raise ValueError("invalid method %s: must be one of binpickle, shm, or a funciton") if method is None: - if SHM_AVAILABLE and 'LK_TEMP_DIR' not in os.environ: + if SHM_AVAILABLE and "LK_TEMP_DIR" not in os.environ: method = persist_shm else: method = persist_binpickle @@ -129,5 +129,5 @@ def persist(model, *, method=None): return method(model) -from .binpickle import persist_binpickle, BPKPersisted # noqa: E402,F401 +from .binpickle import persist_binpickle, BPKPersisted # noqa: E402,F401 from .shm import persist_shm, SHMPersisted, SHM_AVAILABLE # noqa: E402,F401 diff --git a/lenskit/sharing/binpickle.py b/lenskit/sharing/binpickle.py index be9240936..c3d850761 100644 --- a/lenskit/sharing/binpickle.py +++ b/lenskit/sharing/binpickle.py @@ -27,11 +27,11 @@ def persist_binpickle(model, dir=None, file=None): path = pathlib.Path(file) else: if dir is None: - dir = os.environ.get('LK_TEMP_DIR', None) - fd, path = tempfile.mkstemp(suffix='.bpk', prefix='lkpy-', dir=dir) + dir = os.environ.get("LK_TEMP_DIR", None) + fd, path = tempfile.mkstemp(suffix=".bpk", prefix="lkpy-", dir=dir) os.close(fd) path = pathlib.Path(path) - _log.debug('persisting %s to %s', model, path) + _log.debug("persisting %s to %s", model, path) with binpickle.BinPickler.mappable(path) as bp, sharing_mode(): bp.dump(model) return BPKPersisted(path) @@ -46,7 +46,7 @@ def __init__(self, path): def get(self): if self._bpk_file is None: - _log.debug('loading %s', self.path) + _log.debug("loading %s", self.path) self._bpk_file = binpickle.BinPickleFile(self.path, direct=True) self._model = self._bpk_file.load() return self._model @@ -55,35 +55,35 @@ def close(self, unlink=True): if self._bpk_file is not None: self._model = None try: - _log.debug('closing BPK file') + _log.debug("closing BPK file") try: self._bpk_file.close() except BufferError: - _log.debug('could not close %s, collecting garbage and retrying', self.path) + _log.debug("could not close %s, collecting garbage and retrying", self.path) gc.collect() self._bpk_file.close() except (BufferError, IOError) as e: - _log.warn('error closing %s: %s', self.path, e) + _log.warn("error closing %s: %s", self.path, e) self._bpk_file = None if self.is_owner and unlink: assert self._model is None if unlink: - _log.debug('deleting %s', self.path) + _log.debug("deleting %s", self.path) try: self.path.unlink() except IOError as e: - _log.warn('could not remove %s: %s', self.path, e) + _log.warn("could not remove %s: %s", self.path, e) self.is_owner = False def __getstate__(self): d = dict(self.__dict__) - d['_bpk_file'] = None - d['_model'] = None - if self.is_owner == 'transfer': - d['is_owner'] = True + d["_bpk_file"] = None + d["_model"] = None + if self.is_owner == "transfer": + d["is_owner"] = True else: - d['is_owner'] = False + d["is_owner"] = False return d def __del___(self): diff --git a/lenskit/sharing/shm.py b/lenskit/sharing/shm.py index 22729cd1e..1409049b2 100644 --- a/lenskit/sharing/shm.py +++ b/lenskit/sharing/shm.py @@ -4,7 +4,8 @@ from . 
import sharing_mode, PersistedModel import multiprocessing.shared_memory as shm -SHM_AVAILABLE = sys.platform != 'win32' + +SHM_AVAILABLE = sys.platform != "win32" _log = logging.getLogger(__name__) @@ -27,12 +28,17 @@ def persist_shm(model, dir=None): data = pickle.dumps(model, protocol=5, buffer_callback=buffers.append) total_size = sum(memoryview(b).nbytes for b in buffers) - _log.info('serialized %s to %d pickle bytes with %d buffers of %d bytes', - model, len(data), len(buffers), total_size) + _log.info( + "serialized %s to %d pickle bytes with %d buffers of %d bytes", + model, + len(data), + len(buffers), + total_size, + ) if buffers: # blit the buffers to the SHM block - _log.debug('preparing to share %d buffers', len(buffers)) + _log.debug("preparing to share %d buffers", len(buffers)) memory = shm.SharedMemory(create=True, size=total_size) cur_offset = 0 blocks = [] @@ -40,7 +46,7 @@ def persist_shm(model, dir=None): ba = buf.raw() blen = ba.nbytes bend = cur_offset + blen - _log.debug('saving %d bytes in buffer %d/%d', blen, i+1, len(buffers)) + _log.debug("saving %d bytes in buffer %d/%d", blen, i + 1, len(buffers)) memory.buf[cur_offset:bend] = ba blocks.append((cur_offset, bend)) cur_offset = bend @@ -65,7 +71,7 @@ def __init__(self, data, memory, blocks): def get(self): if self._model is None: - _log.debug('loading model from shared memory') + _log.debug("loading model from shared memory") shm = self._open() buffers = [] for bs, be in self.blocks: @@ -78,11 +84,11 @@ def get(self): def close(self, unlink=True): self._model = None - _log.debug('releasing SHM buffers') + _log.debug("releasing SHM buffers") self.buffers = None if self.memory is not None: self.memory.close() - if unlink and self.is_owner and self.is_owner != 'transfer': + if unlink and self.is_owner and self.is_owner != "transfer": self.memory.unlink() self.is_owner = False self.memory = None @@ -94,16 +100,16 @@ def _open(self): def __getstate__(self): return { - 'pickle_data': self.pickle_data, - 'blocks': self.blocks, - 'shm_name': self.shm_name, - 'is_owner': True if self.is_owner == 'transfer' else False + "pickle_data": self.pickle_data, + "blocks": self.blocks, + "shm_name": self.shm_name, + "is_owner": True if self.is_owner == "transfer" else False, } def __setstate__(self, state): self.__dict__.update(state) if self.is_owner: - _log.debug('opening shared buffers after ownership transfer') + _log.debug("opening shared buffers after ownership transfer") self._open() def __del__(self): diff --git a/lenskit/topn.py b/lenskit/topn.py index 9bb19c3b4..0ca22072b 100644 --- a/lenskit/topn.py +++ b/lenskit/topn.py @@ -16,7 +16,7 @@ def _length(df, *args, **kwargs): @bulk_impl(_length) def _bulk_length(df, *args): - return df.groupby('LKRecID')['item'].count() + return df.groupby("LKRecID")["item"].count() class RecListAnalysis: @@ -42,11 +42,11 @@ class RecListAnalysis: The columns to group by, or ``None`` to use the default. """ - DEFAULT_SKIP_COLS = ['item', 'rank', 'score', 'rating'] + DEFAULT_SKIP_COLS = ["item", "rank", "score", "rating"] def __init__(self, group_cols=None, n_jobs=None): self.group_cols = group_cols - self.metrics = [(_length, 'nrecs', {})] + self.metrics = [(_length, "nrecs", {})] self.n_jobs = n_jobs def add_metric(self, metric, *, name=None, **kwargs): @@ -86,7 +86,7 @@ def compute(self, recs, truth, *, include_missing=False): Returns: pandas.DataFrame: The results of the analysis. 
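A usage sketch for the analysis driver above; it assumes recs is a batch.recommend output and truth is the held-out test frame from the earlier sketches, and it assumes the result columns take their names from the metric functions:

    from lenskit.topn import RecListAnalysis
    from lenskit.metrics import topn

    rla = RecListAnalysis()
    rla.add_metric(topn.ndcg)
    rla.add_metric(topn.precision, k=10)
    # recs: output of batch.recommend; truth: held-out ratings with user and item columns
    scores = rla.compute(recs, truth, include_missing=True)
    print(scores["ndcg"].mean(), scores["precision"].mean())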
""" - _log.info('analyzing %d recommendations (%d truth rows)', len(recs), len(truth)) + _log.info("analyzing %d recommendations (%d truth rows)", len(recs), len(truth)) rec_key, truth_key = _df_keys(recs.columns, truth.columns, self.group_cols) @@ -95,19 +95,19 @@ def compute(self, recs, truth, *, include_missing=False): timer = Stopwatch() - _log.info('collecting metric results') + _log.info("collecting metric results") bulk_res = [] ind_metrics = [] for mf, mn, margs in self.metrics: - if hasattr(mf, 'bulk_score') and 'rank' in r_data.columns: - _log.debug('bulk-scoring %s', mn) + if hasattr(mf, "bulk_score") and "rank" in r_data.columns: + _log.debug("bulk-scoring %s", mn) mbs = mf.bulk_score(r_data, t_data, **margs).to_frame(name=mn) - assert mbs.index.name == 'LKRecID' + assert mbs.index.name == "LKRecID" bulk_res.append(mbs) else: ind_metrics.append((mf, mn, margs)) if bulk_res: - bulk_res = ft.reduce(lambda df1, df2: df1.join(df2, how='outer'), bulk_res) + bulk_res = ft.reduce(lambda df1, df2: df1.join(df2, how="outer"), bulk_res) else: bulk_res = None @@ -118,13 +118,13 @@ def worker(rdf): return res if ind_metrics: - _log.debug('applying individual metrics') - groups = r_data.groupby(['LKRecID', 'LKTruthID'], sort=False) - if hasattr(groups, 'progress_apply'): + _log.debug("applying individual metrics") + groups = r_data.groupby(["LKRecID", "LKTruthID"], sort=False) + if hasattr(groups, "progress_apply"): ind_res = groups.progress_apply(worker) else: ind_res = groups.apply(worker) - ind_res = ind_res.reset_index('LKTruthID', drop=True) + ind_res = ind_res.reset_index("LKTruthID", drop=True) if bulk_res is not None: res = bulk_res.join(ind_res) @@ -133,60 +133,60 @@ def worker(rdf): else: res = bulk_res - _log.debug('transforming results') - res = r_ident.join(res, on='LKRecID').drop(columns=['LKRecID', 'LKTruthID']) + _log.debug("transforming results") + res = r_ident.join(res, on="LKRecID").drop(columns=["LKRecID", "LKTruthID"]) - _log.info('measured %d lists in %s', len(res), timer) + _log.info("measured %d lists in %s", len(res), timer) if include_missing: - _log.info('filling in missing user info (%d initial rows)', len(res)) + _log.info("filling in missing user info (%d initial rows)", len(res)) ug_cols = [c for c in rec_key if c not in truth_key] - tcount = truth.groupby(truth_key)['item'].count().to_frame('ntruth') - _log.debug('truth counts:\n%s', tcount) + tcount = truth.groupby(truth_key)["item"].count().to_frame("ntruth") + _log.debug("truth counts:\n%s", tcount) if ug_cols: - _log.debug('regrouping by %s to fill', ug_cols) - _log.debug('pre-group series:\n%s', res) + _log.debug("regrouping by %s to fill", ug_cols) + _log.debug("pre-group series:\n%s", res) rdict = {} for key, df in res.groupby(ug_cols): - df2 = df.drop(columns=ug_cols).join(tcount, how='outer', on=truth_key) + df2 = df.drop(columns=ug_cols).join(tcount, how="outer", on=truth_key) rdict[key] = df2 res = pd.concat(rdict, names=ug_cols) - _log.debug('joined result:\n%s', res) + _log.debug("joined result:\n%s", res) res = res.reset_index(ug_cols) res.reset_index(inplace=True, drop=True) - _log.debug('final joined result:\n%s', res) + _log.debug("final joined result:\n%s", res) else: - _log.debug('no ungroup cols, directly merging to fill') - res = res.join(tcount, how='outer', on=truth_key) - _log.debug('final columns: %s', res.columns) - _log.debug('index levels: %s', res.index.names) - _log.debug('expanded to %d rows', len(res)) - res['ntruth'] = res['ntruth'].fillna(0) - res['nrecs'] = 
res['nrecs'].fillna(0) + _log.debug("no ungroup cols, directly merging to fill") + res = res.join(tcount, how="outer", on=truth_key) + _log.debug("final columns: %s", res.columns) + _log.debug("index levels: %s", res.index.names) + _log.debug("expanded to %d rows", len(res)) + res["ntruth"] = res["ntruth"].fillna(0) + res["nrecs"] = res["nrecs"].fillna(0) return res.set_index(rec_key) def _number_truth(self, truth, truth_key): - _log.info('numbering truth lists') + _log.info("numbering truth lists") truth_df = truth[truth_key].drop_duplicates() - truth_df['LKTruthID'] = np.arange(len(truth_df)) + truth_df["LKTruthID"] = np.arange(len(truth_df)) truth = pd.merge(truth_df, truth, on=truth_key).drop(columns=truth_key) - truth.set_index(['LKTruthID', 'item'], inplace=True) + truth.set_index(["LKTruthID", "item"], inplace=True) if not truth.index.is_unique: - _log.warn('truth index not unique: may have duplicate items\n%s', truth) + _log.warn("truth index not unique: may have duplicate items\n%s", truth) return truth_df, truth def _number_recs(self, recs, truth_key, rec_key, t_ident): - _log.info('numbering rec lists') + _log.info("numbering rec lists") rec_df = recs[rec_key].drop_duplicates() - rec_df['LKRecID'] = np.arange(len(rec_df)) - rec_df = pd.merge(rec_df, t_ident, on=truth_key, how='left') + rec_df["LKRecID"] = np.arange(len(rec_df)) + rec_df = pd.merge(rec_df, t_ident, on=truth_key, how="left") recs = pd.merge(rec_df, recs, on=rec_key).drop(columns=rec_key) return rec_df, recs @@ -199,6 +199,6 @@ def _df_keys(r_cols, t_cols, g_cols=None, skip_cols=RecListAnalysis.DEFAULT_SKIP truth_key = [c for c in g_cols if c in t_cols] rec_key = [c for c in g_cols if c not in t_cols] + truth_key - _log.info('using rec key columns %s', rec_key) - _log.info('using truth key columns %s', truth_key) + _log.info("using rec key columns %s", rec_key) + _log.info("using truth key columns %s", truth_key) return rec_key, truth_key diff --git a/lenskit/util/__init__.py b/lenskit/util/__init__.py index 8740287f6..2267e6835 100644 --- a/lenskit/util/__init__.py +++ b/lenskit/util/__init__.py @@ -20,12 +20,16 @@ _log = logging.getLogger(__name__) __all__ = [ - 'log_to_stderr', 'log_to_notebook', - 'Stopwatch', - 'read_df_detect', - 'rng', 'init_rng', 'derivable_rng', - 'proc_count', - 'clone', 'clean_str' + "log_to_stderr", + "log_to_notebook", + "Stopwatch", + "read_df_detect", + "rng", + "init_rng", + "derivable_rng", + "proc_count", + "clone", + "clean_str", ] @@ -46,8 +50,8 @@ def clone(algo): >>> copy.damping == orig.damping True """ - _log.debug('cloning %s', algo) - if isinstance(algo, Algorithm) or hasattr(algo, 'get_params'): + _log.debug("cloning %s", algo) + if isinstance(algo, Algorithm) or hasattr(algo, "get_params"): params = algo.get_params(deep=False) sps = dict([(k, clone(v)) for (k, v) in params.items()]) @@ -59,7 +63,7 @@ def clone(algo): class LastMemo: - def __init__(self, func, check_type='identity'): + def __init__(self, func, check_type="identity"): self.function = func self.check = check_type self.memory = None @@ -73,13 +77,13 @@ def __call__(self, arg): return self.result def _arg_is_last(self, arg): - if self.check == 'identity': + if self.check == "identity": return arg is self.memory - elif self.check == 'equality': + elif self.check == "equality": return arg == self.memory -def last_memo(func=None, check_type='identity'): +def last_memo(func=None, check_type="identity"): if func is None: return lambda f: LastMemo(f, check_type) else: @@ -90,7 +94,7 @@ def cached(prop): """ 
Decorator for property getters to cache the property value. """ - cache = '_cached_' + prop.__name__ + cache = "_cached_" + prop.__name__ def getter(self): val = getattr(self, cache, None) @@ -114,7 +118,7 @@ def max_memory(): res = resource.getrusage(resource.RUSAGE_SELF) return "%.1f MiB" % (res.ru_maxrss / 1024,) else: - return 'unknown' + return "unknown" def cur_memory(): @@ -123,7 +127,7 @@ def cur_memory(): res = resource.getrusage(resource.RUSAGE_SELF) return "%.1f MiB" % (res.ru_idrss,) else: - return 'unknown' + return "unknown" def clean_str(s): @@ -135,4 +139,5 @@ def check_env(): Check the runtime environment for potential performance or stability problems. """ from .debug import check_env + return check_env() diff --git a/lenskit/util/accum.py b/lenskit/util/accum.py index ed725c838..060da81ae 100644 --- a/lenskit/util/accum.py +++ b/lenskit/util/accum.py @@ -11,8 +11,8 @@ def _pair_downheap(pos: int, sp, limit, ks, vs): finished = False while not finished: min = pos - left = 2*pos + 1 - right = 2*pos + 2 + left = 2 * pos + 1 + right = 2 * pos + 2 if left < limit and vs[sp + left] < vs[sp + min]: min = left if right < limit and vs[sp + right] < vs[sp + min]: @@ -87,7 +87,7 @@ def kvp_minheap_sort(sp, ep, keys, vals): vals: the value array """ - for i in range(ep-1, sp, -1): + for i in range(ep - 1, sp, -1): swap(keys, i, sp) swap(vals, i, sp) - _pair_downheap(0, sp, i-sp, keys, vals) + _pair_downheap(0, sp, i - sp, keys, vals) diff --git a/lenskit/util/debug.py b/lenskit/util/debug.py index dd9694559..a34a83d2f 100644 --- a/lenskit/util/debug.py +++ b/lenskit/util/debug.py @@ -2,7 +2,6 @@ Debugging utility code. Also runnable as a Python command. Usage: - lenskit.util.debug [options] --libraries lenskit.util.debug [options] --blas-info lenskit.util.debug [options] --numba-info lenskit.util.debug [options] --check-env @@ -12,15 +11,12 @@ Turn on verbose logging """ -from pathlib import Path import sys import logging -import ctypes from typing import Optional from dataclasses import dataclass import numba -import psutil -import numpy as np +import threadpoolctl from .parallel import is_worker @@ -52,168 +48,38 @@ class NumbaInfo: threads: int -def _get_shlibs(): - proc = psutil.Process() - if hasattr(proc, 'memory_maps'): - return [mm.path for mm in proc.memory_maps()] - else: - return [] - - -def _guess_layer(): - layer = None - - _log.debug('scanning process memory maps for MKL threading layers') - for mm in _get_shlibs(): - if 'mkl_intel_thread' in mm: - _log.debug('found library %s linked', mm) - if layer: - _log.warn('multiple threading layers detected') - layer = 'intel' - elif 'mkl_tbb_thread' in mm: - _log.debug('found library %s linked', mm) - if layer: - _log.warn('multiple threading layers detected') - layer = 'tbb' - elif 'mkl_gnu_thread' in mm: - _log.debug('found library %s linked', mm) - if layer: - _log.warn('multiple threading layers detected') - layer = 'gnu' - - return layer - - -def guess_blas_unix(): - _log.info('opening self DLL') - dll = ctypes.CDLL(None) - - _log.debug('checking for MKL') - try: - mkl_vstr = dll.mkl_get_version_string - mkl_vbuf = ctypes.create_string_buffer(256) - mkl_vstr(mkl_vbuf, 256) - version = mkl_vbuf.value.decode().strip() - _log.debug('version %s', version) - - mkl_mth = dll.mkl_get_max_threads - mkl_mth.restype = ctypes.c_int - threads = mkl_mth() - - layer = _guess_layer() - - return BlasInfo('mkl', layer, threads, version) - except AttributeError as e: - _log.debug('MKL attribute error: %s', e) - pass # no MKL - - 
_log.debug('checking BLAS for OpenBLAS') - np_dll = ctypes.CDLL(np.core._multiarray_umath.__file__) - try: - openblas_vstr = np_dll.openblas_get_config - openblas_vstr.restype = ctypes.c_char_p - version = openblas_vstr().decode() - _log.debug('version %s', version) - - openblas_th = np_dll.openblas_get_num_threads - openblas_th.restype = ctypes.c_int - threads = openblas_th() - _log.debug('threads %d', threads) - - return BlasInfo('openblas', None, threads, version) - except AttributeError as e: - _log.info('OpenBLAS error: %s', e) - - return BlasInfo(None, None, None, 'unknown') - - -def _find_win_blas_path(): - for lib in _get_shlibs(): - path = Path(lib) - name = path.name - if not name.startswith('libopenblas'): +def blas_info(): + pools = threadpoolctl.threadpool_info() + blas = None + for pool in pools: + if pool["user_api"] != "blas": continue - if path.parent.parent.name == 'numpy': - _log.debug('found BLAS at %s', lib) - return lib - elif path.parent.name == 'numpy.libs': - _log.debug('found BLAS at %s', lib) - return lib - - -def _find_win_blas(): - try: - blas_dll = ctypes.cdll.libblas - _log.debug('loaded BLAS dll %s', blas_dll) - return blas_dll - except (FileNotFoundError, OSError) as e: - _log.debug('no LIBBLAS, searching') - path = _find_win_blas_path() - if path is not None: - return ctypes.CDLL(path) - else: - _log.error('could not load LIBBLAS: %s', e) - return BlasInfo(None, None, None, 'unknown') - - -def guess_blas_windows(): - blas_dll = _find_win_blas() - - _log.debug('checking BLAS for MKL') - try: - mkl_vstr = blas_dll.mkl_get_version_string - mkl_vbuf = ctypes.create_string_buffer(256) - mkl_vstr(mkl_vbuf, 256) - version = mkl_vbuf.value.decode().strip() - _log.debug('version %s', version) - - mkl_mth = blas_dll.mkl_get_max_threads - mkl_mth.restype = ctypes.c_int - threads = mkl_mth() - - layer = _guess_layer() - - return BlasInfo('mkl', layer, threads, version) - except AttributeError as e: - _log.debug('MKL attribute error: %s', e) - pass # no MKL - - _log.debug('checking BLAS for OpenBLAS') - try: - openblas_vstr = blas_dll.openblas_get_config - openblas_vstr.restype = ctypes.c_char_p - version = openblas_vstr().decode() - - openblas_th = blas_dll.openblas_get_num_threads - openblas_th.restype = ctypes.c_int - threads = openblas_th() - _log.debug('threads %d', threads) - - return BlasInfo('openblas', None, threads, version) - except AttributeError as e: - _log.info('OpenBLAS error: %s', e) + if blas is not None: + _log.warning("found multiple BLAS layers, using first") + _log.info("later layer is: %s", pool) + continue - return BlasInfo(None, None, None, 'unknown') + blas = BlasInfo( + pool["internal_api"], + pool.get("threading_layer", None), + pool.get("num_threads", None), + pool["version"], + ) - -def blas_info(): - if sys.platform == 'win32': - return guess_blas_windows() - else: - return guess_blas_unix() + return blas def numba_info(): x = _par_test(100) - _log.debug('sum: %d', x) + _log.debug("sum: %d", x) try: layer = numba.threading_layer() except ValueError: - _log.info('Numba threading not initialized') + _log.info("Numba threading not initialized") return None - _log.info('numba threading layer: %s', layer) + _log.info("numba threading layer: %s", layer) nth = numba.get_num_threads() return NumbaInfo(layer, nth) @@ -231,44 +97,46 @@ def check_env(): blas = blas_info() numba = numba_info() except Exception as e: - _log.error('error inspecting runtime environment: %s', e) + _log.error("error inspecting runtime environment: %s", e) _already_checked 
= True return if numba is None: - _log.warning('Numba JIT seems to be disabled - this will hurt performance') + _log.warning("Numba JIT seems to be disabled - this will hurt performance") _already_checked = True return - _log.info('Using BLAS %s', blas.impl) + if blas is None: + _log.warning("threadpoolctl could not find your BLAS") + _already_checked = True + return + + _log.info("Using BLAS %s", blas.impl) + + if numba.threading != "tbb": + _log.info("Numba is using threading layer %s - consider TBB", numba.threading) - if numba.threading != 'tbb': - _log.warning('Numba is using threading layer %s - consider TBB', numba.threading) - _log.info('Non-TBB threading is often slower and can cause crashes') + if numba.threading == "tbb" and blas.threading == "tbb": + _log.info("Numba and BLAS both using TBB - good") + + if numba.threading == "tbb" and blas.impl == "mkl" and blas.threading != "tbb": + _log.warning("Numba using TBB but MKL is using %s", blas.threading) + _log.info("Set MKL_THREADING_LAYER=tbb for improved performance") problems += 1 - if numba.threading == 'tbb' and blas.threading == 'tbb': - _log.info('Numba and BLAS both using TBB - good') - elif blas.threads and blas.threads > 1 and numba.threads > 1: - _log.warning('BLAS using multiple threads - can cause oversubscription') + if blas.threads and blas.threads > 1 and numba.threads > 1: + # TODO make this be fine in OpenMP configurations + _log.warning("BLAS using multiple threads - can cause oversubscription") + _log.info("See https://mde.one/lkpy-blas for information on tuning BLAS for LensKit") problems += 1 if problems: - _log.warning('found %d potential runtime problems - see https://boi.st/lkpy-perf', - problems) + _log.warning("found %d potential runtime problems - see https://boi.st/lkpy-perf", problems) _already_checked = True return problems -def print_libraries(): - p = psutil.Process() - - _log.info('printing process libraries') - for map in p.memory_maps(): - print(map.path) - - def print_blas_info(): blas = blas_info() print(blas) @@ -281,20 +149,20 @@ def print_numba_info(): def main(): from docopt import docopt + opts = docopt(__doc__) - level = logging.DEBUG if opts['--verbose'] else logging.INFO - logging.basicConfig(level=level, stream=sys.stderr, format='%(levelname)s %(name)s %(message)s') + level = logging.DEBUG if opts["--verbose"] else logging.INFO + logging.basicConfig(level=level, stream=sys.stderr, format="%(levelname)s %(name)s %(message)s") + logging.getLogger("numba").setLevel(logging.INFO) - if opts['--libraries']: - print_libraries() - if opts['--blas-info']: + if opts["--blas-info"]: print_blas_info() - if opts['--numba-info']: + if opts["--numba-info"]: print_numba_info() - if opts['--check-env']: + if opts["--check-env"]: check_env() -if __name__ == '__main__': - _log = logging.getLogger('lenskit.util.debug') +if __name__ == "__main__": + _log = logging.getLogger("lenskit.util.debug") main() diff --git a/lenskit/util/log.py b/lenskit/util/log.py index 91c939518..05b05672a 100644 --- a/lenskit/util/log.py +++ b/lenskit/util/log.py @@ -16,6 +16,7 @@ class InjectHandler: "Handler that re-injects a message into parent process logging" + level = logging.DEBUG def handle(self, record): @@ -36,16 +37,16 @@ def log_to_stderr(level=logging.INFO): """ global _lts_initialized if _lts_initialized: - _log.info('log already initialized') + _log.info("log already initialized") h = logging.StreamHandler(sys.stderr) - f = logging.Formatter('[%(levelname)7s] %(name)s %(message)s') + f = 
logging.Formatter("[%(levelname)7s] %(name)s %(message)s") h.setFormatter(f) root = logging.getLogger() root.addHandler(h) root.setLevel(level) - _log.info('stderr logging configured') + _log.info("stderr logging configured") _lts_initialized = True @@ -55,10 +56,10 @@ def log_to_notebook(level=logging.INFO): """ global _ltn_initialized if _ltn_initialized: - _log.info('log already initialized') + _log.info("log already initialized") h = logging.StreamHandler(sys.stderr) - f = logging.Formatter('[%(levelname)7s] %(name)s %(message)s') + f = logging.Formatter("[%(levelname)7s] %(name)s %(message)s") h.setFormatter(f) h.setLevel(logging.WARNING) @@ -71,7 +72,7 @@ def log_to_notebook(level=logging.INFO): root.addHandler(oh) root.setLevel(level) - _log.info('notebook logging configured') + _log.info("notebook logging configured") _ltn_initialized = True @@ -81,6 +82,7 @@ def log_queue(): """ global _log_queue, _log_listener from lenskit.util.parallel import LKContext + ctx = LKContext.INSTANCE if _log_queue is None: _log_queue = ctx.Queue() diff --git a/lenskit/util/parallel.py b/lenskit/util/parallel.py index 27b00a4df..37fcff7e2 100644 --- a/lenskit/util/parallel.py +++ b/lenskit/util/parallel.py @@ -12,6 +12,7 @@ from concurrent.futures import ProcessPoolExecutor from abc import ABC, abstractmethod import pickle +from threadpoolctl import threadpool_limits from lenskit.sharing import persist, PersistedModel from lenskit.util.log import log_queue @@ -30,7 +31,7 @@ def is_worker(): def is_mp_worker(): "Query whether the current process is a multiprocessing worker." - return os.environ.get('_LK_IN_MP', 'no') == 'yes' + return os.environ.get("_LK_IN_MP", "no") == "yes" def _p5_recv(self): @@ -47,6 +48,7 @@ class FastQ(SimpleQueue): """ SimpleQueue subclass that uses Pickle5 instead of default pickling. """ + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.__patch() @@ -103,23 +105,14 @@ def _initialize_mp_worker(mkey, func, threads, log_queue, seed): _initialize_worker(log_queue, seed) global __work_model, __work_func - nnt_env = os.environ.get('NUMBA_NUM_THREADS', None) - if nnt_env is None or int(nnt_env) > threads: - _log.debug('configuring Numba thread count') - import numba - numba.config.NUMBA_NUM_THREADS = threads - try: - import mkl - _log.debug('configuring MKL thread count') - mkl.set_num_threads(threads) - except ImportError: - pass + # disable BLAS threading + threadpool_limits(limits=1, user_api="blas") __work_model = mkey # deferred function unpickling to minimize imports before initialization __work_func = pickle.loads(func) - _log.debug('worker %d ready (process %s)', os.getpid(), mp.current_process()) + _log.debug("worker %d ready (process %s)", os.getpid(), mp.current_process()) def _mp_invoke_worker(*args): @@ -129,13 +122,13 @@ def _mp_invoke_worker(*args): def _sp_worker(log_queue, seed, res_queue, func, args, kwargs): _initialize_worker(log_queue, seed) - _log.debug('running %s in worker', func) + _log.debug("running %s in worker", func) try: res = func(*args, **kwargs) - _log.debug('completed successfully') + _log.debug("completed successfully") res_queue.put((True, res)) except Exception as e: - _log.error('failed, transmitting error %r', e) + _log.error("failed, transmitting error %r", e) res_queue.put((False, e)) @@ -167,9 +160,9 @@ def proc_count(core_div=2, max_default=None, level=0): int: The number of jobs desired. 
""" - nprocs = os.environ.get('LK_NUM_PROCS', None) + nprocs = os.environ.get("LK_NUM_PROCS", None) if nprocs is not None: - nprocs = [int(s) for s in nprocs.split(',')] + nprocs = [int(s) for s in nprocs.split(",")] elif core_div is not None: nprocs = max(mp.cpu_count() // core_div, 1) if max_default is not None: @@ -192,22 +185,22 @@ def run_sp(func, *args, **kwargs): rq = ctx.SimpleQueue() seed = derive_seed() worker_args = (log_queue(), seed, rq, func, args, kwargs) - _log.debug('spawning subprocess to run %s', func) + _log.debug("spawning subprocess to run %s", func) proc = ctx.Process(target=_sp_worker, args=worker_args) proc.start() - _log.debug('waiting for process %s to return', proc) + _log.debug("waiting for process %s to return", proc) success, payload = rq.get() - _log.debug('received success=%s', success) - _log.debug('waiting for process %s to exit', proc) + _log.debug("received success=%s", success) + _log.debug("waiting for process %s to exit", proc) proc.join() if proc.exitcode: - _log.error('subprocess failed with code %d', proc.exitcode) - raise RuntimeError('subprocess failed with code ' + str(proc.exitcode)) + _log.error("subprocess failed with code %d", proc.exitcode) + raise RuntimeError("subprocess failed with code " + str(proc.exitcode)) if success: return payload else: - _log.error('subprocess raised exception: %s', payload) - raise ChildProcessError('error in child process', payload) + _log.error("subprocess raised exception: %s", payload) + raise ChildProcessError("error in child process", payload) def invoker(model, func, n_jobs=None, *, persist_method=None): @@ -273,7 +266,7 @@ def __exit__(self, *args): class InProcessOpInvoker(ModelOpInvoker): def __init__(self, model, func): - _log.info('setting up in-process worker') + _log.info("setting up in-process worker") if isinstance(model, PersistedModel): self.model = model.get() else: @@ -293,28 +286,29 @@ class ProcessPoolOpInvoker(ModelOpInvoker): def __init__(self, model, func, n_jobs, persist_method): if isinstance(model, PersistedModel): - _log.debug('model already persisted') + _log.debug("model already persisted") key = model else: - _log.debug('persisting model with method %s', persist_method) + _log.debug("persisting model with method %s", persist_method) key = persist(model, method=persist_method) self._close_key = key - _log.debug('persisting function') + _log.debug("persisting function") func = pickle.dumps(func) ctx = LKContext.INSTANCE - _log.info('setting up ProcessPoolExecutor w/ %d workers', n_jobs) - os.environ['_LK_IN_MP'] = 'yes' + _log.info("setting up ProcessPoolExecutor w/ %d workers", n_jobs) + os.environ["_LK_IN_MP"] = "yes" kid_tc = proc_count(level=1) - self.executor = ProcessPoolExecutor(n_jobs, ctx, _initialize_mp_worker, - (key, func, kid_tc, log_queue(), get_root_seed())) + self.executor = ProcessPoolExecutor( + n_jobs, ctx, _initialize_mp_worker, (key, func, kid_tc, log_queue(), get_root_seed()) + ) def map(self, *iterables): return self.executor.map(_mp_invoke_worker, *iterables) def shutdown(self): self.executor.shutdown() - os.environ.pop('_LK_IN_MP', 'yes') + os.environ.pop("_LK_IN_MP", "yes") if self._close_key is not None: self._close_key.close() del self._close_key diff --git a/lenskit/util/random.py b/lenskit/util/random.py index 482696d69..a8e6af820 100644 --- a/lenskit/util/random.py +++ b/lenskit/util/random.py @@ -21,7 +21,7 @@ def get_root_seed(): Returns: numpy.random.SeedSequence: The LensKit root seed. 
""" - warnings.warn('get_root_seed is deprecated, use seedbank.root_seed', DeprecationWarning) + warnings.warn("get_root_seed is deprecated, use seedbank.root_seed", DeprecationWarning) return seedbank.root_seed() @@ -54,7 +54,7 @@ def init_rng(seed, *keys, propagate=True): Returns: The random seed. """ - warnings.warn('init_rng is deprecated, use seedbank.initialize', DeprecationWarning) + warnings.warn("init_rng is deprecated, use seedbank.initialize", DeprecationWarning) seedbank.initialize(seed, *keys) @@ -84,7 +84,7 @@ def rng(spec=None, *, legacy=False): Returns: numpy.random.Generator: A random number generator. """ - warnings.warn('rng is deprecated, use seedbank.numpy_rng', DeprecationWarning) + warnings.warn("rng is deprecated, use seedbank.numpy_rng", DeprecationWarning) if legacy: return seedbank.numpy_random_state(spec) @@ -94,6 +94,7 @@ def rng(spec=None, *, legacy=False): class FixedRNG: "RNG provider that always provides the same RNG" + def __init__(self, rng): self.rng = rng @@ -101,11 +102,12 @@ def __call__(self, *keys): return self.rng def __str__(self): - return 'Fixed({})'.format(self.rng) + return "Fixed({})".format(self.rng) class DerivingRNG: "RNG provider that derives new RNGs from the key" + def __init__(self, seed, legacy): self.seed = seed self.legacy = legacy @@ -119,7 +121,7 @@ def __call__(self, *keys): return np.random.default_rng(seed) def __str__(self): - return 'Derive({})'.format(self.seed) + return "Derive({})".format(self.seed) def derivable_rng(spec, *, legacy=False): @@ -144,12 +146,12 @@ def derivable_rng(spec, *, legacy=False): the ``legacy`` parameter). """ - if spec == 'user': + if spec == "user": return DerivingRNG(derive_seed(), legacy) elif isinstance(spec, tuple): seed, key = spec - if key != 'user': - raise ValueError('unrecognized key %s', key) + if key != "user": + raise ValueError("unrecognized key %s", key) return DerivingRNG(seed, legacy) else: return FixedRNG(rng(spec, legacy=legacy)) diff --git a/lenskit/util/test.py b/lenskit/util/test.py index a6fb2d277..e008c0dae 100644 --- a/lenskit/util/test.py +++ b/lenskit/util/test.py @@ -14,20 +14,20 @@ from lenskit.algorithms.ranking import PlackettLuce from lenskit.batch import recommend -ml_test = MovieLens('data/ml-latest-small') -ml100k = ML100K('data/ml-100k') +ml_test = MovieLens("data/ml-latest-small") +ml100k = ML100K("data/ml-100k") -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def demo_recs(): """ A demo set of train, test, and recommendation data. """ train, test = simple_test_pair(ml_test.ratings, f_rates=0.5) - users = test['user'].unique() + users = test["user"].unique() algo = PopScore() - algo = PlackettLuce(algo, rng_spec='user') + algo = PlackettLuce(algo, rng_spec="user") algo.fit(train) recs = recommend(algo, users, 500) @@ -55,5 +55,4 @@ def set_env_var(var, val): del os.environ[var] -wantjit = pytest.mark.skipif('NUMBA_DISABLE_JIT' in os.environ, - reason='JIT required') +wantjit = pytest.mark.skipif("NUMBA_DISABLE_JIT" in os.environ, reason="JIT required") diff --git a/lenskit/util/timing.py b/lenskit/util/timing.py index 1eb2188e2..760dbd610 100644 --- a/lenskit/util/timing.py +++ b/lenskit/util/timing.py @@ -5,10 +5,11 @@ import time -class Stopwatch(): +class Stopwatch: """ Timer class for recording elapsed wall time in operations. 
""" + start_time = None stop_time = None diff --git a/pyproject.toml b/pyproject.toml index a9927b01e..a77760f06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,64 +5,59 @@ build-backend = "flit_core.buildapi" [project] name = "lenskit" authors = [ - {name = "Michael Ekstrand", email = "michaelekstrand@boisestate.edu"} + {name="Michael Ekstrand", email="mdekstrand@drexel.edu"} ] classifiers = [ - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Operating System :: OS Independent", - "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Operating System :: OS Independent", + "Intended Audience :: Science/Research", ] +requires-python = ">= 3.10" readme = "README.md" license = { file = "LICENSE.md" } -requires-python = ">= 3.8" -dynamic = ['version', 'description'] +dynamic = ["version", "description"] dependencies = [ - "pandas >=1.4, <2", - "numpy >= 1.22", - "scipy >= 1.8.0", - "numba >= 0.56, < 0.59", - "cffi >= 1.15.0", - "psutil >= 5", - "binpickle >= 0.3.2", - "seedbank >= 0.1.0", - "csr >= 0.5", + "pandas >=1.4, <2", + "numpy >= 1.22", + "scipy >= 1.8.0", + "numba >= 0.56, < 0.59", + "cffi >= 1.15.0", + "threadpoolctl >=3.0", + "binpickle >= 0.3.2", + "seedbank >= 0.1.0", + "csr >= 0.5", ] -[project.urls] -homepage = "https://lenskit.org" -documentation = "https://lkpy.lenskit.org" -source = "https://github.com/lenskit/lkpy" - [project.optional-dependencies] -test = [ - "pytest ==7.*", - "pytest-doctestplus >= 0.9", - "coverage >= 5", - "pytest-cov >= 2.12", - "hypothesis >= 6" -] dev = [ - "lenskit-build-helpers >=0.1", # p2c: -p - "flit >= 3", - "pyproject2conda >=0.8", - "invoke >=1", - "requests >=2", - "packaging >= 20", - "flake8 >= 3", - "ipython >= 7", - "docopt >= 0.6", - "tqdm >= 4", - "keyring", - "sphinx-autobuild >= 2021", + "lenskit-build-helpers >=0.1", # p2c: -p + "flit >= 3.8", + "ruff", + "copier", + "pyproject2conda >=0.8", + "invoke >=1", + "packaging >= 20", + "ipython >= 7", + "docopt >= 0.6", + "tqdm >= 4", + "keyring", + "sphinx-autobuild >= 2021", +] +test = [ + "pytest ==7.*", + "pytest-doctestplus >= 0.9", + "coverage >= 5", + "pytest-cov >= 2.12", + "hypothesis >= 6" ] doc = [ - "sphinx >= 4.2", - "sphinxcontrib-bibtex >= 2.0", - "sphinx_rtd_theme >= 0.5", - "myst-nb >= 0.13", + "sphinx >= 4.2", + "sphinxcontrib-bibtex >= 2.0", + "sphinx_rtd_theme >= 0.5", + "myst-nb >= 0.13", ] demo = [ "notebook >= 6", @@ -73,19 +68,30 @@ demo = [ ] sklearn = ["scikit-learn >= 1.1"] +[project.urls] +homepage = "https://lenskit.org" +documentation = "https://lkpy.lenskit.org" +source = "https://github.com/lenskit/lkpy" + +# configure build tools [tool.flit.sdist] +include = ["tests/*"] exclude = [ - ".github", - "*.ipynb", - "docs", - "data", - "examples", - "build-tools", - "lkbuild", - "tasks.py", - "tests", + ".github", + "*.ipynb", + "docs", + "data", + "examples", + "build-tools", + "lkbuild", + "tasks.py", ] +# need this for the SCM plugins to work +[tool.setuptools.packages.find] +exclude = ["envs"] + +# settings for generating conda environments for dev & CI, when needed [tool.pyproject2conda] channels = ["conda-forge"] python = ["3.10", "3.11"] @@ -99,3 +105,14 @@ extras = ["dev", "test", "doc", "demo", "sklearn"] [tool.pyproject2conda.envs.ci] extras = ["test", "sklearn"] 
+ +[tool.ruff] +line-length = 100 +target-version = "py310" +exclude = [ + ".git", + "__pycache__", + "docs/conf.py", + "build", + "dist", +] diff --git a/tests/test_als_explicit.py b/tests/test_als_explicit.py index 762e4f296..84411526a 100644 --- a/tests/test_als_explicit.py +++ b/tests/test_als_explicit.py @@ -22,11 +22,11 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) -methods = mark.parametrize('m', ['lu', 'cd']) +methods = mark.parametrize("m", ["lu", "cd"]) @methods @@ -80,7 +80,7 @@ def test_als_predict_basic_for_new_ratings(): assert algo.bias.mean_ == approx(simple_df.rating.mean()) - new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values + new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values preds = algo.predict_for_user(15, [3], new_ratings) @@ -100,7 +100,7 @@ def test_als_predict_basic_for_new_user_with_new_ratings(): preds = algo.predict_for_user(u, [i]) new_u_id = -1 - new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values + new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values new_preds = algo.predict_for_user(new_u_id, [i], new_ratings) @@ -127,9 +127,13 @@ def test_als_predict_for_new_users_with_new_ratings(): user_data = ratings[ratings.user == u] - _log.debug("user_features from fit: " + str(algo.user_features_[algo.user_index_.get_loc(u), :])) + _log.debug( + "user_features from fit: " + str(algo.user_features_[algo.user_index_.get_loc(u), :]) + ) - new_ratings = pd.Series(user_data.rating.to_numpy(), index=user_data.item) # items as index and ratings as values + new_ratings = pd.Series( + user_data.rating.to_numpy(), index=user_data.item + ) # items as index and ratings as values new_preds = algo.predict_for_user(new_u_id, items, new_ratings) _log.debug("preds: " + str(preds.values)) @@ -186,9 +190,13 @@ def test_als_predict_no_user_features_basic(): user_data = ratings[ratings.user == u] - _log.debug("user_features from fit: " + str(algo.user_features_[algo.user_index_.get_loc(u), :])) + _log.debug( + "user_features from fit: " + str(algo.user_features_[algo.user_index_.get_loc(u), :]) + ) - new_ratings = pd.Series(user_data.rating.to_numpy(), index=user_data.item) # items as index and ratings as values + new_ratings = pd.Series( + user_data.rating.to_numpy(), index=user_data.item + ) # items as index and ratings as values new_preds = algo_no_user_features.predict_for_user(new_u_id, items, new_ratings) _log.debug("preds: " + str(preds.values)) @@ -209,8 +217,8 @@ def test_als_train_large(): assert algo.n_items == ratings.item.nunique() assert algo.n_users == ratings.user.nunique() - icounts = ratings.groupby('item').rating.count() - isums = ratings.groupby('item').rating.sum() + icounts = ratings.groupby("item").rating.count() + isums = ratings.groupby("item").rating.sum() is2 = isums - icounts * ratings.rating.mean() imeans = is2 / (icounts + 5) ibias = pd.Series(algo.bias.item_offsets_, index=algo.item_index_) @@ -220,14 +228,14 @@ def test_als_train_large(): # don't use wantjit, use this to do a non-JIT test def test_als_save_load(): - original = als.BiasedMF(5, iterations=5, method='lu') + original = als.BiasedMF(5, iterations=5, method="lu") ratings = lktu.ml_test.ratings original.fit(ratings) assert 
original.bias.mean_ == approx(ratings.rating.mean()) mod = pickle.dumps(original) - _log.info('serialized to %d bytes', len(mod)) + _log.info("serialized to %d bytes", len(mod)) algo = pickle.loads(mod) assert algo.bias.mean_ == original.bias.mean_ @@ -239,26 +247,26 @@ def test_als_save_load(): assert np.all(algo.user_index_ == original.user_index_) # make sure it still works - preds = algo.predict_for_user(10, np.arange(0, 50, dtype='i8')) + preds = algo.predict_for_user(10, np.arange(0, 50, dtype="i8")) assert len(preds) == 50 -@mark.skipif(not binpickle, reason='binpickle not available') +@mark.skipif(not binpickle, reason="binpickle not available") def test_als_binpickle(tmp_path): "Test saving ALS with BinPickle" - original = als.BiasedMF(20, iterations=5, method='lu') + original = als.BiasedMF(20, iterations=5, method="lu") ratings = lktu.ml_test.ratings original.fit(ratings) assert original.bias.mean_ == approx(ratings.rating.mean()) - file = tmp_path / 'als.bpk' + file = tmp_path / "als.bpk" binpickle.dump(original, file) with binpickle.BinPickleFile(file) as bpf: # the pickle data should be small - _log.info('serialized to %d pickle bytes', bpf.entries[-1].dec_length) + _log.info("serialized to %d pickle bytes", bpf.entries[-1].dec_length) pickle_dis(bpf._read_buffer(bpf.entries[-1])) assert bpf.entries[-1].dec_length < 2048 @@ -273,27 +281,27 @@ def test_als_binpickle(tmp_path): assert np.all(algo.user_index_ == original.user_index_) # make sure it still works - preds = algo.predict_for_user(10, np.arange(0, 50, dtype='i8')) + preds = algo.predict_for_user(10, np.arange(0, 50, dtype="i8")) assert len(preds) == 50 @lktu.wantjit @mark.slow def test_als_method_match(): - lu = als.BiasedMF(20, iterations=15, reg=(2, 0.001), method='lu', rng_spec=42) - cd = als.BiasedMF(20, iterations=20, reg=(2, 0.001), method='cd', rng_spec=42) + lu = als.BiasedMF(20, iterations=15, reg=(2, 0.001), method="lu", rng_spec=42) + cd = als.BiasedMF(20, iterations=20, reg=(2, 0.001), method="cd", rng_spec=42) ratings = lktu.ml_test.ratings timer = Stopwatch() lu.fit(ratings) timer.stop() - _log.info('fit with LU solver in %s', timer) + _log.info("fit with LU solver in %s", timer) timer = Stopwatch() cd.fit(ratings) timer.stop() - _log.info('fit with CD solver in %s', timer) + _log.info("fit with CD solver in %s", timer) assert lu.bias.mean_ == approx(ratings.rating.mean()) assert cd.bias.mean_ == approx(ratings.rating.mean()) @@ -307,29 +315,31 @@ def test_als_method_match(): cd_preds = cd.predict_for_user(u, items) diff = lu_preds - cd_preds adiff = np.abs(diff) - _log.info('user %s diffs: L2 = %f, min = %f, med = %f, max = %f, 90%% = %f', u, - np.linalg.norm(diff, 2), - np.min(adiff), np.median(adiff), np.max(adiff), np.quantile(adiff, 0.9)) - - preds.append(pd.DataFrame({ - 'user': u, - 'item': items, - 'lu': lu_preds, - 'cd': cd_preds, - 'adiff': adiff - })) + _log.info( + "user %s diffs: L2 = %f, min = %f, med = %f, max = %f, 90%% = %f", + u, + np.linalg.norm(diff, 2), + np.min(adiff), + np.median(adiff), + np.max(adiff), + np.quantile(adiff, 0.9), + ) + + preds.append( + pd.DataFrame({"user": u, "item": items, "lu": lu_preds, "cd": cd_preds, "adiff": adiff}) + ) preds = pd.concat(preds, ignore_index=True) - _log.info('LU preds:\n%s', preds.lu.describe()) - _log.info('CD preds:\n%s', preds.cd.describe()) - _log.info('overall differences:\n%s', preds.adiff.describe()) + _log.info("LU preds:\n%s", preds.lu.describe()) + _log.info("CD preds:\n%s", preds.cd.describe()) + _log.info("overall 
differences:\n%s", preds.adiff.describe()) # there are differences. our check: the 90% are under a quarter star assert np.quantile(adiff, 0.9) <= 0.27 @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_als_batch_accuracy(): from lenskit.algorithms import bias import lenskit.crossfold as xf @@ -337,30 +347,30 @@ def test_als_batch_accuracy(): ratings = lktu.ml100k.ratings - lu_algo = als.BiasedMF(25, iterations=20, damping=5, method='lu') - cd_algo = als.BiasedMF(25, iterations=25, damping=5, method='cd') + lu_algo = als.BiasedMF(25, iterations=20, damping=5, method="lu") + cd_algo = als.BiasedMF(25, iterations=25, damping=5, method="cd") # algo = bias.Fallback(svd_algo, bias.Bias(damping=5)) def eval(train, test): - _log.info('training LU') + _log.info("training LU") lu_algo.fit(train) - _log.info('training CD') + _log.info("training CD") cd_algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return test.assign(lu_pred=lu_algo.predict(test), cd_pred=cd_algo.predict(test)) folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) preds = pd.concat(eval(train, test) for (train, test) in folds) - preds['abs_diff'] = np.abs(preds.lu_pred - preds.cd_pred) - _log.info('predictions:\n%s', preds.sort_values('abs_diff', ascending=False)) - _log.info('diff summary:\n%s', preds.abs_diff.describe()) + preds["abs_diff"] = np.abs(preds.lu_pred - preds.cd_pred) + _log.info("predictions:\n%s", preds.sort_values("abs_diff", ascending=False)) + _log.info("diff summary:\n%s", preds.abs_diff.describe()) lu_mae = pm.mae(preds.lu_pred, preds.rating) assert lu_mae == approx(0.73, abs=0.045) cd_mae = pm.mae(preds.cd_pred, preds.rating) assert cd_mae == approx(0.73, abs=0.045) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.lu_pred, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.lu_pred, df.rating)) assert user_rmse.mean() == approx(0.94, abs=0.05) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.cd_pred, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.cd_pred, df.rating)) assert user_rmse.mean() == approx(0.94, abs=0.05) diff --git a/tests/test_als_implicit.py b/tests/test_als_implicit.py index d72ab6757..94006cb69 100644 --- a/tests/test_als_implicit.py +++ b/tests/test_als_implicit.py @@ -20,12 +20,11 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13]}) +simple_df = pd.DataFrame({"item": [1, 1, 2, 3], "user": [10, 12, 10, 13]}) simple_dfr = simple_df.assign(rating=[4.0, 3.0, 5.0, 2.0]) -methods = mark.parametrize('m', ['lu', 'cg']) +methods = mark.parametrize("m", ["lu", "cg"]) @methods @@ -52,7 +51,7 @@ def test_als_predict_basic(): def test_als_predict_basic_for_new_ratings(): - """ Test ImplicitMF ability to support new ratings """ + """Test ImplicitMF ability to support new ratings""" algo = als.ImplicitMF(20, iterations=10) algo.fit(simple_df) @@ -115,7 +114,7 @@ def test_als_predict_for_new_users_with_new_ratings(): _log.debug("user_features from fit: " + str(algo.user_features_[upos, :])) # get the user's rating series - new_ratings = user_data.set_index('item')['rating'].copy() + new_ratings = user_data.set_index("item")["rating"].copy() new_preds = algo.predict_for_user(new_u_id, items, new_ratings) _log.debug("preds: " + str(preds.values)) @@ -151,26 
+150,28 @@ def test_als_recs_topn_for_new_users_with_new_ratings(rng): recs = rec_algo.recommend(u, 10) user_data = ratings[ratings.user == u] upos = algo.user_index_.get_loc(u) - _log.info('user %s: %s ratings', u, len(user_data)) + _log.info("user %s: %s ratings", u, len(user_data)) _log.debug("user_features from fit: " + str(algo.user_features_[upos, :])) # get the user's rating series - new_ratings = user_data.set_index('item')['rating'].copy() + new_ratings = user_data.set_index("item")["rating"].copy() new_recs = rec_algo.recommend(new_u_id, 10, ratings=new_ratings) # merge new & old recs - all_recs = pd.merge(recs.rename(columns={'score': 'old_score'}), - new_recs.rename(columns={'score': 'new_score'}), - how='outer').fillna(-np.inf) + all_recs = pd.merge( + recs.rename(columns={"score": "old_score"}), + new_recs.rename(columns={"score": "new_score"}), + how="outer", + ).fillna(-np.inf) tau = stats.kendalltau(all_recs.old_score, all_recs.new_score) - _log.info('correlation for user %s: %f', u, tau.correlation) + _log.info("correlation for user %s: %f", u, tau.correlation) correlations.loc[u] = tau.correlation - _log.debug('correlations: %s', correlations) + _log.debug("correlations: %s", correlations) - assert not(any(correlations.isnull())) + assert not (any(correlations.isnull())) assert all(correlations >= 0.5) @@ -206,7 +207,7 @@ def test_als_predict_no_user_features_basic(): preds = algo.predict_for_user(u, items) user_data = ratings[ratings.user == u] - new_ratings = user_data.set_index('item')['rating'].copy() + new_ratings = user_data.set_index("item")["rating"].copy() algo_no_user_features = als.ImplicitMF(5, iterations=10, method="lu", save_user_features=False) algo_no_user_features.fit(ratings) @@ -236,7 +237,7 @@ def test_als_save_load(tmp_path): ratings = lktu.ml_test.ratings algo.fit(ratings) - fn = tmp_path / 'model.bpk' + fn = tmp_path / "model.bpk" binpickle.dump(algo, fn, codec=None) restored = binpickle.load(fn) @@ -250,7 +251,7 @@ def test_als_save_load(tmp_path): def test_als_train_large_noratings(): algo = als.ImplicitMF(20, iterations=20) ratings = lktu.ml_test.ratings - ratings = ratings.loc[:, ['user', 'item']] + ratings = ratings.loc[:, ["user", "item"]] algo.fit(ratings) assert len(algo.user_index_) == ratings.user.nunique() @@ -274,20 +275,20 @@ def test_als_train_large_ratings(): @lktu.wantjit @mark.slow def test_als_method_match(): - lu = als.ImplicitMF(20, iterations=15, method='lu', rng_spec=42) - cg = als.ImplicitMF(20, iterations=15, method='cg', rng_spec=42) + lu = als.ImplicitMF(20, iterations=15, method="lu", rng_spec=42) + cg = als.ImplicitMF(20, iterations=15, method="cg", rng_spec=42) ratings = lktu.ml_test.ratings timer = Stopwatch() lu.fit(ratings) timer.stop() - _log.info('fit with LU solver in %s', timer) + _log.info("fit with LU solver in %s", timer) timer = Stopwatch() cg.fit(ratings) timer.stop() - _log.info('fit with CG solver in %s', timer) + _log.info("fit with CG solver in %s", timer) preds = [] @@ -298,30 +299,32 @@ def test_als_method_match(): cd_preds = cg.predict_for_user(u, items) diff = lu_preds - cd_preds adiff = np.abs(diff) - _log.info('user %s diffs: L2 = %f, min = %f, med = %f, max = %f, 90%% = %f', u, - np.linalg.norm(diff, 2), - np.min(adiff), np.median(adiff), np.max(adiff), np.quantile(adiff, 0.9)) - - preds.append(pd.DataFrame({ - 'user': u, - 'item': items, - 'lu': lu_preds, - 'cg': cd_preds, - 'adiff': adiff - })) - _log.info('user %s tau: %s', u, stats.kendalltau(lu_preds, cd_preds)) + _log.info( + "user %s diffs: 
L2 = %f, min = %f, med = %f, max = %f, 90%% = %f", + u, + np.linalg.norm(diff, 2), + np.min(adiff), + np.median(adiff), + np.max(adiff), + np.quantile(adiff, 0.9), + ) + + preds.append( + pd.DataFrame({"user": u, "item": items, "lu": lu_preds, "cg": cd_preds, "adiff": adiff}) + ) + _log.info("user %s tau: %s", u, stats.kendalltau(lu_preds, cd_preds)) preds = pd.concat(preds, ignore_index=True) - _log.info('LU preds:\n%s', preds.lu.describe()) - _log.info('CD preds:\n%s', preds.cg.describe()) - _log.info('overall differences:\n%s', preds.adiff.describe()) + _log.info("LU preds:\n%s", preds.lu.describe()) + _log.info("CD preds:\n%s", preds.cg.describe()) + _log.info("overall differences:\n%s", preds.adiff.describe()) # there are differences. our check: the 90% are reasonable assert np.quantile(adiff, 0.9) < 0.5 @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_als_implicit_batch_accuracy(): import lenskit.crossfold as xf from lenskit import batch @@ -330,31 +333,31 @@ def test_als_implicit_batch_accuracy(): ratings = lktu.ml100k.ratings def eval(train, test): - train = train.astype({'rating': np.float_}) - _log.info('training CG') - cg_algo = als.ImplicitMF(25, iterations=20, method='cg') + train = train.astype({"rating": np.float_}) + _log.info("training CG") + cg_algo = als.ImplicitMF(25, iterations=20, method="cg") cg_algo = Recommender.adapt(cg_algo) cg_algo.fit(train) - _log.info('training LU') - lu_algo = als.ImplicitMF(25, iterations=20, method='lu') + _log.info("training LU") + lu_algo = als.ImplicitMF(25, iterations=20, method="lu") lu_algo = Recommender.adapt(lu_algo) lu_algo.fit(train) users = test.user.unique() - _log.info('testing %d users', len(users)) + _log.info("testing %d users", len(users)) cg_recs = batch.recommend(cg_algo, users, 100, n_jobs=2) lu_recs = batch.recommend(lu_algo, users, 100, n_jobs=2) - return pd.concat({'CG': cg_recs, 'LU': lu_recs}, names=['Method']).reset_index('Method') + return pd.concat({"CG": cg_recs, "LU": lu_recs}, names=["Method"]).reset_index("Method") folds = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) test = pd.concat(te for (tr, te) in folds) recs = pd.concat((eval(train, test) for (train, test) in folds), ignore_index=True) - _log.info('analyzing recommendations') + _log.info("analyzing recommendations") rla = topn.RecListAnalysis() rla.add_metric(topn.ndcg) results = rla.compute(recs, test) - results = results.groupby('Method')['ndcg'].mean() - _log.info('LU nDCG for users is %.4f', results.loc['LU'].mean()) - _log.info('CG nDCG for users is %.4f', results.loc['CG'].mean()) + results = results.groupby("Method")["ndcg"].mean() + _log.info("LU nDCG for users is %.4f", results.loc["LU"].mean()) + _log.info("CG nDCG for users is %.4f", results.loc["CG"].mean()) assert all(results > 0.28) - assert results.loc['LU'] == approx(results.loc['CG'], rel=0.05) + assert results.loc["LU"] == approx(results.loc["CG"], rel=0.05) diff --git a/tests/test_batch_predict.py b/tests/test_batch_predict.py index 6daa308bd..e176a6370 100644 --- a/tests/test_batch_predict.py +++ b/tests/test_batch_predict.py @@ -12,7 +12,7 @@ _log = logging.getLogger(__name__) -MLB = namedtuple('MLB', ['ratings', 'algo']) +MLB = namedtuple("MLB", ["ratings", "algo"]) @pytest.fixture @@ -24,12 +24,12 @@ def mlb(): def test_predict_single(mlb): - tf = pd.DataFrame({'user': [1], 'item': [31]}) + tf = pd.DataFrame({"user": [1], "item": [31]}) 
res = lkb.predict(mlb.algo, tf) assert len(res) == 1 assert all(res.user == 1) - assert set(res.columns) == set(['user', 'item', 'prediction']) + assert set(res.columns) == set(["user", "item", "prediction"]) assert all(res.item == 31) expected = mlb.algo.mean_ + mlb.algo.item_offsets_.loc[31] + mlb.algo.user_offsets_.loc[1] @@ -45,19 +45,19 @@ def test_predict_user(mlb): test_unrated = np.random.choice(unrated, 10, replace=False) test_items = pd.concat([test_rated, pd.Series(test_unrated)]) - tf = pd.DataFrame({'user': uid, 'item': test_items}) + tf = pd.DataFrame({"user": uid, "item": test_items}) res = lkb.predict(mlb.algo, tf) assert len(res) == 15 - assert set(res.columns) == set(['user', 'item', 'prediction']) + assert set(res.columns) == set(["user", "item", "prediction"]) assert all(res.user == uid) assert set(res.item) == set(test_items) # did we get the right predictions? - preds = res.set_index(['user', 'item']) - preds['rating'] = mlb.algo.mean_ - preds['rating'] += mlb.algo.item_offsets_ - preds['rating'] += mlb.algo.user_offsets_.loc[uid] + preds = res.set_index(["user", "item"]) + preds["rating"] = mlb.algo.mean_ + preds["rating"] += mlb.algo.item_offsets_ + preds["rating"] += mlb.algo.user_offsets_.loc[uid] assert preds.prediction.values == pytest.approx(preds.rating.values) @@ -66,17 +66,17 @@ def test_predict_two_users(mlb): tf = None # make sure we get both UIDs while tf is None or len(set(tf.user)) < 2: - tf = mlb.ratings[mlb.ratings.user.isin(uids)].loc[:, ('user', 'item')].sample(10) + tf = mlb.ratings[mlb.ratings.user.isin(uids)].loc[:, ("user", "item")].sample(10) res = lkb.predict(mlb.algo, tf) assert len(res) == 10 assert set(res.user) == set(uids) - preds = res.set_index(['user', 'item']) - preds['rating'] = mlb.algo.mean_ - preds['rating'] += mlb.algo.item_offsets_ - preds['rating'] += mlb.algo.user_offsets_ + preds = res.set_index(["user", "item"]) + preds["rating"] = mlb.algo.mean_ + preds["rating"] += mlb.algo.item_offsets_ + preds["rating"] += mlb.algo.user_offsets_ assert preds.prediction.values == pytest.approx(preds.rating.values) @@ -85,26 +85,26 @@ def test_predict_include_rating(mlb): tf = None # make sure we get both UIDs while tf is None or len(set(tf.user)) < 2: - tf = mlb.ratings[mlb.ratings.user.isin(uids)].loc[:, ('user', 'item', 'rating')].sample(10) + tf = mlb.ratings[mlb.ratings.user.isin(uids)].loc[:, ("user", "item", "rating")].sample(10) res = lkb.predict(mlb.algo, tf) assert len(res) == 10 assert set(res.user) == set(uids) - preds = res.set_index(['user', 'item']) - preds['expected'] = mlb.algo.mean_ - preds['expected'] += mlb.algo.item_offsets_ - preds['expected'] += mlb.algo.user_offsets_ + preds = res.set_index(["user", "item"]) + preds["expected"] = mlb.algo.mean_ + preds["expected"] += mlb.algo.item_offsets_ + preds["expected"] += mlb.algo.user_offsets_ assert preds.prediction.values == pytest.approx(preds.expected.values) - urv = mlb.ratings.set_index(['user', 'item']) + urv = mlb.ratings.set_index(["user", "item"]) assert all(preds.rating.values == urv.loc[preds.index, :].rating.values) -@pytest.mark.skipif(not lktu.ml100k.available, reason='ML-100K required') +@pytest.mark.skipif(not lktu.ml100k.available, reason="ML-100K required") @pytest.mark.eval -@pytest.mark.parametrize('ncpus', [None, 1, 2]) +@pytest.mark.parametrize("ncpus", [None, 1, 2]) def test_bias_batch_predict(ncpus): from lenskit.algorithms import bias import lenskit.crossfold as xf @@ -116,19 +116,19 @@ def test_bias_batch_predict(ncpus): algo = bias.Bias(damping=5) 
def eval(train, test): - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) recs = batch.predict(algo, test, n_jobs=ncpus) return recs - preds = pd.concat((eval(train, test) - for (train, test) - in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)))) + preds = pd.concat( + (eval(train, test) for (train, test) in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) + ) - _log.info('analyzing predictions') + _log.info("analyzing predictions") rmse = pm.rmse(preds.prediction, preds.rating) - _log.info('RMSE is %f', rmse) + _log.info("RMSE is %f", rmse) assert rmse == pytest.approx(0.95, abs=0.1) @@ -144,4 +144,4 @@ def test_batch_predict_preshared(): ares = lkb.train_isolated(algo, train) preds = lkb.predict(ares, test) assert len(preds) == len(test) - assert not any(preds['prediction'].isna()) + assert not any(preds["prediction"].isna()) diff --git a/tests/test_batch_recommend.py b/tests/test_batch_recommend.py index 92ad52e6a..2d8a3d08b 100644 --- a/tests/test_batch_recommend.py +++ b/tests/test_batch_recommend.py @@ -12,7 +12,7 @@ from lenskit import batch, topn import lenskit.crossfold as xf -MLB = namedtuple('MLB', ['ratings', 'algo']) +MLB = namedtuple("MLB", ["ratings", "algo"]) _log = logging.getLogger(__name__) @@ -32,33 +32,32 @@ def __init__(self, ratings): self.isolate = False def evaluate(self, algo, train, test, **kwargs): - _log.info('running training') + _log.info("running training") if self.isolate: algo = batch.train_isolated(algo, train) else: algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) recs = batch.recommend(algo, test.user.unique(), 100, **kwargs) return recs def eval_all(self, algo, **kwargs): - return pd.concat(self.evaluate(algo, train, test, **kwargs) - for (train, test) in self.folds) + return pd.concat(self.evaluate(algo, train, test, **kwargs) for (train, test) in self.folds) def check_positive_ndcg(self, recs): - _log.info('analyzing recommendations') + _log.info("analyzing recommendations") rla = topn.RecListAnalysis() rla.add_metric(topn.ndcg) results = rla.compute(recs, self.test) dcg = results.ndcg - _log.info('nDCG for %d users is %f (max=%f)', len(dcg), dcg.mean(), dcg.max()) + _log.info("nDCG for %d users is %f (max=%f)", len(dcg), dcg.mean(), dcg.max()) assert dcg.mean() > 0 @pytest.fixture def ml_folds() -> MLFolds: if not lktu.ml100k.available: - raise pytest.skip('ML-100K not available') + raise pytest.skip("ML-100K not available") ratings = lktu.ml100k.ratings return MLFolds(ratings) @@ -67,9 +66,9 @@ def test_recommend_single(mlb): res = batch.recommend(mlb.algo, [1], None, {1: [31]}) assert len(res) == 1 - assert all(res['user'] == 1) - assert all(res['rank'] == 1) - assert set(res.columns) == set(['user', 'rank', 'item', 'score']) + assert all(res["user"] == 1) + assert all(res["rank"] == 1) + assert set(res.columns) == set(["user", "rank", "item", "score"]) algo = mlb.algo.predictor expected = algo.mean_ + algo.item_offsets_.loc[31] + algo.user_offsets_.loc[1] @@ -87,9 +86,9 @@ def candidates(user): res = batch.recommend(mlb.algo, [5], 10, candidates) assert len(res) == 10 - assert set(res.columns) == set(['user', 'rank', 'item', 'score']) - assert all(res['user'] == uid) - assert all(res['rank'] == np.arange(10) + 1) + assert set(res.columns) == set(["user", "rank", "item", "score"]) + assert all(res["user"] == uid) + assert all(res["rank"] 
== np.arange(10) + 1) # they should be in decreasing order assert all(np.diff(res.score) <= 0) @@ -105,12 +104,12 @@ def candidates(user): assert len(res) == 20 assert set(res.user) == set([5, 10]) - assert all(res.groupby('user').item.count() == 10) - assert all(res.groupby('user')['rank'].max() == 10) + assert all(res.groupby("user").item.count() == 10) + assert all(res.groupby("user")["rank"].max() == 10) assert all(np.diff(res[res.user == 5].score) <= 0) - assert all(np.diff(res[res.user == 5]['rank']) == 1) + assert all(np.diff(res[res.user == 5]["rank"]) == 1) assert all(np.diff(res[res.user == 10].score) <= 0) - assert all(np.diff(res[res.user == 10]['rank']) == 1) + assert all(np.diff(res[res.user == 10]["rank"]) == 1) def test_recommend_no_cands(mlb): @@ -118,19 +117,19 @@ def test_recommend_no_cands(mlb): assert len(res) == 20 assert set(res.user) == set([5, 10]) - assert all(res.groupby('user').item.count() == 10) - assert all(res.groupby('user')['rank'].max() == 10) + assert all(res.groupby("user").item.count() == 10) + assert all(res.groupby("user")["rank"].max() == 10) assert all(np.diff(res[res.user == 5].score) <= 0) - assert all(np.diff(res[res.user == 5]['rank']) == 1) + assert all(np.diff(res[res.user == 5]["rank"]) == 1) assert all(np.diff(res[res.user == 10].score) <= 0) - assert all(np.diff(res[res.user == 10]['rank']) == 1) + assert all(np.diff(res[res.user == 10]["rank"]) == 1) - idx_rates = mlb.ratings.set_index(['user', 'item']) - merged = res.join(idx_rates, on=['user', 'item'], how='inner') + idx_rates = mlb.ratings.set_index(["user", "item"]) + merged = res.join(idx_rates, on=["user", "item"], how="inner") assert len(merged) == 0 -@pytest.mark.parametrize(('ncpus', 'isolate'), [(None, False), (1, False), (2, True)]) +@pytest.mark.parametrize(("ncpus", "isolate"), [(None, False), (1, False), (2, True)]) @pytest.mark.eval def test_bias_batch_recommend(ml_folds: MLFolds, ncpus, isolate): algo = Bias(damping=5) @@ -142,7 +141,7 @@ def test_bias_batch_recommend(ml_folds: MLFolds, ncpus, isolate): ml_folds.check_positive_ndcg(recs) -@pytest.mark.parametrize('ncpus', [None, 1, 2]) +@pytest.mark.parametrize("ncpus", [None, 1, 2]) @pytest.mark.eval def test_pop_batch_recommend(ml_folds: MLFolds, ncpus): algo = Popular() diff --git a/tests/test_batch_train.py b/tests/test_batch_train.py index 1f79cc841..fbd1a4ca5 100644 --- a/tests/test_batch_train.py +++ b/tests/test_batch_train.py @@ -21,7 +21,7 @@ def test_train_isolate(): def test_train_isolate_file(tmp_path): - fn = tmp_path / 'saved.bpk' + fn = tmp_path / "saved.bpk" algo = Bias() algo = Recommender.adapt(algo) diff --git a/tests/test_bias.py b/tests/test_bias.py index db459427d..275c71c26 100644 --- a/tests/test_bias.py +++ b/tests/test_bias.py @@ -14,9 +14,9 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_bias_check_arguments(): @@ -39,12 +39,12 @@ def test_bias_full(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 1.5, -1.5])) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == 
"user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) assert algo.user_offsets_.loc[[10, 12, 13]].values == approx(np.array([0.25, -0.5, 0])) @@ -54,13 +54,13 @@ def test_bias_clone(): algo.fit(simple_df) params = algo.get_params() - assert sorted(params.keys()) == ['damping', 'items', 'users'] + assert sorted(params.keys()) == ["damping", "items", "users"] a2 = lku.clone(algo) assert a2 is not algo - assert getattr(a2, 'mean_', None) is None - assert getattr(a2, 'item_offsets_', None) is None - assert getattr(a2, 'user_offsets_', None) is None + assert getattr(a2, "mean_", None) is None + assert getattr(a2, "item_offsets_", None) is None + assert getattr(a2, "user_offsets_", None) is None def test_bias_global_only(): @@ -77,7 +77,7 @@ def test_bias_no_user(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 1.5, -1.5])) @@ -91,7 +91,7 @@ def test_bias_no_item(): assert algo.item_offsets_ is None assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) assert algo.user_offsets_.loc[[10, 12, 13]].values == approx(np.array([1.0, -0.5, -1.5])) @@ -99,8 +99,8 @@ def test_bias_no_item(): def test_bias_index_props(): algo = Bias() algo.fit(simple_df) - assert all(np.sort(algo.user_index) == np.unique(simple_df['user'])) - assert all(np.sort(algo.item_index) == np.unique(simple_df['item'])) + assert all(np.sort(algo.user_index) == np.unique(simple_df["user"])) + assert all(np.sort(algo.item_index) == np.unique(simple_df["item"])) def test_bias_global_predict(): @@ -140,13 +140,13 @@ def test_bias_new_user_predict(): algo = Bias() algo.fit(simple_df) - ratings = pd.DataFrame({'item': [1, 2, 3], 'rating': [1.5, 2.5, 3.5]}) - ratings = ratings.set_index('item').rating + ratings = pd.DataFrame({"item": [1, 2, 3], "rating": [1.5, 2.5, 3.5]}) + ratings = ratings.set_index("item").rating p = algo.predict_for_user(None, [1, 3], ratings=ratings) offs = ratings - algo.mean_ - algo.item_offsets_ umean = offs.mean() - _log.info('user mean is %f', umean) + _log.info("user mean is %f", umean) assert len(p) == 2 assert p.values == approx((algo.mean_ + algo.item_offsets_ + umean).loc[[1, 3]].values) @@ -180,12 +180,12 @@ def test_bias_train_ml_ratings(): algo.fit(ratings) assert algo.mean_ == approx(ratings.rating.mean()) - imeans_data = ratings.groupby('item').rating.mean() + imeans_data = ratings.groupby("item").rating.mean() imeans_algo = algo.item_offsets_ + algo.mean_ ares, data = imeans_algo.align(imeans_data) assert ares.values == approx(data.values) - urates = ratings.set_index('user').loc[2].set_index('item').rating + urates = ratings.set_index("user").loc[2].set_index("item").rating umean = (urates - imeans_data[urates.index]).mean() p = algo.predict_for_user(2, [10, 11, -1]) assert len(p) == 3 @@ -200,15 +200,15 @@ def test_bias_transform(): normed = algo.fit_transform(ratings) - assert all(normed['user'] == ratings['user']) - assert all(normed['item'] == ratings['item']) + assert all(normed["user"] == ratings["user"]) + assert all(normed["item"] == ratings["item"]) denorm = algo.inverse_transform(normed) - assert denorm['rating'].values == approx(ratings['rating'], 1.0e-6) + assert denorm["rating"].values == 
approx(ratings["rating"], 1.0e-6) - n2 = ratings.join(algo.item_offsets_, on='item') - n2 = n2.join(algo.user_offsets_, on='user') + n2 = ratings.join(algo.item_offsets_, on="item") + n2 = n2.join(algo.user_offsets_, on="user") nr = n2.rating - algo.mean_ - n2.i_off - n2.u_off - assert normed['rating'].values == approx(nr.values) + assert normed["rating"].values == approx(nr.values) def test_bias_transform_indexes(): @@ -217,35 +217,35 @@ def test_bias_transform_indexes(): normed = algo.fit_transform(ratings, indexes=True) - assert all(normed['user'] == ratings['user']) - assert all(normed['item'] == ratings['item']) - assert all(normed['uidx'] == algo.user_offsets_.index.get_indexer(ratings['user'])) - assert all(normed['iidx'] == algo.item_offsets_.index.get_indexer(ratings['item'])) + assert all(normed["user"] == ratings["user"]) + assert all(normed["item"] == ratings["item"]) + assert all(normed["uidx"] == algo.user_offsets_.index.get_indexer(ratings["user"])) + assert all(normed["iidx"] == algo.item_offsets_.index.get_indexer(ratings["item"])) denorm = algo.inverse_transform(normed) - assert denorm['rating'].values == approx(ratings['rating'].values, 1.0e-6) + assert denorm["rating"].values == approx(ratings["rating"].values, 1.0e-6) -@mark.parametrize(['users', 'items'], [(True, False), (False, True), (False, False)]) +@mark.parametrize(["users", "items"], [(True, False), (False, True), (False, False)]) def test_bias_transform_disable(users, items): algo = Bias(users=users, items=items) ratings = ml_test.ratings normed = algo.fit_transform(ratings) - assert all(normed['user'] == ratings['user']) - assert all(normed['item'] == ratings['item']) + assert all(normed["user"] == ratings["user"]) + assert all(normed["item"] == ratings["item"]) denorm = algo.inverse_transform(normed) - assert denorm['rating'].values == approx(ratings['rating'], 1.0e-6) + assert denorm["rating"].values == approx(ratings["rating"], 1.0e-6) n2 = ratings nr = n2.rating - algo.mean_ if items: - n2 = n2.join(algo.item_offsets_, on='item') + n2 = n2.join(algo.item_offsets_, on="item") nr = nr - n2.i_off if users: - n2 = n2.join(algo.user_offsets_, on='user') + n2 = n2.join(algo.user_offsets_, on="user") nr = nr - n2.u_off - assert normed['rating'].values == approx(nr.values) + assert normed["rating"].values == approx(nr.values) def test_bias_item_damp(): @@ -254,7 +254,7 @@ def test_bias_item_damp(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 0.25, -0.25])) @@ -268,10 +268,11 @@ def test_bias_user_damp(): assert algo.item_offsets_ is None assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.2857, -0.08333, -0.25]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.2857, -0.08333, -0.25]), abs=1.0e-4 + ) def test_bias_damped(): @@ -280,15 +281,16 @@ def test_bias_damped(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert 
algo.item_offsets_.loc[1:3].values == approx(np.array([0, 0.25, -0.25])) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4 + ) def test_bias_separate_damping(): @@ -297,64 +299,76 @@ def test_bias_separate_damping(): assert algo.mean_ == approx(3.5) assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) - assert algo.item_offsets_.loc[1:3].values == \ - approx(np.array([0, 0.136364, -0.13636]), abs=1.0e-4) + assert algo.item_offsets_.loc[1:3].values == approx( + np.array([0, 0.136364, -0.13636]), abs=1.0e-4 + ) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.266234, -0.08333, -0.22727]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.266234, -0.08333, -0.22727]), abs=1.0e-4 + ) + def test_transform_user_with_user_bias(): algo = Bias() algo.fit(simple_df) - new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values + new_ratings = pd.Series([4.0, 5.0], index=[1, 2]) # items as index and ratings as values - ratings_with_bias, user_bias = algo.transform_user(new_ratings) # user: 13 + ratings_with_bias, user_bias = algo.transform_user(new_ratings) # user: 13 result = algo.inverse_transform_user(13, ratings_with_bias, user_bias) assert new_ratings[1] == result[1] assert new_ratings[2] == result[2] + def test_transform_user_without_user_bias(): user = 12 algo = Bias() algo.fit(simple_df) - new_ratings = pd.Series([-0.5, 1.5], index=[2, 3]) # items as index and ratings as values + new_ratings = pd.Series([-0.5, 1.5], index=[2, 3]) # items as index and ratings as values v = algo.inverse_transform_user(user, new_ratings) - assert v[2] == new_ratings[2] + algo.user_offsets_.loc[user] + algo.item_offsets_.loc[2] + algo.mean_ - assert v[3] == new_ratings[3] + algo.user_offsets_.loc[user] + algo.item_offsets_.loc[3] + algo.mean_ + assert ( + v[2] + == new_ratings[2] + algo.user_offsets_.loc[user] + algo.item_offsets_.loc[2] + algo.mean_ + ) + assert ( + v[3] + == new_ratings[3] + algo.user_offsets_.loc[user] + algo.item_offsets_.loc[3] + algo.mean_ + ) + def test_bias_save(): original = Bias(damping=5) original.fit(simple_df) assert original.mean_ == approx(3.5) - _log.info('saving baseline model') + _log.info("saving baseline model") mod = pickle.dumps(original) - _log.info('serialized to %d bytes', len(mod)) + _log.info("serialized to %d bytes", len(mod)) algo = pickle.loads(mod) assert algo.mean_ == original.mean_ assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 0.25, -0.25])) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name 
== "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4 + ) def test_bias_binpickle(tmp_path): @@ -362,20 +376,21 @@ def test_bias_binpickle(tmp_path): original.fit(simple_df) assert original.mean_ == approx(3.5) - _log.info('saving baseline model') - fn = tmp_path / 'bias.bpk' + _log.info("saving baseline model") + fn = tmp_path / "bias.bpk" binpickle.dump(original, fn) algo = binpickle.load(fn) assert algo.mean_ == original.mean_ assert algo.item_offsets_ is not None - assert algo.item_offsets_.index.name == 'item' + assert algo.item_offsets_.index.name == "item" assert set(algo.item_offsets_.index) == set([1, 2, 3]) assert algo.item_offsets_.loc[1:3].values == approx(np.array([0, 0.25, -0.25])) assert algo.user_offsets_ is not None - assert algo.user_offsets_.index.name == 'user' + assert algo.user_offsets_.index.name == "user" assert set(algo.user_offsets_.index) == set([10, 12, 13]) - assert algo.user_offsets_.loc[[10, 12, 13]].values == \ - approx(np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4) + assert algo.user_offsets_.loc[[10, 12, 13]].values == approx( + np.array([0.25, -00.08333, -0.20833]), abs=1.0e-4 + ) diff --git a/tests/test_candidate_selector.py b/tests/test_candidate_selector.py index e5eae1615..0d614a0b2 100644 --- a/tests/test_candidate_selector.py +++ b/tests/test_candidate_selector.py @@ -4,9 +4,9 @@ import pandas as pd import numpy as np -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_empty(): @@ -45,7 +45,7 @@ def test_unrated_big(): ratings = lktu.ml_test.ratings users = ratings.user.unique() items = ratings.item.unique() - user_items = ratings.set_index('user').item + user_items = ratings.set_index("user").item sel = basic.UnratedItemCandidateSelector() s2 = sel.fit(ratings) diff --git a/tests/test_crossfold.py b/tests/test_crossfold.py index 04efadb53..aa660f2c3 100644 --- a/tests/test_crossfold.py +++ b/tests/test_crossfold.py @@ -19,8 +19,8 @@ def test_partition_rows(): for s in splits: assert len(s.test) + len(s.train) == len(ratings) assert all(s.test.index.union(s.train.index) == ratings.index) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 # we should partition! 
@@ -28,8 +28,8 @@ def test_partition_rows(): if s1 is s2: continue - i1 = s1.test.set_index(['user', 'item']).index - i2 = s2.test.set_index(['user', 'item']).index + i1 = s1.test.set_index(["user", "item"]).index + i2 = s2.test.set_index(["user", "item"]).index inter = i1.intersection(i2) assert len(inter) == 0 @@ -46,16 +46,16 @@ def test_sample_rows(): for s in splits: assert len(s.test) == 1000 assert len(s.test) + len(s.train) == len(ratings) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 for s1, s2 in it.product(splits, splits): if s1 is s2: continue - i1 = s1.test.set_index(['user', 'item']).index - i2 = s2.test.set_index(['user', 'item']).index + i1 = s1.test.set_index(["user", "item"]).index + i2 = s2.test.set_index(["user", "item"]).index inter = i1.intersection(i2) assert len(inter) == 0 @@ -69,16 +69,16 @@ def test_sample_rows_more_smaller_parts(): for s in splits: assert len(s.test) == 500 assert len(s.test) + len(s.train) == len(ratings) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 for s1, s2 in it.product(splits, splits): if s1 is s2: continue - i1 = s1.test.set_index(['user', 'item']).index - i2 = s2.test.set_index(['user', 'item']).index + i1 = s1.test.set_index(["user", "item"]).index + i2 = s2.test.set_index(["user", "item"]).index inter = i1.intersection(i2) assert len(inter) == 0 @@ -92,13 +92,15 @@ def test_sample_non_disjoint(): for s in splits: assert len(s.test) == 1000 assert len(s.test) + len(s.train) == len(ratings) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 # There are enough splits & items we should pick at least one duplicate - ipairs = ((s1.test.set_index('user', 'item').index, s2.test.set_index('user', 'item').index) - for (s1, s2) in it.product(splits, splits)) + ipairs = ( + (s1.test.set_index("user", "item").index, s2.test.set_index("user", "item").index) + for (s1, s2) in it.product(splits, splits) + ) isizes = [len(i1.intersection(i2)) for (i1, i2) in ipairs] assert any(n > 0 for n in isizes) @@ -113,8 +115,8 @@ def test_sample_oversize(): for s in splits: assert len(s.test) + len(s.train) == len(ratings) assert all(s.test.index.union(s.train.index) == ratings.index) - test_idx = s.test.set_index(['user', 'item']).index - train_idx = s.train.set_index(['user', 'item']).index + test_idx = s.test.set_index(["user", "item"]).index + train_idx = s.train.set_index(["user", "item"]).index assert len(test_idx.intersection(train_idx)) == 0 @@ -190,7 +192,7 @@ def test_last_frac(): ratings = lktu.ml_test.ratings users = np.random.choice(ratings.user.unique(), 5, replace=False) - samp = xf.LastFrac(0.2, 'timestamp') + samp = xf.LastFrac(0.2, "timestamp") for u in users: udf = ratings[ratings.user == u] tst = samp(udf) @@ -200,7 +202,7 @@ def test_last_frac(): assert len(tst) <= math.ceil(len(udf) * 0.2) assert tst.timestamp.min() >= trn.timestamp.max() - samp = xf.LastFrac(0.5, 'timestamp') + samp = 
xf.LastFrac(0.5, "timestamp") for u in users: udf = ratings[ratings.user == u] tst = samp(udf) @@ -218,14 +220,13 @@ def test_partition_users(): assert len(splits) == 5 for s in splits: - ucounts = s.test.groupby('user').agg('count') + ucounts = s.test.groupby("user").agg("count") assert all(ucounts == 5) assert all(s.test.index.union(s.train.index) == ratings.index) - assert all(s.train['user'].isin(s.train['user'].unique())) + assert all(s.train["user"].isin(s.train["user"].unique())) assert len(s.test) + len(s.train) == len(ratings) - users = ft.reduce(lambda us1, us2: us1 | us2, - (set(s.test.user) for s in splits)) + users = ft.reduce(lambda us1, us2: us1 | us2, (set(s.test.user) for s in splits)) assert len(users) == ratings.user.nunique() assert users == set(ratings.user) @@ -235,9 +236,9 @@ def test_partition_may_skip_train(): ratings = lktu.ml_test.ratings # make a data set where some users only have 1 rating ratings = ratings.sample(frac=0.1) - users = ratings.groupby('user')['rating'].count() + users = ratings.groupby("user")["rating"].count() assert users.min() == 1.0 # we should have some small users! - users.name = 'ur_count' + users.name = "ur_count" splits = xf.partition_users(ratings, 5, xf.SampleN(1)) splits = list(splits) @@ -246,12 +247,12 @@ def test_partition_may_skip_train(): # now we go make sure we're missing some users! And don't have any NaN ratings for train, test in splits: # no null ratings - assert all(train['rating'].notna()) + assert all(train["rating"].notna()) # see if test users with 1 rating are missing from train - test = test.join(users, on='user') - assert all(~(test.loc[test['ur_count'] == 1, 'user'].isin(train['user'].unique()))) + test = test.join(users, on="user") + assert all(~(test.loc[test["ur_count"] == 1, "user"].isin(train["user"].unique()))) # and users with more than one rating are in train - assert all(test.loc[test['ur_count'] > 1, 'user'].isin(train['user'].unique())) + assert all(test.loc[test["ur_count"] > 1, "user"].isin(train["user"].unique())) def test_partition_users_frac(): @@ -259,19 +260,18 @@ def test_partition_users_frac(): splits = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) splits = list(splits) assert len(splits) == 5 - ucounts = ratings.groupby('user').item.count() + ucounts = ratings.groupby("user").item.count() uss = ucounts * 0.2 for s in splits: - tucs = s.test.groupby('user').item.count() + tucs = s.test.groupby("user").item.count() assert all(tucs >= uss.loc[tucs.index] - 1) assert all(tucs <= uss.loc[tucs.index] + 1) assert all(s.test.index.union(s.train.index) == ratings.index) assert len(s.test) + len(s.train) == len(ratings) # we have all users - users = ft.reduce(lambda us1, us2: us1 | us2, - (set(s.test.user) for s in splits)) + users = ft.reduce(lambda us1, us2: us1 | us2, (set(s.test.user) for s in splits)) assert len(users) == ratings.user.nunique() assert users == set(ratings.user) @@ -283,7 +283,7 @@ def test_sample_users(): assert len(splits) == 5 for s in splits: - ucounts = s.test.groupby('user').agg('count') + ucounts = s.test.groupby("user").agg("count") assert len(s.test) == 5 * 100 assert len(ucounts) == 100 assert all(ucounts == 5) @@ -304,11 +304,11 @@ def test_sample_users_frac(): splits = xf.sample_users(ratings, 5, 100, xf.SampleFrac(0.2)) splits = list(splits) assert len(splits) == 5 - ucounts = ratings.groupby('user').item.count() + ucounts = ratings.groupby("user").item.count() uss = ucounts * 0.2 for s in splits: - tucs = s.test.groupby('user').item.count() + tucs = 
s.test.groupby("user").item.count() assert len(tucs) == 100 assert all(tucs >= uss.loc[tucs.index] - 1) assert all(tucs <= uss.loc[tucs.index] + 1) @@ -332,14 +332,13 @@ def test_sample_users_frac_oversize(): assert len(splits) == 20 for s in splits: - ucounts = s.test.groupby('user').agg('count') + ucounts = s.test.groupby("user").agg("count") assert len(ucounts) < 100 assert all(ucounts == 5) assert all(s.test.index.union(s.train.index) == ratings.index) assert len(s.test) + len(s.train) == len(ratings) - users = ft.reduce(lambda us1, us2: us1 | us2, - (set(s.test.user) for s in splits)) + users = ft.reduce(lambda us1, us2: us1 | us2, (set(s.test.user) for s in splits)) assert len(users) == ratings.user.nunique() assert users == set(ratings.user) for s1, s2 in it.product(splits, splits): @@ -358,7 +357,7 @@ def test_sample_users_frac_oversize_ndj(): assert len(splits) == 20 for s in splits: - ucounts = s.test.groupby('user').agg('count') + ucounts = s.test.groupby("user").agg("count") assert len(ucounts) == 100 assert len(s.test) == 5 * 100 assert all(ucounts == 5) @@ -369,7 +368,7 @@ def test_sample_users_frac_oversize_ndj(): def test_non_unique_index_partition_users(): """Partitioning users when dataframe has non-unique indices""" ratings = lktu.ml_test.ratings - ratings = ratings.set_index('user') ##forces non-unique index + ratings = ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): for split in xf.partition_users(ratings, 5, xf.SampleN(5)): pass @@ -378,7 +377,7 @@ def test_non_unique_index_partition_users(): def test_sample_users(): """Sampling users when dataframe has non-unique indices""" ratings = lktu.ml_test.ratings - ratings = ratings.set_index('user') ##forces non-unique index + ratings = ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): for split in xf.sample_users(ratings, 5, 100, xf.SampleN(5)): pass @@ -387,7 +386,7 @@ def test_sample_users(): def test_sample_rows(): """Sampling ratings when dataframe has non-unique indices""" ratings = lktu.ml_test.ratings - ratings = ratings.set_index('user') ##forces non-unique index + ratings = ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): for split in xf.sample_rows(ratings, partitions=5, size=1000): pass @@ -396,7 +395,7 @@ def test_sample_rows(): def test_partition_users(): """Partitioning ratings when dataframe has non-unique indices""" ratings = lktu.ml_test.ratings - ratings = ratings.set_index('user') ##forces non-unique index + ratings = ratings.set_index("user") ##forces non-unique index with pytest.raises(ValueError): for split in xf.partition_users(ratings, 5, xf.SampleN(5)): pass diff --git a/tests/test_fallback.py b/tests/test_fallback.py index ba958bf43..b2644b866 100644 --- a/tests/test_fallback.py +++ b/tests/test_fallback.py @@ -9,9 +9,9 @@ import lenskit.util.test as lktu from pytest import approx -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_fallback_train_one(): @@ -42,15 +42,15 @@ def test_fallback_list(): assert len(algo.algorithms) == 2 params = algo.get_params() - assert list(params.keys()) == ['algorithms'] - assert len(params['algorithms']) == 2 - assert isinstance(params['algorithms'][0], basic.Memorized) - assert isinstance(params['algorithms'][1], Bias) + assert list(params.keys()) == ["algorithms"] + assert 
len(params["algorithms"]) == 2 + assert isinstance(params["algorithms"][0], basic.Memorized) + assert isinstance(params["algorithms"][1], Bias) def test_fallback_string(): algo = basic.Fallback([basic.Memorized(simple_df), Bias()]) - assert 'Fallback' in str(algo) + assert "Fallback" in str(algo) def test_fallback_clone(): @@ -110,7 +110,7 @@ def test_fallback_save_load(tmp_path): original = basic.Fallback(basic.Memorized(simple_df), Bias()) original.fit(lktu.ml_test.ratings) - fn = tmp_path / 'fb.mod' + fn = tmp_path / "fb.mod" binpickle.dump(original, fn) diff --git a/tests/test_funksvd.py b/tests/test_funksvd.py index 149ed1f2d..ffcc2ac7f 100644 --- a/tests/test_funksvd.py +++ b/tests/test_funksvd.py @@ -13,9 +13,9 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_fsvd_basic_build(): @@ -136,7 +136,7 @@ def test_fsvd_save_load(): assert original.user_features_.shape == (ratings.user.nunique(), 20) mod = pickle.dumps(original) - _log.info('serialized to %d bytes', len(mod)) + _log.info("serialized to %d bytes", len(mod)) algo = pickle.loads(mod) assert algo.bias.mean_ == original.bias.mean_ @@ -151,7 +151,7 @@ def test_fsvd_save_load(): @lktu.wantjit @mark.slow def test_fsvd_train_binary(): - ratings = lktu.ml_test.ratings.drop(columns=['rating', 'timestamp']) + ratings = lktu.ml_test.ratings.drop(columns=["rating", "timestamp"]) original = svd.FunkSVD(20, iterations=20, bias=False) original.fit(ratings) @@ -165,19 +165,19 @@ def test_fsvd_train_binary(): @mark.slow def test_fsvd_known_preds(): algo = svd.FunkSVD(15, iterations=125, lrate=0.001) - _log.info('training %s on ml data', algo) + _log.info("training %s on ml data", algo) algo.fit(lktu.ml_test.ratings) dir = Path(__file__).parent - pred_file = dir / 'funksvd-preds.csv' - _log.info('reading known predictions from %s', pred_file) + pred_file = dir / "funksvd-preds.csv" + _log.info("reading known predictions from %s", pred_file) known_preds = pd.read_csv(str(pred_file)) - pairs = known_preds.loc[:, ['user', 'item']] + pairs = known_preds.loc[:, ["user", "item"]] preds = algo.predict(pairs) - known_preds.rename(columns={'prediction': 'expected'}, inplace=True) + known_preds.rename(columns={"prediction": "expected"}, inplace=True) merged = known_preds.assign(prediction=preds) - merged['error'] = merged.expected - merged.prediction + merged["error"] = merged.expected - merged.prediction assert not any(merged.prediction.isna() & merged.expected.notna()) err = merged.error err = err[err.notna()] @@ -185,14 +185,14 @@ def test_fsvd_known_preds(): assert all(err.abs() < 0.01) except AssertionError as e: bad = merged[merged.error.notna() & (merged.error.abs() >= 0.01)] - _log.error('erroneous predictions:\n%s', bad) + _log.error("erroneous predictions:\n%s", bad) raise e @lktu.wantjit @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_fsvd_batch_accuracy(): from lenskit.algorithms import basic from lenskit.algorithms import bias @@ -206,9 +206,9 @@ def test_fsvd_batch_accuracy(): algo = basic.Fallback(svd_algo, bias.Bias(damping=10)) def eval(train, test): - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + 
_log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test) folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) @@ -216,5 +216,5 @@ def eval(train, test): mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.74, abs=0.025) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.92, abs=0.05) diff --git a/tests/test_knn_item_item.py b/tests/test_knn_item_item.py index 0518b1cb2..e4a3f6097 100644 --- a/tests/test_knn_item_item.py +++ b/tests/test_knn_item_item.py @@ -28,59 +28,62 @@ _log = logging.getLogger(__name__) ml_ratings = lktu.ml_test.ratings -simple_ratings = pd.DataFrame.from_records([ - (1, 6, 4.0), - (2, 6, 2.0), - (1, 7, 3.0), - (2, 7, 2.0), - (3, 7, 5.0), - (4, 7, 2.0), - (1, 8, 3.0), - (2, 8, 4.0), - (3, 8, 3.0), - (4, 8, 2.0), - (5, 8, 3.0), - (6, 8, 2.0), - (1, 9, 3.0), - (3, 9, 4.0) -], columns=['user', 'item', 'rating']) - - -@fixture(scope='module') +simple_ratings = pd.DataFrame.from_records( + [ + (1, 6, 4.0), + (2, 6, 2.0), + (1, 7, 3.0), + (2, 7, 2.0), + (3, 7, 5.0), + (4, 7, 2.0), + (1, 8, 3.0), + (2, 8, 4.0), + (3, 8, 3.0), + (4, 8, 2.0), + (5, 8, 3.0), + (6, 8, 2.0), + (1, 9, 3.0), + (3, 9, 4.0), + ], + columns=["user", "item", "rating"], +) + + +@fixture(scope="module") def ml_subset(): "Fixture that returns a subset of the MovieLens database." ratings = lktu.ml_test.ratings - icounts = ratings.groupby('item').rating.count() + icounts = ratings.groupby("item").rating.count() top = icounts.nlargest(500) - ratings = ratings.set_index('item') + ratings = ratings.set_index("item") top_rates = ratings.loc[top.index, :] - _log.info('top 500 items yield %d of %d ratings', len(top_rates), len(ratings)) + _log.info("top 500 items yield %d of %d ratings", len(top_rates), len(ratings)) return top_rates.reset_index() def test_ii_dft_config(): algo = knn.ItemItem(30, save_nbrs=500) assert algo.center - assert algo.aggregate == 'weighted-average' + assert algo.aggregate == "weighted-average" assert algo.use_ratings def test_ii_exp_config(): - algo = knn.ItemItem(30, save_nbrs=500, feedback='explicit') + algo = knn.ItemItem(30, save_nbrs=500, feedback="explicit") assert algo.center - assert algo.aggregate == 'weighted-average' + assert algo.aggregate == "weighted-average" assert algo.use_ratings def test_ii_imp_config(): - algo = knn.ItemItem(30, save_nbrs=500, feedback='implicit') + algo = knn.ItemItem(30, save_nbrs=500, feedback="implicit") assert not algo.center - assert algo.aggregate == 'sum' + assert algo.aggregate == "sum" assert not algo.use_ratings def test_ii_imp_clone(): - algo = knn.ItemItem(30, save_nbrs=500, feedback='implicit') + algo = knn.ItemItem(30, save_nbrs=500, feedback="implicit") a2 = clone(algo) assert a2.get_params() == algo.get_params() @@ -98,17 +101,17 @@ def test_ii_train(): # 6 is a neighbor of 7 six, seven = algo.item_index_.get_indexer([6, 7]) - _log.info('six: %d', six) - _log.info('seven: %d', seven) - _log.info('matrix: %s', algo.sim_matrix_) + _log.info("six: %d", six) + _log.info("seven: %d", seven) + _log.info("matrix: %s", algo.sim_matrix_) assert matrix[six, seven] > 0 # and has the correct score - six_v = simple_ratings[simple_ratings.item == 6].set_index('user').rating + six_v = simple_ratings[simple_ratings.item == 6].set_index("user").rating six_v = six_v - six_v.mean() - seven_v = simple_ratings[simple_ratings.item == 
7].set_index('user').rating + seven_v = simple_ratings[simple_ratings.item == 7].set_index("user").rating seven_v = seven_v - seven_v.mean() denom = la.norm(six_v.values) * la.norm(seven_v.values) - six_v, seven_v = six_v.align(seven_v, join='inner') + six_v, seven_v = six_v.align(seven_v, join="inner") num = six_v.dot(seven_v) assert matrix[six, seven] == approx(num / denom, 0.01) @@ -133,12 +136,12 @@ def test_ii_train_unbounded(): assert matrix[six, seven] > 0 # and has the correct score - six_v = simple_ratings[simple_ratings.item == 6].set_index('user').rating + six_v = simple_ratings[simple_ratings.item == 6].set_index("user").rating six_v = six_v - six_v.mean() - seven_v = simple_ratings[simple_ratings.item == 7].set_index('user').rating + seven_v = simple_ratings[simple_ratings.item == 7].set_index("user").rating seven_v = seven_v - seven_v.mean() denom = la.norm(six_v.values) * la.norm(seven_v.values) - six_v, seven_v = six_v.align(seven_v, join='inner') + six_v, seven_v = six_v.align(seven_v, join="inner") num = six_v.dot(seven_v) assert matrix[six, seven] == approx(num / denom, 0.01) @@ -155,8 +158,8 @@ def test_ii_simple_predict(): def test_ii_simple_implicit_predict(): - algo = knn.ItemItem(30, center=False, aggregate='sum') - algo.fit(simple_ratings.loc[:, ['user', 'item']]) + algo = knn.ItemItem(30, center=False, aggregate="sum") + algo.fit(simple_ratings.loc[:, ["user", "item"]]) res = algo.predict_for_user(3, [6]) assert res is not None @@ -168,9 +171,7 @@ def test_ii_simple_implicit_predict(): @mark.skip("currently broken") def test_ii_warn_duplicates(): - extra = pd.DataFrame.from_records([ - (3, 7, 4.5) - ], columns=['user', 'item', 'rating']) + extra = pd.DataFrame.from_records([(3, 7, 4.5)], columns=["user", "item", "rating"]) ratings = pd.concat([simple_ratings, extra]) algo = knn.ItemItem(5) algo.fit(ratings) @@ -193,7 +194,7 @@ def test_ii_warns_center(): def test_ii_warns_center_with_no_use_ratings(): "Test that item-item warns if you configure to ignore ratings but center." 
with pytest.warns(ConfigWarning): - knn.ItemItem(5, use_ratings=False, aggregate='sum') + knn.ItemItem(5, use_ratings=False, aggregate="sum") def test_ii_warns_wa_with_no_use_ratings(): @@ -216,7 +217,7 @@ def test_ii_train_big(): assert algo.item_counts_.sum() == algo.sim_matrix_.nnz - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo.item_index_].values == approx(algo.item_means_) @@ -234,20 +235,20 @@ def test_ii_train_big_unbounded(): assert algo.item_counts_.sum() == algo.sim_matrix_.nnz - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo.item_index_].values == approx(algo.item_means_) @lktu.wantjit -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_ii_train_ml100k(tmp_path): "Test an unbounded model on ML-100K" ratings = lktu.ml100k.ratings algo = knn.ItemItem(30) - _log.info('training model') + _log.info("training model") algo.fit(ratings) - _log.info('testing model') + _log.info("testing model") assert all(np.logical_not(np.isnan(algo.sim_matrix_.values))) assert all(algo.sim_matrix_.values > 0) @@ -257,17 +258,17 @@ def test_ii_train_ml100k(tmp_path): assert algo.item_counts_.sum() == algo.sim_matrix_.nnz - means = ratings.groupby('item').rating.mean() + means = ratings.groupby("item").rating.mean() assert means[algo.item_index_].values == approx(algo.item_means_) # save - fn = tmp_path / 'ii.mod' - _log.info('saving model to %s', fn) - with fn.open('wb') as modf: + fn = tmp_path / "ii.mod" + _log.info("saving model to %s", fn) + with fn.open("wb") as modf: pickle.dump(algo, modf) - _log.info('reloading model') - with fn.open('rb') as modf: + _log.info("reloading model") + with fn.open("rb") as modf: restored = pickle.load(modf) assert all(restored.sim_matrix_.values > 0) @@ -290,22 +291,22 @@ def test_ii_train_ml100k(tmp_path): @mark.slow def test_ii_large_models(): "Several tests of large trained I-I models" - _log.info('training limited model') + _log.info("training limited model") MODEL_SIZE = 100 algo_lim = knn.ItemItem(30, save_nbrs=MODEL_SIZE) algo_lim.fit(ml_ratings) - _log.info('training unbounded model') + _log.info("training unbounded model") algo_ub = knn.ItemItem(30) algo_ub.fit(ml_ratings) - _log.info('testing models') + _log.info("testing models") assert all(np.logical_not(np.isnan(algo_lim.sim_matrix_.values))) assert all(algo_lim.sim_matrix_.values > 0) # a little tolerance assert all(algo_lim.sim_matrix_.values < 1 + 1.0e-6) - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo_lim.item_index_].values == approx(algo_lim.item_means_) assert all(np.logical_not(np.isnan(algo_ub.sim_matrix_.values))) @@ -313,24 +314,26 @@ def test_ii_large_models(): # a little tolerance assert all(algo_ub.sim_matrix_.values < 1 + 1.0e-6) - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo_ub.item_index_].values == approx(algo_ub.item_means_) - mc_rates = ml_ratings.set_index('item')\ - .join(pd.DataFrame({'item_mean': means}))\ - .assign(rating=lambda df: df.rating - df.item_mean) + mc_rates = ( + ml_ratings.set_index("item") + .join(pd.DataFrame({"item_mean": means})) + .assign(rating=lambda df: df.rating - df.item_mean) + ) mat_lim = algo_lim.sim_matrix_.to_scipy() mat_ub = algo_ub.sim_matrix_.to_scipy() - 
_log.info('checking a sample of neighborhoods') + _log.info("checking a sample of neighborhoods") items = pd.Series(algo_ub.item_index_) items = items[algo_ub.item_counts_ > 0] for i in items.sample(50): ipos = algo_ub.item_index_.get_loc(i) - _log.debug('checking item %d at position %d', i, ipos) + _log.debug("checking item %d at position %d", i, ipos) assert ipos == algo_lim.item_index_.get_loc(i) - irates = mc_rates.loc[[i], :].set_index('user').rating + irates = mc_rates.loc[[i], :].set_index("user").rating ub_row = mat_ub.getrow(ipos) b_row = mat_lim.getrow(ipos) @@ -345,14 +348,14 @@ def test_ii_large_models(): # spot-check some similarities for n in pd.Series(ub_row.indices).sample(min(10, len(ub_row.indices))): n_id = algo_ub.item_index_[n] - n_rates = mc_rates.loc[n_id, :].set_index('user').rating + n_rates = mc_rates.loc[n_id, :].set_index("user").rating ir, nr = irates.align(n_rates, fill_value=0) cor = ir.corr(nr) assert mat_ub[ipos, n] == approx(cor) # short rows are equal if b_row.nnz < MODEL_SIZE: - _log.debug('short row of length %d', b_row.nnz) + _log.debug("short row of length %d", b_row.nnz) assert b_row.nnz == ub_row.nnz ub_row.sort_indices() b_row.sort_indices() @@ -367,7 +370,7 @@ def test_ii_large_models(): assert len(b_nbrs) <= MODEL_SIZE assert all(b_nbrs.index.isin(ub_nbrs.index)) # the similarities should be equal! - b_match, ub_match = b_nbrs.align(ub_nbrs, join='inner') + b_match, ub_match = b_nbrs.align(ub_nbrs, join="inner") assert all(b_match == b_nbrs) assert b_match.values == approx(ub_match.values) assert b_nbrs.max() == approx(ub_nbrs.max()) @@ -385,19 +388,19 @@ def test_ii_large_models(): def test_ii_save_load(tmp_path, ml_subset): "Save and load a model" original = knn.ItemItem(30, save_nbrs=500) - _log.info('building model') + _log.info("building model") original.fit(ml_subset) - fn = tmp_path / 'ii.mod' - _log.info('saving model to %s', fn) - with fn.open('wb') as modf: + fn = tmp_path / "ii.mod" + _log.info("saving model to %s", fn) + with fn.open("wb") as modf: pickle.dump(original, modf) - _log.info('reloading model') - with fn.open('rb') as modf: + _log.info("reloading model") + with fn.open("rb") as modf: algo = pickle.load(modf) - _log.info('checking model') + _log.info("checking model") assert all(np.logical_not(np.isnan(algo.sim_matrix_.values))) assert all(algo.sim_matrix_.values > 0) # a little tolerance @@ -421,7 +424,7 @@ def test_ii_save_load(tmp_path, ml_subset): assert all(np.diff(r_mat.values[sp:ep]) <= 0) assert all(r_mat.values[sp:ep] == o_mat.values[sp:ep]) - means = ml_ratings.groupby('item').rating.mean() + means = ml_ratings.groupby("item").rating.mean() assert means[algo.item_index_].values == approx(original.item_means_) matrix = algo.sim_matrix_.to_scipy() @@ -430,7 +433,7 @@ def test_ii_save_load(tmp_path, ml_subset): items = items[algo.item_counts_ > 0] for i in items.sample(50): ipos = algo.item_index_.get_loc(i) - _log.debug('checking item %d at position %d', i, ipos) + _log.debug("checking item %d at position %d", i, ipos) row = matrix.getrow(ipos) @@ -441,20 +444,20 @@ def test_ii_save_load(tmp_path, ml_subset): def test_ii_implicit_save_load(tmp_path, ml_subset): "Save and load a model" - original = knn.ItemItem(30, save_nbrs=500, center=False, aggregate='sum') - _log.info('building model') - original.fit(ml_subset.loc[:, ['user', 'item']]) + original = knn.ItemItem(30, save_nbrs=500, center=False, aggregate="sum") + _log.info("building model") + original.fit(ml_subset.loc[:, ["user", "item"]]) - fn = tmp_path / 
'ii.mod' - _log.info('saving model to %s', fn) - with fn.open('wb') as modf: + fn = tmp_path / "ii.mod" + _log.info("saving model to %s", fn) + with fn.open("wb") as modf: pickle.dump(original, modf) - _log.info('reloading model') - with fn.open('rb') as modf: + _log.info("reloading model") + with fn.open("rb") as modf: algo = pickle.load(modf) - _log.info('checking model') + _log.info("checking model") assert all(np.logical_not(np.isnan(algo.sim_matrix_.values))) assert all(algo.sim_matrix_.values > 0) # a little tolerance @@ -487,7 +490,7 @@ def test_ii_implicit_save_load(tmp_path, ml_subset): items = items[algo.item_counts_ > 0] for i in items.sample(50): ipos = algo.item_index_.get_loc(i) - _log.debug('checking item %d at position %d', i, ipos) + _log.debug("checking item %d at position %d", i, ipos) row = matrix.getrow(ipos) @@ -499,8 +502,8 @@ def test_ii_implicit_save_load(tmp_path, ml_subset): @lktu.wantjit @mark.slow def test_ii_old_implicit(): - algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum') - data = ml_ratings.loc[:, ['user', 'item']] + algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate="sum") + data = ml_ratings.loc[:, ["user", "item"]] algo.fit(data) assert algo.item_counts_.sum() == algo.sim_matrix_.nnz @@ -514,10 +517,10 @@ def test_ii_old_implicit(): @lktu.wantjit @mark.slow def test_ii_no_ratings(): - a1 = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum') - a1.fit(ml_ratings.loc[:, ['user', 'item']]) + a1 = knn.ItemItem(20, save_nbrs=100, center=False, aggregate="sum") + a1.fit(ml_ratings.loc[:, ["user", "item"]]) - algo = knn.ItemItem(20, save_nbrs=100, feedback='implicit') + algo = knn.ItemItem(20, save_nbrs=100, feedback="implicit") algo.fit(ml_ratings) assert algo.item_counts_.sum() == algo.sim_matrix_.nnz @@ -533,8 +536,8 @@ def test_ii_no_ratings(): @mark.slow def test_ii_implicit_fast_ident(): - algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate='sum') - data = ml_ratings.loc[:, ['user', 'item']] + algo = knn.ItemItem(20, save_nbrs=100, center=False, aggregate="sum") + data = ml_ratings.loc[:, ["user", "item"]] algo.fit(data) assert algo.item_counts_.sum() == algo.sim_matrix_.nnz @@ -553,7 +556,7 @@ def test_ii_implicit_fast_ident(): @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_ii_batch_accuracy(): from lenskit.algorithms import basic from lenskit.algorithms import bias @@ -567,18 +570,18 @@ def test_ii_batch_accuracy(): algo = basic.Fallback(ii_algo, bias.Bias()) def eval(train, test): - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test, n_jobs=4) - preds = pd.concat((eval(train, test) - for (train, test) - in xf.partition_users(ratings, 5, xf.SampleFrac(0.2)))) + preds = pd.concat( + (eval(train, test) for (train, test) in xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) + ) mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.70, abs=0.025) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.90, abs=0.05) @@ -588,27 +591,27 @@ def test_ii_known_preds(): from lenskit import batch algo = knn.ItemItem(20, min_sim=1.0e-6) - 
_log.info('training %s on ml data', algo) + _log.info("training %s on ml data", algo) algo.fit(lktu.ml_test.ratings) assert algo.center assert algo.item_means_ is not None - _log.info('model means: %s', algo.item_means_) + _log.info("model means: %s", algo.item_means_) dir = Path(__file__).parent - pred_file = dir / 'item-item-preds.csv' - _log.info('reading known predictions from %s', pred_file) + pred_file = dir / "item-item-preds.csv" + _log.info("reading known predictions from %s", pred_file) known_preds = pd.read_csv(str(pred_file)) - pairs = known_preds.loc[:, ['user', 'item']] + pairs = known_preds.loc[:, ["user", "item"]] preds = batch.predict(algo, pairs) - merged = pd.merge(known_preds.rename(columns={'prediction': 'expected'}), preds) + merged = pd.merge(known_preds.rename(columns={"prediction": "expected"}), preds) assert len(merged) == len(preds) - merged['error'] = merged.expected - merged.prediction + merged["error"] = merged.expected - merged.prediction try: assert not any(merged.prediction.isna() & merged.expected.notna()) except AssertionError as e: bad = merged[merged.prediction.isna() & merged.expected.notna()] - _log.error('erroneously missing or present predictions:\n%s', bad) + _log.error("erroneously missing or present predictions:\n%s", bad) raise e err = merged.error @@ -617,33 +620,33 @@ def test_ii_known_preds(): assert all(err.abs() < 0.03) # FIXME this threshold is too high except AssertionError as e: bad = merged[merged.error.notna() & (merged.error.abs() >= 0.01)] - _log.error('erroneous predictions:\n%s', bad) + _log.error("erroneous predictions:\n%s", bad) raise e def _train_ii(): algo = knn.ItemItem(20, min_sim=1.0e-6) timer = Stopwatch() - _log.info('training %s on ml data', algo) + _log.info("training %s on ml data", algo) algo.fit(lktu.ml_test.ratings) - _log.info('trained in %s', timer) + _log.info("trained in %s", timer) shr = persist(algo) return shr.transfer() @lktu.wantjit @mark.slow -@mark.skip('no longer testing II match') -@mark.skipif(csrk.name != 'csr.kernels.mkl', reason='only needed when MKL is available') +@mark.skip("no longer testing II match") +@mark.skipif(csrk.name != "csr.kernels.mkl", reason="only needed when MKL is available") def test_ii_impl_match(): mkl_h = None nba_h = None try: - with lktu.set_env_var('CSR_KERNEL', 'mkl'): + with lktu.set_env_var("CSR_KERNEL", "mkl"): mkl_h = run_sp(_train_ii) mkl = mkl_h.get() - with lktu.set_env_var('CSR_KERNEL', 'numba'): + with lktu.set_env_var("CSR_KERNEL", "numba"): nba_h = run_sp(_train_ii) nba = nba_h.get() @@ -657,8 +660,9 @@ def test_ii_impl_match(): assert all(np.diff(mkl.sim_matrix_.values[sp:ep]) <= 0) assert all(np.diff(nba.sim_matrix_.values[sp:ep]) <= 0) assert set(mkl.sim_matrix_.colinds[sp:ep]) == set(nba.sim_matrix_.colinds[sp:ep]) - assert mkl.sim_matrix_.values[sp:ep] == \ - approx(nba.sim_matrix_.values[sp:ep], abs=1.0e-3) + assert mkl.sim_matrix_.values[sp:ep] == approx( + nba.sim_matrix_.values[sp:ep], abs=1.0e-3 + ) finally: mkl = None @@ -671,8 +675,8 @@ def test_ii_impl_match(): @lktu.wantjit @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K not available') -@mark.parametrize('ncpus', [1, 2]) +@mark.skipif(not lktu.ml100k.available, reason="ML100K not available") +@mark.parametrize("ncpus", [1, 2]) def test_ii_batch_recommend(ncpus): import lenskit.crossfold as xf from lenskit import topn @@ -680,11 +684,11 @@ def test_ii_batch_recommend(ncpus): ratings = lktu.ml100k.ratings def eval(train, test): - _log.info('running training') + 
_log.info("running training") algo = knn.ItemItem(30) algo = Recommender.adapt(algo) algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) recs = batch.recommend(algo, test.user.unique(), 100, n_jobs=ncpus) return recs @@ -697,21 +701,21 @@ def eval(train, test): test = pd.concat(test_frames) recs = pd.concat(recs) - _log.info('analyzing recommendations') + _log.info("analyzing recommendations") rla = topn.RecListAnalysis() rla.add_metric(topn.ndcg) results = rla.compute(recs, test) dcg = results.ndcg - _log.info('nDCG for %d users is %f', len(dcg), dcg.mean()) + _log.info("nDCG for %d users is %f", len(dcg), dcg.mean()) assert dcg.mean() > 0.03 def _build_predict(ratings, fold): algo = Fallback(knn.ItemItem(20), Bias(5)) - train = ratings[ratings['partition'] != fold] + train = ratings[ratings["partition"] != fold] algo.fit(train) - test = ratings[ratings['partition'] == fold] + test = ratings[ratings["partition"] == fold] preds = batch.predict(algo, test, n_jobs=1) return preds @@ -721,7 +725,7 @@ def _build_predict(ratings, fold): def test_ii_parallel_multi_build(): "Build multiple item-item models in parallel" ratings = lktu.ml_test.ratings - ratings['partition'] = np.random.choice(4, len(ratings), replace=True) + ratings["partition"] = np.random.choice(4, len(ratings), replace=True) with invoker(ratings, _build_predict, 2) as inv: preds = inv.map(range(4)) diff --git a/tests/test_knn_user_user.py b/tests/test_knn_user_user.py index 663677091..7fd2c90a3 100644 --- a/tests/test_knn_user_user.py +++ b/tests/test_knn_user_user.py @@ -23,28 +23,28 @@ def test_uu_dft_config(): algo = knn.UserUser(30) assert algo.nnbrs == 30 assert algo.center - assert algo.aggregate == 'weighted-average' + assert algo.aggregate == "weighted-average" assert algo.use_ratings def test_uu_exp_config(): - algo = knn.UserUser(30, feedback='explicit') + algo = knn.UserUser(30, feedback="explicit") assert algo.nnbrs == 30 assert algo.center - assert algo.aggregate == 'weighted-average' + assert algo.aggregate == "weighted-average" assert algo.use_ratings def test_uu_imp_config(): - algo = knn.UserUser(30, feedback='implicit') + algo = knn.UserUser(30, feedback="implicit") assert algo.nnbrs == 30 assert not algo.center - assert algo.aggregate == 'sum' + assert algo.aggregate == "sum" assert not algo.use_ratings def test_uu_imp_clone(): - algo = knn.UserUser(30, feedback='implicit') + algo = knn.UserUser(30, feedback="implicit") a2 = clone(algo) assert a2.get_params() == algo.get_params() @@ -57,22 +57,24 @@ def test_uu_train(): assert ret is algo # it should have computed correct means - umeans = ml_ratings.groupby('user').rating.mean() - mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name='mean') + umeans = ml_ratings.groupby("user").rating.mean() + mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name="mean") umeans, mlmeans = umeans.align(mlmeans) assert mlmeans.values == approx(umeans.values) # we should be able to reconstruct rating values - uir = ml_ratings.set_index(['user', 'item']).rating + uir = ml_ratings.set_index(["user", "item"]).rating r_items = algo.transpose_matrix_.rowinds() - ui_rbdf = pd.DataFrame({ - 'user': algo.user_index_[algo.transpose_matrix_.colinds], - 'item': algo.item_index_[r_items], - 'nrating': algo.transpose_matrix_.values - }).set_index(['user', 'item']) + ui_rbdf = pd.DataFrame( + { + "user": algo.user_index_[algo.transpose_matrix_.colinds], + "item": algo.item_index_[r_items], + 
"nrating": algo.transpose_matrix_.values, + } + ).set_index(["user", "item"]) ui_rbdf = ui_rbdf.join(mlmeans) - ui_rbdf['rating'] = ui_rbdf['nrating'] + ui_rbdf['mean'] - ui_rbdf['orig_rating'] = uir + ui_rbdf["rating"] = ui_rbdf["nrating"] + ui_rbdf["mean"] + ui_rbdf["orig_rating"] = uir assert ui_rbdf.rating.values == approx(ui_rbdf.orig_rating.values) @@ -122,7 +124,7 @@ def test_uu_predict_live_ratings(): no4 = ml_ratings[ml_ratings.user != 4] algo.fit(no4) - ratings = ml_ratings[ml_ratings.user == 4].set_index('item').rating + ratings = ml_ratings[ml_ratings.user == 4].set_index("item").rating preds = algo.predict_for_user(20381, [1016, 2091], ratings) assert len(preds) == 2 @@ -132,37 +134,39 @@ def test_uu_predict_live_ratings(): def test_uu_save_load(tmp_path): orig = knn.UserUser(30) - _log.info('training model') + _log.info("training model") orig.fit(ml_ratings) - fn = tmp_path / 'uu.model' - _log.info('saving to %s', fn) - with fn.open('wb') as f: + fn = tmp_path / "uu.model" + _log.info("saving to %s", fn) + with fn.open("wb") as f: pickle.dump(orig, f) - _log.info('reloading model') - with fn.open('rb') as f: + _log.info("reloading model") + with fn.open("rb") as f: algo = pickle.load(f) - _log.info('checking model') + _log.info("checking model") # it should have computed correct means - umeans = ml_ratings.groupby('user').rating.mean() - mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name='mean') + umeans = ml_ratings.groupby("user").rating.mean() + mlmeans = pd.Series(algo.user_means_, index=algo.user_index_, name="mean") umeans, mlmeans = umeans.align(mlmeans) assert mlmeans.values == approx(umeans.values) # we should be able to reconstruct rating values - uir = ml_ratings.set_index(['user', 'item']).rating + uir = ml_ratings.set_index(["user", "item"]).rating r_items = algo.transpose_matrix_.rowinds() - ui_rbdf = pd.DataFrame({ - 'user': algo.user_index_[algo.transpose_matrix_.colinds], - 'item': algo.item_index_[r_items], - 'nrating': algo.transpose_matrix_.values - }).set_index(['user', 'item']) + ui_rbdf = pd.DataFrame( + { + "user": algo.user_index_[algo.transpose_matrix_.colinds], + "item": algo.item_index_[r_items], + "nrating": algo.transpose_matrix_.values, + } + ).set_index(["user", "item"]) ui_rbdf = ui_rbdf.join(mlmeans) - ui_rbdf['rating'] = ui_rbdf['nrating'] + ui_rbdf['mean'] - ui_rbdf['orig_rating'] = uir + ui_rbdf["rating"] = ui_rbdf["nrating"] + ui_rbdf["mean"] + ui_rbdf["orig_rating"] = uir assert ui_rbdf.rating.values == approx(ui_rbdf.orig_rating.values) # running the predictor should work @@ -183,8 +187,8 @@ def test_uu_predict_unknown_empty(): def test_uu_implicit(): "Train and use user-user on an implicit data set." - algo = knn.UserUser(20, feedback='implicit') - data = ml_ratings.loc[:, ['user', 'item']] + algo = knn.UserUser(20, feedback="implicit") + data = ml_ratings.loc[:, ["user", "item"]] algo.fit(data) assert algo.user_means_ is None @@ -200,8 +204,8 @@ def test_uu_implicit(): @mark.slow def test_uu_save_load_implicit(tmp_path): "Save and load user-user on an implicit data set." 
- orig = knn.UserUser(20, feedback='implicit') - data = ml_ratings.loc[:, ['user', 'item']] + orig = knn.UserUser(20, feedback="implicit") + data = ml_ratings.loc[:, ["user", "item"]] orig.fit(data) ser = pickle.dumps(orig) @@ -226,25 +230,25 @@ def test_uu_known_preds(): from lenskit import batch algo = knn.UserUser(30, min_sim=1.0e-6) - _log.info('training %s on ml data', algo) + _log.info("training %s on ml data", algo) algo.fit(lktu.ml_test.ratings) dir = Path(__file__).parent - pred_file = dir / 'user-user-preds.csv' - _log.info('reading known predictions from %s', pred_file) + pred_file = dir / "user-user-preds.csv" + _log.info("reading known predictions from %s", pred_file) known_preds = pd.read_csv(str(pred_file)) - pairs = known_preds.loc[:, ['user', 'item']] - _log.info('generating %d known predictions', len(pairs)) + pairs = known_preds.loc[:, ["user", "item"]] + _log.info("generating %d known predictions", len(pairs)) preds = batch.predict(algo, pairs) - merged = pd.merge(known_preds.rename(columns={'prediction': 'expected'}), preds) + merged = pd.merge(known_preds.rename(columns={"prediction": "expected"}), preds) assert len(merged) == len(preds) - merged['error'] = merged.expected - merged.prediction + merged["error"] = merged.expected - merged.prediction try: assert not any(merged.prediction.isna() & merged.expected.notna()) except AssertionError as e: bad = merged[merged.prediction.isna() & merged.expected.notna()] - _log.error('%d missing predictions:\n%s', len(bad), bad) + _log.error("%d missing predictions:\n%s", len(bad), bad) raise e err = merged.error @@ -253,22 +257,23 @@ def test_uu_known_preds(): assert all(err.abs() < 0.01) except AssertionError as e: bad = merged[merged.error.notna() & (merged.error.abs() >= 0.01)] - _log.error('%d erroneous predictions:\n%s', len(bad), bad) + _log.error("%d erroneous predictions:\n%s", len(bad), bad) raise e def __batch_eval(job): from lenskit import batch + algo, train, test = job - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test) @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_uu_batch_accuracy(): from lenskit.algorithms import basic from lenskit.algorithms import bias @@ -286,30 +291,30 @@ def test_uu_batch_accuracy(): mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.71, abs=0.05) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.91, abs=0.055) @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_uu_implicit_batch_accuracy(): from lenskit import batch, topn import lenskit.crossfold as xf ratings = lktu.ml100k.ratings - algo = knn.UserUser(30, center=False, aggregate='sum') + algo = knn.UserUser(30, center=False, aggregate="sum") folds = list(xf.partition_users(ratings, 5, xf.SampleFrac(0.2))) all_test = pd.concat(f.test for f in folds) rec_lists = [] for train, test in folds: - _log.info('running training') + _log.info("running training") rec_algo = Recommender.adapt(algo) - rec_algo.fit(train.loc[:, ['user', 'item']]) - 
_log.info('testing %d users', test.user.nunique()) + rec_algo.fit(train.loc[:, ["user", "item"]]) + _log.info("testing %d users", test.user.nunique()) recs = batch.recommend(rec_algo, test.user.unique(), 100, n_jobs=2) rec_lists.append(recs) recs = pd.concat(rec_lists) diff --git a/tests/test_math_solve.py b/tests/test_math_solve.py index 9568c4da2..cac54caa0 100644 --- a/tests/test_math_solve.py +++ b/tests/test_math_solve.py @@ -17,7 +17,7 @@ def square_problem(draw, scale=10): size = draw(st.integers(2, 100)) # Hypothesis doesn't do well at generating problem data, so go with this - seed = draw(st.integers(min_value=0, max_value=2**32-1)) + seed = draw(st.integers(min_value=0, max_value=2**32 - 1)) rng = np.random.RandomState(seed) A = rng.randn(size, size) * scale b = rng.randn(size) * scale diff --git a/tests/test_matrix.py b/tests/test_matrix.py index bef4f71cb..408d7b3b3 100644 --- a/tests/test_matrix.py +++ b/tests/test_matrix.py @@ -17,17 +17,17 @@ def test_sparse_matrix(rng): assert mat.ncols == ratings.item.nunique() # user indicators should correspond to user item counts - ucounts = ratings.groupby('user').item.count() + ucounts = ratings.groupby("user").item.count() ucounts = ucounts.loc[uidx].cumsum() assert all(mat.rowptrs[1:] == ucounts.values) # verify rating values - ratings = ratings.set_index(['user', 'item']) + ratings = ratings.set_index(["user", "item"]) for u in rng.choice(uidx, size=50): ui = uidx.get_loc(u) vs = mat.row_vs(ui) vs = pd.Series(vs, iidx[mat.row_cs(ui)]) - rates = ratings.loc[u]['rating'] + rates = ratings.loc[u]["rating"] vs, rates = vs.align(rates) assert not any(vs.isna()) assert not any(rates.isna()) @@ -36,7 +36,7 @@ def test_sparse_matrix(rng): def test_sparse_matrix_implicit(): ratings = ml_test.ratings - ratings = ratings.loc[:, ['user', 'item']] + ratings = ratings.loc[:, ["user", "item"]] mat, uidx, iidx = sparse_ratings(ratings) assert mat.nrows == len(uidx) @@ -47,11 +47,11 @@ def test_sparse_matrix_implicit(): @mark.parametrize( - 'format, sps_fmt_checker', + "format, sps_fmt_checker", [ (True, sps.isspmatrix_csr), - ('csr', sps.isspmatrix_csr), - ('coo', sps.isspmatrix_coo), + ("csr", sps.isspmatrix_csr), + ("coo", sps.isspmatrix_coo), ], ) def test_sparse_matrix_scipy(format, sps_fmt_checker): @@ -64,7 +64,7 @@ def test_sparse_matrix_scipy(format, sps_fmt_checker): assert len(iidx) == ratings.item.nunique() # user indicators should correspond to user item counts - ucounts = ratings.groupby('user').item.count() + ucounts = ratings.groupby("user").item.count() ucounts = ucounts.loc[uidx].cumsum() if sps.isspmatrix_coo(mat): mat = mat.tocsr() @@ -73,7 +73,7 @@ def test_sparse_matrix_scipy(format, sps_fmt_checker): def test_sparse_matrix_scipy_implicit(): ratings = ml_test.ratings - ratings = ratings.loc[:, ['user', 'item']] + ratings = ratings.loc[:, ["user", "item"]] mat, uidx, iidx = sparse_ratings(ratings, scipy=True) assert sps.issparse(mat) @@ -86,8 +86,8 @@ def test_sparse_matrix_scipy_implicit(): def test_sparse_matrix_indexes(rng): ratings = ml_test.ratings - uidx = pd.Index(rng.permutation(ratings['user'].unique())) - iidx = pd.Index(rng.permutation(ratings['item'].unique())) + uidx = pd.Index(rng.permutation(ratings["user"].unique())) + iidx = pd.Index(rng.permutation(ratings["item"].unique())) mat, _uidx, _iidx = sparse_ratings(ratings, users=uidx, items=iidx) @@ -97,12 +97,12 @@ def test_sparse_matrix_indexes(rng): assert len(_iidx) == ratings.item.nunique() # verify rating values - ratings = ratings.set_index(['user', 'item']) 
+ ratings = ratings.set_index(["user", "item"]) for u in rng.choice(_uidx, size=50): ui = _uidx.get_loc(u) vs = mat.row_vs(ui) vs = pd.Series(vs, _iidx[mat.row_cs(ui)]) - rates = ratings.loc[u]['rating'] + rates = ratings.loc[u]["rating"] vs, rates = vs.align(rates) assert not any(vs.isna()) assert not any(rates.isna()) diff --git a/tests/test_ml20m.py b/tests/test_ml20m.py index b36df4d50..3b4387eac 100644 --- a/tests/test_ml20m.py +++ b/tests/test_ml20m.py @@ -16,6 +16,7 @@ from lenskit.algorithms.basic import Popular from lenskit.algorithms.als import BiasedMF from lenskit.algorithms import item_knn as knn + try: import lenskit_tf except: @@ -28,7 +29,7 @@ _log = logging.getLogger(__name__) -_ml_path = Path('data/ml-20m') +_ml_path = Path("data/ml-20m") if _ml_path.exists(): _ml_20m = MovieLens(_ml_path) else: @@ -40,36 +41,36 @@ def ml20m(): if _ml_20m: return _ml_20m.ratings else: - pytest.skip('ML-20M not available') + pytest.skip("ML-20M not available") @pytest.mark.slow @pytest.mark.realdata -@pytest.mark.parametrize('n_jobs', [1, 2]) +@pytest.mark.parametrize("n_jobs", [1, 2]) def test_pop_recommend(ml20m, rng, n_jobs): - users = rng.choice(ml20m['user'].unique(), 10000, replace=False) + users = rng.choice(ml20m["user"].unique(), 10000, replace=False) algo = Popular() - _log.info('training %s', algo) + _log.info("training %s", algo) algo.fit(ml20m) - _log.info('recommending with %s', algo) + _log.info("recommending with %s", algo) recs = batch.recommend(algo, users, 10, n_jobs=n_jobs) - assert recs['user'].nunique() == 10000 + assert recs["user"].nunique() == 10000 @pytest.mark.realdata @pytest.mark.slow def test_als_isolate(ml20m, rng): - users = rng.choice(ml20m['user'].unique(), 5000, replace=False) + users = rng.choice(ml20m["user"].unique(), 5000, replace=False) algo = BiasedMF(20, iterations=10) algo = Recommender.adapt(algo) - _log.info('training %s', algo) + _log.info("training %s", algo) ares = batch.train_isolated(algo, ml20m) try: - _log.info('recommending with %s', algo) + _log.info("recommending with %s", algo) recs = batch.recommend(ares, users, 10) - assert recs['user'].nunique() == 5000 - _log.info('predicting with %s', algo) + assert recs["user"].nunique() == 5000 + _log.info("predicting with %s", algo) pairs = ml20m.sample(1000) preds = batch.predict(ares, pairs) assert len(preds) == len(pairs) @@ -80,14 +81,16 @@ def test_als_isolate(ml20m, rng): @pytest.mark.realdata @pytest.mark.slow @pytest.mark.skip -@pytest.mark.skipif(lenskit_tf is None or not lenskit_tf.TF_AVAILABLE, reason='TensorFlow not available') +@pytest.mark.skipif( + lenskit_tf is None or not lenskit_tf.TF_AVAILABLE, reason="TensorFlow not available" +) def test_tf_isvd(ml20m): algo = lenskit_tf.IntegratedBiasMF(20) def eval(train, test): - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test) folds = xf.sample_users(ml20m, 2, 5000, xf.SampleFrac(0.2)) @@ -95,5 +98,5 @@ def eval(train, test): mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.60, abs=0.025) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.92, abs=0.05) diff --git a/tests/test_parallel.py b/tests/test_parallel.py index 654be1588..6bfb3434c 100644 --- a/tests/test_parallel.py +++ 
b/tests/test_parallel.py @@ -19,11 +19,11 @@ def _mul_op(m, v): def _worker_status(blob, *args): - _log.info('in worker %s', mp.current_process().name) + _log.info("in worker %s", mp.current_process().name) return os.getpid(), is_worker(), is_mp_worker() -@mark.parametrize('n_jobs', [None, 1, 2, 8]) +@mark.parametrize("n_jobs", [None, 1, 2, 8]) def test_invoke_matrix(n_jobs): matrix = np.random.randn(100, 100) vectors = [np.random.randn(100) for i in range(100)] @@ -35,51 +35,51 @@ def test_invoke_matrix(n_jobs): def test_mp_is_worker(): - with invoker('foo', _worker_status, 2) as loop: + with invoker("foo", _worker_status, 2) as loop: res = list(loop.map(range(10))) assert all([w for (pid, w, mpw) in res]) assert all([mpw for (pid, w, mpw) in res]) def test_proc_count_default(): - with set_env_var('LK_NUM_PROCS', None): + with set_env_var("LK_NUM_PROCS", None): assert proc_count() == mp.cpu_count() // 2 assert proc_count(level=1) == 2 def test_proc_count_no_div(): - with set_env_var('LK_NUM_PROCS', None): + with set_env_var("LK_NUM_PROCS", None): assert proc_count(1) == mp.cpu_count() def test_proc_count_env(): - with set_env_var('LK_NUM_PROCS', '17'): + with set_env_var("LK_NUM_PROCS", "17"): assert proc_count() == 17 assert proc_count(level=1) == 1 def test_proc_count_max(): - with set_env_var('LK_NUM_PROCS', None): + with set_env_var("LK_NUM_PROCS", None): assert proc_count(max_default=1) == 1 def test_proc_count_nest_env(): - with set_env_var('LK_NUM_PROCS', '7,3'): + with set_env_var("LK_NUM_PROCS", "7,3"): assert proc_count() == 7 assert proc_count(level=1) == 3 assert proc_count(level=2) == 1 def _sp_matmul(a1, a2, *, fail=False): - _log.info('in worker process') + _log.info("in worker process") if fail: - raise RuntimeError('you rang?') + raise RuntimeError("you rang?") else: return a1 @ a2 def _sp_matmul_p(a1, a2, *, method=None, fail=False): - _log.info('in worker process') + _log.info("in worker process") return persist(a1 @ a2, method=method).transfer() @@ -99,10 +99,10 @@ def test_run_sp_fail(): run_sp(_sp_matmul, a1, a2, fail=True) -@pytest.mark.parametrize('method', [None, 'binpickle', 'shm']) +@pytest.mark.parametrize("method", [None, "binpickle", "shm"]) def test_run_sp_persist(method): - if method == 'shm' and not SHM_AVAILABLE: - pytest.skip('SHM backend not available') + if method == "shm" and not SHM_AVAILABLE: + pytest.skip("SHM backend not available") a1 = np.random.randn(100, 100) a2 = np.random.randn(100, 100) @@ -116,7 +116,7 @@ def test_run_sp_persist(method): def test_sp_is_worker(): - pid, w, mpw = run_sp(_worker_status, 'fishtank') + pid, w, mpw = run_sp(_worker_status, "fishtank") assert pid != os.getpid() assert w assert not mpw @@ -131,4 +131,4 @@ def test_sp_random_seed(): seed = run_sp(_get_seed) # we should spawn a seed for the worker assert seed.entropy == init.entropy - assert seed.spawn_key == (init.n_children_spawned - 1, ) + assert seed.spawn_key == (init.n_children_spawned - 1,) diff --git a/tests/test_popular.py b/tests/test_popular.py index b7be88cb0..790bd98f5 100644 --- a/tests/test_popular.py +++ b/tests/test_popular.py @@ -6,15 +6,15 @@ import lenskit.util.test as lktu -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_popular(): algo = basic.Popular() algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = 
lktu.ml_test.ratings.groupby("item").user.count() counts = counts.nlargest(100) assert algo.item_pop_.max() == counts.max() @@ -31,7 +31,7 @@ def test_popular(): def test_popular_excludes_rated(): algo = basic.Popular() algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() counts = counts.nlargest(100) recs = algo.recommend(100, 100) @@ -40,16 +40,16 @@ def test_popular_excludes_rated(): # make sure we didn't recommend anything the user likes ratings = lktu.ml_test.ratings - urates = ratings.set_index(['user', 'item']) + urates = ratings.set_index(["user", "item"]) urates = urates.loc[100, :] - match = recs.join(urates, on='item', how='inner') + match = recs.join(urates, on="item", how="inner") assert len(match) == 0 def test_pop_candidates(): algo = basic.Popular() algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() items = lktu.ml_test.ratings.item.unique() assert algo.item_pop_.max() == counts.max() @@ -75,7 +75,7 @@ def test_pop_save_load(): mod = pickle.dumps(original) algo = pickle.loads(mod) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() counts = counts.nlargest(100) assert algo.item_pop_.max() == counts.max() @@ -95,7 +95,7 @@ def test_popscore_quantile(rng): assert algo.item_scores_.max() == 1.0 - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() counts = counts.sort_values() winner = counts.index[-1] @@ -103,10 +103,10 @@ def test_popscore_quantile(rng): def test_popscore_rank(rng): - algo = basic.PopScore('rank') + algo = basic.PopScore("rank") algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() counts = counts.sort_values() assert algo.item_scores_.max() == len(counts) @@ -116,10 +116,10 @@ def test_popscore_rank(rng): def test_popscore_counts(rng): - algo = basic.PopScore('count') + algo = basic.PopScore("count") algo.fit(lktu.ml_test.ratings) - counts = lktu.ml_test.ratings.groupby('item').user.count() + counts = lktu.ml_test.ratings.groupby("item").user.count() scores, counts = algo.item_scores_.align(counts) assert all(scores == counts) diff --git a/tests/test_predict_metrics.py b/tests/test_predict_metrics.py index f1797e6e7..461f250bc 100644 --- a/tests/test_predict_metrics.py +++ b/tests/test_predict_metrics.py @@ -9,44 +9,44 @@ def test_check_missing_empty(): - pm._check_missing(pd.Series([], dtype='float64'), 'error') + pm._check_missing(pd.Series([], dtype="float64"), "error") # should pass assert True def test_check_missing_has_values(): - pm._check_missing(pd.Series([1, 3, 2]), 'error') + pm._check_missing(pd.Series([1, 3, 2]), "error") # should pass assert True def test_check_missing_nan_raises(): with raises(ValueError): - pm._check_missing(pd.Series([1, np.nan, 3]), 'error') + pm._check_missing(pd.Series([1, np.nan, 3]), "error") def test_check_missing_raises(): - data = pd.Series([1, 7, 3], ['a', 'b', 'd']) - ref = pd.Series([3, 2, 4], ['b', 'c', 'd']) - ref, data = ref.align(data, join='left') + data = pd.Series([1, 7, 3], ["a", "b", "d"]) + ref = pd.Series([3, 2, 4], ["b", "c", "d"]) + ref, data = ref.align(data, join="left") with raises(ValueError): - pm._check_missing(data, 'error') + pm._check_missing(data, "error") def 
test_check_joined_ok(): - data = pd.Series([1, 7, 3], ['a', 'b', 'd']) - ref = pd.Series([3, 2, 4], ['b', 'c', 'd']) - ref, data = ref.align(data, join='inner') - pm._check_missing(ref, 'error') + data = pd.Series([1, 7, 3], ["a", "b", "d"]) + ref = pd.Series([3, 2, 4], ["b", "c", "d"]) + ref, data = ref.align(data, join="inner") + pm._check_missing(ref, "error") # should get here assert True def test_check_missing_ignore(): - data = pd.Series([1, 7, 3], ['a', 'b', 'd']) - ref = pd.Series([3, 2, 4], ['b', 'c', 'd']) - ref, data = ref.align(data, join='left') - pm._check_missing(data, 'ignore') + data = pd.Series([1, 7, 3], ["a", "b", "d"]) + ref = pd.Series([3, 2, 4], ["b", "c", "d"]) + ref, data = ref.align(data, join="left") + pm._check_missing(data, "ignore") # should get here assert True @@ -103,18 +103,19 @@ def test_rmse_series_two(): def test_rmse_series_subset_axis(): - rmse = pm.rmse(pd.Series([1, 3], ['a', 'c']), pd.Series([3, 4, 1], ['a', 'b', 'c'])) + rmse = pm.rmse(pd.Series([1, 3], ["a", "c"]), pd.Series([3, 4, 1], ["a", "b", "c"])) assert rmse == approx(2) def test_rmse_series_missing_value_error(): with raises(ValueError): - pm.rmse(pd.Series([1, 3], ['a', 'd']), pd.Series([3, 4, 1], ['a', 'b', 'c'])) + pm.rmse(pd.Series([1, 3], ["a", "d"]), pd.Series([3, 4, 1], ["a", "b", "c"])) def test_rmse_series_missing_value_ignore(): - rmse = pm.rmse(pd.Series([1, 3], ['a', 'd']), pd.Series([3, 4, 1], ['a', 'b', 'c']), - missing='ignore') + rmse = pm.rmse( + pd.Series([1, 3], ["a", "d"]), pd.Series([3, 4, 1], ["a", "b", "c"]), missing="ignore" + ) assert rmse == approx(2) @@ -159,7 +160,7 @@ def test_mae_series_two(): @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_batch_rmse(): import lenskit.crossfold as xf import lenskit.batch as batch @@ -171,13 +172,13 @@ def test_batch_rmse(): def eval(train, test): algo.fit(train) preds = batch.predict(algo, test) - return preds.set_index(['user', 'item']) + return preds.set_index(["user", "item"]) - results = pd.concat((eval(train, test) - for (train, test) - in xf.partition_users(ratings, 5, xf.SampleN(5)))) + results = pd.concat( + (eval(train, test) for (train, test) in xf.partition_users(ratings, 5, xf.SampleN(5))) + ) - user_rmse = results.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = results.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) # we should have all users users = ratings.user.unique() @@ -224,9 +225,9 @@ def test_user_metric(): preds = batch.predict(algo, test) rmse = pm.user_metric(preds) - u_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + u_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert rmse == approx(u_rmse.mean()) mae = pm.user_metric(preds, metric=pm.mae) - u_mae = preds.groupby('user').apply(lambda df: pm.mae(df.prediction, df.rating)) + u_mae = preds.groupby("user").apply(lambda df: pm.mae(df.prediction, df.rating)) assert mae == approx(u_mae.mean()) diff --git a/tests/test_rerank.py b/tests/test_rerank.py index f496fd9b2..d0f7f1988 100644 --- a/tests/test_rerank.py +++ b/tests/test_rerank.py @@ -9,10 +9,10 @@ def test_plackett_luce_rec(): pop = PopScore() - algo = PlackettLuce(pop, rng_spec='user') + algo = PlackettLuce(pop, rng_spec="user") algo.fit(lktu.ml_test.ratings) - items = lktu.ml_test.ratings['item'].unique() + items = 
lktu.ml_test.ratings["item"].unique() nitems = len(items) recs1 = algo.recommend(2038, 100) @@ -21,19 +21,19 @@ def test_plackett_luce_rec(): assert len(recs2) == 100 # we don't get exactly the same set of recs - assert set(recs1['item']) != set(recs2['item']) + assert set(recs1["item"]) != set(recs2["item"]) recs_all = algo.recommend(2038) assert len(recs_all) == nitems - assert set(items) == set(recs_all['item']) + assert set(items) == set(recs_all["item"]) def test_plackett_luce_pred(): bias = Bias() - algo = PlackettLuce(bias, rng_spec='user') + algo = PlackettLuce(bias, rng_spec="user") algo.fit(lktu.ml_test.ratings) - items = lktu.ml_test.ratings['item'].unique() + items = lktu.ml_test.ratings["item"].unique() nitems = len(items) recs1 = algo.recommend(2038, 100) @@ -42,8 +42,8 @@ def test_plackett_luce_pred(): assert len(recs2) == 100 # we don't get exactly the same set of recs - assert set(recs1['item']) != set(recs2['item']) + assert set(recs1["item"]) != set(recs2["item"]) recs_all = algo.recommend(2038) assert len(recs_all) == nitems - assert set(items) == set(recs_all['item']) + assert set(items) == set(recs_all["item"]) diff --git a/tests/test_sharing.py b/tests/test_sharing.py index 3ccc206c2..8e1033029 100644 --- a/tests/test_sharing.py +++ b/tests/test_sharing.py @@ -33,7 +33,7 @@ def test_persist_bpk(): share.close() -@mark.skipif(not lks.SHM_AVAILABLE, reason='shared_memory not available') +@mark.skipif(not lks.SHM_AVAILABLE, reason="shared_memory not available") def test_persist_shm(): matrix = np.random.randn(1000, 100) share = lks.persist_shm(matrix) @@ -62,7 +62,7 @@ def test_persist(): def test_persist_dir(tmp_path): "Test persistence with a configured directory" matrix = np.random.randn(1000, 100) - with lktu.set_env_var('LK_TEMP_DIR', os.fspath(tmp_path)): + with lktu.set_env_var("LK_TEMP_DIR", os.fspath(tmp_path)): share = lks.persist(matrix) assert isinstance(share, lks.BPKPersisted) @@ -79,7 +79,7 @@ def test_persist_method(): "Test persistence with a specified method" matrix = np.random.randn(1000, 100) - share = lks.persist(matrix, method='binpickle') + share = lks.persist(matrix, method="binpickle") assert isinstance(share, lks.BPKPersisted) try: diff --git a/tests/test_svd.py b/tests/test_svd.py index a62310ebb..ee4d7653b 100644 --- a/tests/test_svd.py +++ b/tests/test_svd.py @@ -13,11 +13,11 @@ _log = logging.getLogger(__name__) -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) -need_skl = mark.skipif(not svd.SKL_AVAILABLE, reason='scikit-learn not installed') +need_skl = mark.skipif(not svd.SKL_AVAILABLE, reason="scikit-learn not installed") @need_skl @@ -71,6 +71,7 @@ def test_svd_clone(): assert a2.bias.user_damping == algo.bias.user_damping assert a2.bias.item_damping == algo.bias.item_damping + @need_skl @mark.slow def test_svd_save_load(): @@ -80,7 +81,7 @@ def test_svd_save_load(): original.fit(ratings) mod = pickle.dumps(original) - _log.info('serialized to %d bytes', len(mod)) + _log.info("serialized to %d bytes", len(mod)) algo = pickle.loads(mod) assert algo.bias.mean_ == original.bias.mean_ @@ -92,7 +93,7 @@ def test_svd_save_load(): @need_skl @mark.slow @mark.eval -@mark.skipif(not lktu.ml100k.available, reason='ML100K data not present') +@mark.skipif(not lktu.ml100k.available, reason="ML100K data not present") def test_svd_batch_accuracy(): from lenskit.algorithms import 
basic from lenskit.algorithms import bias @@ -106,9 +107,9 @@ def test_svd_batch_accuracy(): algo = basic.Fallback(svd_algo, bias.Bias(damping=10)) def eval(train, test): - _log.info('running training') + _log.info("running training") algo.fit(train) - _log.info('testing %d users', test.user.nunique()) + _log.info("testing %d users", test.user.nunique()) return batch.predict(algo, test) folds = xf.partition_users(ratings, 5, xf.SampleFrac(0.2)) @@ -116,5 +117,5 @@ def eval(train, test): mae = pm.mae(preds.prediction, preds.rating) assert mae == approx(0.74, abs=0.025) - user_rmse = preds.groupby('user').apply(lambda df: pm.rmse(df.prediction, df.rating)) + user_rmse = preds.groupby("user").apply(lambda df: pm.rmse(df.prediction, df.rating)) assert user_rmse.mean() == approx(0.92, abs=0.05) diff --git a/tests/test_topn_analysis.py b/tests/test_topn_analysis.py index cfe2ee703..1253b8b0e 100644 --- a/tests/test_topn_analysis.py +++ b/tests/test_topn_analysis.py @@ -19,18 +19,21 @@ def test_split_keys(): rla = topn.RecListAnalysis() - recs, truth = topn._df_keys(['algorithm', 'user', 'item', 'rank', 'score'], - ['user', 'item', 'rating']) - assert truth == ['user'] - assert recs == ['algorithm', 'user'] + recs, truth = topn._df_keys( + ["algorithm", "user", "item", "rank", "score"], ["user", "item", "rating"] + ) + assert truth == ["user"] + assert recs == ["algorithm", "user"] def test_split_keys_gcol(): - recs, truth = topn._df_keys(['algorithm', 'user', 'item', 'rank', 'score', 'fishtank'], - ['user', 'item', 'rating'], - ['algorithm', 'fishtank', 'user']) - assert truth == ['user'] - assert recs == ['algorithm', 'fishtank', 'user'] + recs, truth = topn._df_keys( + ["algorithm", "user", "item", "rank", "score", "fishtank"], + ["user", "item", "rating"], + ["algorithm", "fishtank", "user"], + ) + assert truth == ["user"] + assert recs == ["algorithm", "fishtank", "user"] def test_run_one(): @@ -38,10 +41,10 @@ def test_run_one(): rla.add_metric(topn.precision) rla.add_metric(topn.recall) - recs = pd.DataFrame({'user': 1, 'item': [2]}) - recs.name = 'recs' - truth = pd.DataFrame({'user': 1, 'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]}) - truth.name = 'truth' + recs = pd.DataFrame({"user": 1, "item": [2]}) + recs.name = "recs" + truth = pd.DataFrame({"user": 1, "item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]}) + truth.name = "truth" print(recs) print(truth) @@ -49,13 +52,13 @@ def test_run_one(): res = rla.compute(recs, truth) print(res) - assert res.index.name == 'user' + assert res.index.name == "user" assert res.index.is_unique assert len(res) == 1 assert all(res.index == 1) assert all(res.precision == 1.0) - assert res.recall.values == approx(1/3) + assert res.recall.values == approx(1 / 3) def test_run_two(): @@ -64,17 +67,21 @@ def test_run_two(): rla.add_metric(topn.recall) rla.add_metric(topn.ndcg) - recs = pd.DataFrame({ - 'data': 'a', - 'user': ['a', 'a', 'a', 'b', 'b'], - 'item': [2, 3, 1, 4, 5], - 'rank': [1, 2, 3, 1, 2] - }) - truth = pd.DataFrame({ - 'user': ['a', 'a', 'a', 'b', 'b', 'b'], - 'item': [1, 2, 3, 1, 5, 6], - 'rating': [3.0, 5.0, 4.0, 3.0, 5.0, 4.0] - }) + recs = pd.DataFrame( + { + "data": "a", + "user": ["a", "a", "a", "b", "b"], + "item": [2, 3, 1, 4, 5], + "rank": [1, 2, 3, 1, 2], + } + ) + truth = pd.DataFrame( + { + "user": ["a", "a", "a", "b", "b", "b"], + "item": [1, 2, 3, 1, 5, 6], + "rating": [3.0, 5.0, 4.0, 3.0, 5.0, 4.0], + } + ) def prog(inner): assert len(inner) == 2 @@ -86,101 +93,110 @@ def prog(inner): assert res.columns.nlevels == 1 assert len(res) == 
2 assert res.index.nlevels == 2 - assert res.index.names == ['data', 'user'] - assert all(res.index.levels[0] == 'a') - assert all(res.index.levels[1] == ['a', 'b']) - assert all(res.reset_index().user == ['a', 'b']) + assert res.index.names == ["data", "user"] + assert all(res.index.levels[0] == "a") + assert all(res.index.levels[1] == ["a", "b"]) + assert all(res.reset_index().user == ["a", "b"]) partial_ndcg = _dcg([0.0, 5.0]) / _dcg([5, 4, 3]) assert res.ndcg.values == approx([1.0, partial_ndcg]) - assert res.precision.values == approx([1.0, 1/2]) - assert res.recall.values == approx([1.0, 1/3]) + assert res.precision.values == approx([1.0, 1 / 2]) + assert res.recall.values == approx([1.0, 1 / 3]) def test_inner_format(): rla = topn.RecListAnalysis() - recs = pd.DataFrame({ - 'data': 'a', - 'user': ['a', 'a', 'a', 'b', 'b'], - 'item': [2, 3, 1, 4, 5], - 'rank': [1, 2, 3, 1, 2] - }) - truth = pd.DataFrame({ - 'user': ['a', 'a', 'a', 'b', 'b', 'b'], - 'item': [1, 2, 3, 1, 5, 6], - 'rating': [3.0, 5.0, 4.0, 3.0, 5.0, 4.0] - }) - - def inner(recs, truth, foo='a'): - assert foo == 'b' - assert set(recs.columns) == set(['LKRecID', 'LKTruthID', 'item', 'rank']) - assert truth.index.name == 'item' + recs = pd.DataFrame( + { + "data": "a", + "user": ["a", "a", "a", "b", "b"], + "item": [2, 3, 1, 4, 5], + "rank": [1, 2, 3, 1, 2], + } + ) + truth = pd.DataFrame( + { + "user": ["a", "a", "a", "b", "b", "b"], + "item": [1, 2, 3, 1, 5, 6], + "rating": [3.0, 5.0, 4.0, 3.0, 5.0, 4.0], + } + ) + + def inner(recs, truth, foo="a"): + assert foo == "b" + assert set(recs.columns) == set(["LKRecID", "LKTruthID", "item", "rank"]) + assert truth.index.name == "item" assert truth.index.is_unique print(truth) - assert all(truth.columns == ['rating']) - return len(recs.join(truth, on='item', how='inner')) - rla.add_metric(inner, name='bob', foo='b') + assert all(truth.columns == ["rating"]) + return len(recs.join(truth, on="item", how="inner")) + + rla.add_metric(inner, name="bob", foo="b") res = rla.compute(recs, truth) print(res) assert len(res) == 2 assert res.index.nlevels == 2 - assert res.index.names == ['data', 'user'] - assert all(res.index.levels[0] == 'a') - assert all(res.index.levels[1] == ['a', 'b']) - assert all(res.reset_index().user == ['a', 'b']) - assert all(res['bob'] == [3, 1]) + assert res.index.names == ["data", "user"] + assert all(res.index.levels[0] == "a") + assert all(res.index.levels[1] == ["a", "b"]) + assert all(res.reset_index().user == ["a", "b"]) + assert all(res["bob"] == [3, 1]) def test_spec_group_cols(): - rla = topn.RecListAnalysis(group_cols=['data', 'user']) + rla = topn.RecListAnalysis(group_cols=["data", "user"]) rla.add_metric(topn.precision) rla.add_metric(topn.recall) rla.add_metric(topn.ndcg) - recs = pd.DataFrame({ - 'data': 'a', - 'user': ['a', 'a', 'a', 'b', 'b'], - 'item': [2, 3, 1, 4, 5], - 'rank': [1, 2, 3, 1, 2], - 'wombat': np.random.randn(5) - }) - truth = pd.DataFrame({ - 'user': ['a', 'a', 'a', 'b', 'b', 'b'], - 'item': [1, 2, 3, 1, 5, 6], - 'rating': [3.0, 5.0, 4.0, 3.0, 5.0, 4.0] - }) + recs = pd.DataFrame( + { + "data": "a", + "user": ["a", "a", "a", "b", "b"], + "item": [2, 3, 1, 4, 5], + "rank": [1, 2, 3, 1, 2], + "wombat": np.random.randn(5), + } + ) + truth = pd.DataFrame( + { + "user": ["a", "a", "a", "b", "b", "b"], + "item": [1, 2, 3, 1, 5, 6], + "rating": [3.0, 5.0, 4.0, 3.0, 5.0, 4.0], + } + ) res = rla.compute(recs, truth) print(res) assert len(res) == 2 assert res.index.nlevels == 2 - assert res.index.names == ['data', 'user'] - assert 
all(res.index.levels[0] == 'a') - assert all(res.index.levels[1] == ['a', 'b']) - assert all(res.reset_index().user == ['a', 'b']) + assert res.index.names == ["data", "user"] + assert all(res.index.levels[0] == "a") + assert all(res.index.levels[1] == ["a", "b"]) + assert all(res.reset_index().user == ["a", "b"]) partial_ndcg = _dcg([0.0, 5.0]) / _dcg([5, 4, 3]) assert res.ndcg.values == approx([1.0, partial_ndcg]) - assert res.precision.values == approx([1.0, 1/2]) - assert res.recall.values == approx([1.0, 1/3]) + assert res.precision.values == approx([1.0, 1 / 2]) + assert res.recall.values == approx([1.0, 1 / 3]) def test_java_equiv(): dir = Path(__file__).parent - metrics = pd.read_csv(str(dir / 'topn-java-metrics.csv')) - recs = pd.read_csv(str(dir / 'topn-java-recs.csv')) - truth = pd.read_csv(str(dir / 'topn-java-truth.csv')) + metrics = pd.read_csv(str(dir / "topn-java-metrics.csv")) + recs = pd.read_csv(str(dir / "topn-java-recs.csv")) + truth = pd.read_csv(str(dir / "topn-java-truth.csv")) rla = topn.RecListAnalysis() rla.add_metric(topn.ndcg) res = rla.compute(recs, truth) umm = pd.merge(metrics, res.reset_index()) - umm['err'] = umm['ndcg'] - umm['Java.nDCG'] - _log.info('merged: \n%s', umm) - assert umm['err'].values == approx(0, abs=1.0e-6) + umm["err"] = umm["ndcg"] - umm["Java.nDCG"] + _log.info("merged: \n%s", umm) + assert umm["err"].values == approx(0, abs=1.0e-6) @mark.slow @@ -196,20 +212,20 @@ def test_fill_users(): train, test = next(splits) algo.fit(train) - rec_users = test['user'].sample(50).unique() + rec_users = test["user"].sample(50).unique() assert len(rec_users) < 50 recs = batch.recommend(algo, rec_users, 25) scores = rla.compute(recs, test, include_missing=True) - assert len(scores) == test['user'].nunique() - assert scores['recall'].notna().sum() == len(rec_users) - assert all(scores['ntruth'] == 5) + assert len(scores) == test["user"].nunique() + assert scores["recall"].notna().sum() == len(rec_users) + assert all(scores["ntruth"] == 5) mscores = rla.compute(recs, test) assert len(mscores) < len(scores) - recall = scores.loc[scores['recall'].notna(), 'recall'].copy() - recall, mrecall = recall.align(mscores['recall']) + recall = scores.loc[scores["recall"].notna(), "recall"].copy() + recall, mrecall = recall.align(mscores["recall"]) assert all(recall == mrecall) @@ -229,65 +245,63 @@ def test_adv_fill_users(): all_test = {} for i, (train, test) in enumerate(splits): a_uu.fit(train) - rec_users = test['user'].sample(50).unique() - all_recs[(i+1, 'UU')] = batch.recommend(a_uu, rec_users, 25) + rec_users = test["user"].sample(50).unique() + all_recs[(i + 1, "UU")] = batch.recommend(a_uu, rec_users, 25) a_ii.fit(train) - rec_users = test['user'].sample(50).unique() - all_recs[(i+1, 'II')] = batch.recommend(a_ii, rec_users, 25) - all_test[i+1] = test + rec_users = test["user"].sample(50).unique() + all_recs[(i + 1, "II")] = batch.recommend(a_ii, rec_users, 25) + all_test[i + 1] = test - recs = pd.concat(all_recs, names=['part', 'algo']) - recs.reset_index(['part', 'algo'], inplace=True) + recs = pd.concat(all_recs, names=["part", "algo"]) + recs.reset_index(["part", "algo"], inplace=True) recs.reset_index(drop=True, inplace=True) - test = pd.concat(all_test, names=['part']) - test.reset_index(['part'], inplace=True) + test = pd.concat(all_test, names=["part"]) + test.reset_index(["part"], inplace=True) test.reset_index(drop=True, inplace=True) scores = rla.compute(recs, test, include_missing=True) inames = scores.index.names scores.sort_index(inplace=True) 
assert len(scores) == 50 * 4 - assert all(scores['ntruth'] == 5) - assert scores['recall'].isna().sum() > 0 - _log.info('scores:\n%s', scores) + assert all(scores["ntruth"] == 5) + assert scores["recall"].isna().sum() > 0 + _log.info("scores:\n%s", scores) - ucounts = scores.reset_index().groupby('algo')['user'].agg(['count', 'nunique']) - assert all(ucounts['count'] == 100) - assert all(ucounts['nunique'] == 100) + ucounts = scores.reset_index().groupby("algo")["user"].agg(["count", "nunique"]) + assert all(ucounts["count"] == 100) + assert all(ucounts["nunique"] == 100) mscores = rla.compute(recs, test) mscores = mscores.reset_index().set_index(inames) mscores.sort_index(inplace=True) assert len(mscores) < len(scores) - _log.info('mscores:\n%s', mscores) + _log.info("mscores:\n%s", mscores) - recall = scores.loc[scores['recall'].notna(), 'recall'].copy() - recall, mrecall = recall.align(mscores['recall']) + recall = scores.loc[scores["recall"].notna(), "recall"].copy() + recall, mrecall = recall.align(mscores["recall"]) assert all(recall == mrecall) -@mark.parametrize('drop_rating', [False, True]) +@mark.parametrize("drop_rating", [False, True]) def test_pr_bulk_match(demo_recs, drop_rating): "bulk and normal match" train, test, recs = demo_recs if drop_rating: - test = test[['user', 'item']] + test = test[["user", "item"]] rla = topn.RecListAnalysis() rla.add_metric(precision) rla.add_metric(recall) # metric without the bulk capabilities - rla.add_metric(lambda *a: precision(*a), name='ind_p') - rla.add_metric(lambda *a: recall(*a), name='ind_r') + rla.add_metric(lambda *a: precision(*a), name="ind_p") + rla.add_metric(lambda *a: recall(*a), name="ind_r") res = rla.compute(recs, test) print(res) - _log.info('precision mismatches:\n%s', - res[res.precision != res.ind_p]) - _log.info('recall mismatches:\n%s', - res[res.recall != res.ind_r]) + _log.info("precision mismatches:\n%s", res[res.precision != res.ind_p]) + _log.info("recall mismatches:\n%s", res[res.recall != res.ind_r]) assert res.precision.values == approx(res.ind_p.values) assert res.recall.values == approx(res.ind_r.values) diff --git a/tests/test_topn_hit.py b/tests/test_topn_hit.py index f907da2f6..b664023f6 100644 --- a/tests/test_topn_hit.py +++ b/tests/test_topn_hit.py @@ -12,8 +12,8 @@ def _test_hit(items, rel, **kwargs): - recs = pd.DataFrame({'item': items}) - truth = pd.DataFrame({'item': rel}).set_index('item') + recs = pd.DataFrame({"item": items}) + truth = pd.DataFrame({"item": rel}).set_index("item") return hit(recs, truth, **kwargs) @@ -81,7 +81,7 @@ def test_hit_series_array(): hr = _test_hit(pd.Series([1, 2, 3, 4]), np.array([1, 3, 5, 7])) assert hr == 1 - hr = _test_hit(pd.Series([1, 2, 3]), np.arange(4, 9, 1, 'u4')) + hr = _test_hit(pd.Series([1, 2, 3]), np.arange(4, 9, 1, "u4")) assert hr == 0 @@ -92,7 +92,7 @@ def test_hit_array(): hr = _test_hit(np.array([1, 2, 3, 4]), np.array([1, 3, 5, 7])) assert hr == 1 - hr = _test_hit(np.array([1, 2, 3]), np.arange(4, 9, 1, 'u4')) + hr = _test_hit(np.array([1, 2, 3]), np.arange(4, 9, 1, "u4")) assert hr == 0 @@ -122,19 +122,18 @@ def test_hit_partial_rel(): def test_hit_bulk_k(demo_recs): "bulk and normal match" train, test, recs = demo_recs - assert test['user'].value_counts().max() > 5 + assert test["user"].value_counts().max() > 5 rla = topn.RecListAnalysis() - rla.add_metric(hit, name='hk', k=5) + rla.add_metric(hit, name="hk", k=5) rla.add_metric(hit) # metric without the bulk capabilities - rla.add_metric(lambda *a, **k: hit(*a, **k), name='ind_hk', k=5) - 
rla.add_metric(lambda *a: hit(*a), name='ind_h') + rla.add_metric(lambda *a, **k: hit(*a, **k), name="ind_hk", k=5) + rla.add_metric(lambda *a: hit(*a), name="ind_h") res = rla.compute(recs, test) print(res) - _log.info('recall mismatches:\n%s', - res[res.hit != res.ind_h]) + _log.info("recall mismatches:\n%s", res[res.hit != res.ind_h]) assert res.hit.values == approx(res.ind_h.values) assert res.hk.values == approx(res.ind_hk.values) diff --git a/tests/test_topn_mrr.py b/tests/test_topn_mrr.py index 5fc04d268..bb2b73843 100644 --- a/tests/test_topn_mrr.py +++ b/tests/test_topn_mrr.py @@ -11,8 +11,8 @@ def _test_rr(items, rel, **kw): - recs = pd.DataFrame({'item': items}) - truth = pd.DataFrame({'item': rel}).set_index('item') + recs = pd.DataFrame({"item": items}) + truth = pd.DataFrame({"item": rel}).set_index("item") return recip_rank(recs, truth, **kw) @@ -53,36 +53,36 @@ def test_mrr_series_idx(): def test_mrr_array_late(): "deep -> 0.1" - rr = _test_rr(np.arange(1, 21, 1, 'u4'), [20, 10]) + rr = _test_rr(np.arange(1, 21, 1, "u4"), [20, 10]) assert rr == approx(0.1) def test_mrr_k_trunc(): - rr = _test_rr(np.arange(1, 21, 1, 'u4'), [20, 10], k=5) + rr = _test_rr(np.arange(1, 21, 1, "u4"), [20, 10], k=5) assert rr == approx(0.0) - rr = _test_rr(np.arange(1, 21, 1, 'u4'), [20, 10, 5], k=5) + rr = _test_rr(np.arange(1, 21, 1, "u4"), [20, 10, 5], k=5) assert rr == approx(0.2) def test_mrr_k_short(): - rr = _test_rr(np.arange(1, 5, 1, 'u4'), [2], k=10) + rr = _test_rr(np.arange(1, 5, 1, "u4"), [2], k=10) assert rr == approx(0.5) -@mark.parametrize('drop_rating', [False, True]) +@mark.parametrize("drop_rating", [False, True]) def test_mrr_bulk(demo_recs, drop_rating): "bulk and normal match" train, test, recs = demo_recs if drop_rating: - test = test[['user', 'item']] + test = test[["user", "item"]] rla = RecListAnalysis() rla.add_metric(recip_rank) - rla.add_metric(recip_rank, name='rr_k', k=10) + rla.add_metric(recip_rank, name="rr_k", k=10) # metric without the bulk capabilities - rla.add_metric(lambda *a: recip_rank(*a), name='ind_rr') - rla.add_metric(lambda *a, **k: recip_rank(*a, **k), name='ind_rr_k', k=10) + rla.add_metric(lambda *a: recip_rank(*a), name="ind_rr") + rla.add_metric(lambda *a, **k: recip_rank(*a, **k), name="ind_rr_k", k=10) res = rla.compute(recs, test) assert all(res.recip_rank == res.ind_rr) diff --git a/tests/test_topn_ndcg.py b/tests/test_topn_ndcg.py index 2fdf9e573..830a4b8b6 100644 --- a/tests/test_topn_ndcg.py +++ b/tests/test_topn_ndcg.py @@ -55,71 +55,67 @@ def test_dcg_nan(): def test_dcg_series(): "The DCG function should work on a series" - assert _dcg(pd.Series([np.e, 0, 0, np.pi])) == \ - approx((np.e + np.pi / np.log2(4))) + assert _dcg(pd.Series([np.e, 0, 0, np.pi])) == approx((np.e + np.pi / np.log2(4))) def test_dcg_mult2(): "multiple elements should score correctly" assert _dcg(np.array([np.e, np.pi])) == approx(np.e + np.pi) - assert _dcg(np.array([np.e, 0, 0, np.pi])) == \ - approx((np.e + np.pi / np.log2(4))) + assert _dcg(np.array([np.e, 0, 0, np.pi])) == approx((np.e + np.pi / np.log2(4))) def test_ndcg_empty(): - recs = pd.DataFrame({'item': []}) - truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]}) - truth = truth.set_index('item') + recs = pd.DataFrame({"item": []}) + truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]}) + truth = truth.set_index("item") assert ndcg(recs, truth) == approx(0.0) def test_ndcg_no_match(): - recs = pd.DataFrame({'item': [4]}) - truth = pd.DataFrame({'item': [1, 2, 3], 
'rating': [3.0, 5.0, 4.0]}) - truth = truth.set_index('item') + recs = pd.DataFrame({"item": [4]}) + truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]}) + truth = truth.set_index("item") assert ndcg(recs, truth) == approx(0.0) def test_ndcg_perfect(): - recs = pd.DataFrame({'item': [2, 3, 1]}) - truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]}) - truth = truth.set_index('item') + recs = pd.DataFrame({"item": [2, 3, 1]}) + truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]}) + truth = truth.set_index("item") assert ndcg(recs, truth) == approx(1.0) def test_ndcg_perfect_k_short(): - recs = pd.DataFrame({'item': [2, 3, 1]}) - truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]}) - truth = truth.set_index('item') + recs = pd.DataFrame({"item": [2, 3, 1]}) + truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]}) + truth = truth.set_index("item") assert ndcg(recs, truth, k=2) == approx(1.0) assert ndcg(recs[:2], truth, k=2) == approx(1.0) def test_ndcg_wrong(): - recs = pd.DataFrame({'item': [1, 2]}) - truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]}) - truth = truth.set_index('item') + recs = pd.DataFrame({"item": [1, 2]}) + truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]}) + truth = truth.set_index("item") assert ndcg(recs, truth) == approx(_dcg([3.0, 5.0] / _dcg([5.0, 4.0, 3.0]))) def test_ndcg_perfect_k(): - recs = pd.DataFrame({'item': [2, 3]}) - truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]}) - truth = truth.set_index('item') + recs = pd.DataFrame({"item": [2, 3]}) + truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]}) + truth = truth.set_index("item") assert ndcg(recs, truth, k=2) == approx(1.0) def test_ndcg_bulk_at_top(): - truth = pd.DataFrame.from_records([ - (1, 50, 3.5), - (1, 30, 3.5) - ], columns=['LKTruthID', 'item', 'rating']).set_index(['LKTruthID', 'item']) + truth = pd.DataFrame.from_records( + [(1, 50, 3.5), (1, 30, 3.5)], columns=["LKTruthID", "item", "rating"] + ).set_index(["LKTruthID", "item"]) - recs = pd.DataFrame.from_records([ - (1, 1, 50, 1), - (1, 1, 30, 2), - (1, 1, 72, 3) - ], columns=['LKRecID', 'LKTruthID', 'item', 'rank']) + recs = pd.DataFrame.from_records( + [(1, 1, 50, 1), (1, 1, 30, 2), (1, 1, 72, 3)], + columns=["LKRecID", "LKTruthID", "item", "rank"], + ) ndcg = _bulk_ndcg(recs, truth) assert len(ndcg) == 1 @@ -128,16 +124,14 @@ def test_ndcg_bulk_at_top(): def test_ndcg_bulk_not_at_top(): - truth = pd.DataFrame.from_records([ - (1, 50, 3.5), - (1, 30, 3.5) - ], columns=['LKTruthID', 'item', 'rating']).set_index(['LKTruthID', 'item']) + truth = pd.DataFrame.from_records( + [(1, 50, 3.5), (1, 30, 3.5)], columns=["LKTruthID", "item", "rating"] + ).set_index(["LKTruthID", "item"]) - recs = pd.DataFrame.from_records([ - (1, 1, 50, 1), - (1, 1, 72, 2), - (1, 1, 30, 3) - ], columns=['LKRecID', 'LKTruthID', 'item', 'rank']) + recs = pd.DataFrame.from_records( + [(1, 1, 50, 1), (1, 1, 72, 2), (1, 1, 30, 3)], + columns=["LKRecID", "LKTruthID", "item", "rank"], + ) ndcg = _bulk_ndcg(recs, truth) assert len(ndcg) == 1 @@ -145,23 +139,23 @@ def test_ndcg_bulk_not_at_top(): assert ndcg.iloc[0] == approx(0.8155, abs=0.001) -@mark.parametrize('drop_rating', [False, True]) +@mark.parametrize("drop_rating", [False, True]) def test_ndcg_bulk_match(demo_recs, drop_rating): "bulk and normal match" train, test, recs = demo_recs if drop_rating: - test = test[['user', 'item']] + test = test[["user", "item"]] rla = 
RecListAnalysis() rla.add_metric(ndcg) - rla.add_metric(ndcg, name='ndcg_k', k=5) + rla.add_metric(ndcg, name="ndcg_k", k=5) rla.add_metric(dcg) # metric without the bulk capabilities - rla.add_metric(lambda *a: ndcg(*a), name='ind_ndcg') - rla.add_metric(lambda *a, **k: ndcg(*a, **k), name='ind_ndcg_k', k=5) + rla.add_metric(lambda *a: ndcg(*a), name="ind_ndcg") + rla.add_metric(lambda *a, **k: ndcg(*a, **k), name="ind_ndcg_k", k=5) res = rla.compute(recs, test) - res['ind_ideal'] = res['dcg'] / res['ind_ndcg'] + res["ind_ideal"] = res["dcg"] / res["ind_ndcg"] print(res) assert res.ndcg.values == approx(res.ind_ndcg.values) diff --git a/tests/test_topn_precision.py b/tests/test_topn_precision.py index 024c6e536..62df2bc1a 100644 --- a/tests/test_topn_precision.py +++ b/tests/test_topn_precision.py @@ -9,8 +9,8 @@ def _test_prec(items, rel, **k): - recs = pd.DataFrame({'item': items}) - truth = pd.DataFrame({'item': rel}).set_index('item') + recs = pd.DataFrame({"item": items}) + truth = pd.DataFrame({"item": rel}).set_index("item") return precision(recs, truth, **k) @@ -76,7 +76,7 @@ def test_precision_series_array(): prec = _test_prec(pd.Series([1, 2, 3, 4]), np.array([1, 3, 5])) assert prec == approx(0.5) - prec = _test_prec(pd.Series([1, 2, 3, 4]), np.arange(4, 10, 1, 'u4')) + prec = _test_prec(pd.Series([1, 2, 3, 4]), np.arange(4, 10, 1, "u4")) assert prec == approx(0.25) @@ -87,7 +87,7 @@ def test_precision_array(): prec = _test_prec(np.array([1, 2, 3, 4]), np.array([1, 3, 5])) assert prec == approx(0.5) - prec = _test_prec(np.array([1, 2, 3, 4]), np.arange(4, 10, 1, 'u4')) + prec = _test_prec(np.array([1, 2, 3, 4]), np.arange(4, 10, 1, "u4")) assert prec == approx(0.25) @@ -118,14 +118,14 @@ def test_prec_short_items(): def test_recall_bulk_k(demo_recs): "bulk and normal match" train, test, recs = demo_recs - assert test['user'].value_counts().max() > 5 + assert test["user"].value_counts().max() > 5 rla = topn.RecListAnalysis() - rla.add_metric(precision, name='pk', k=5) + rla.add_metric(precision, name="pk", k=5) rla.add_metric(precision) # metric without the bulk capabilities - rla.add_metric(lambda *a, **k: precision(*a, **k), name='ind_pk', k=5) - rla.add_metric(lambda *a: precision(*a), name='ind_p') + rla.add_metric(lambda *a, **k: precision(*a, **k), name="ind_pk", k=5) + rla.add_metric(lambda *a: precision(*a), name="ind_p") res = rla.compute(recs, test) assert res.precision.values == approx(res.ind_p.values) diff --git a/tests/test_topn_rbp.py b/tests/test_topn_rbp.py index c3496b276..babe76dc3 100644 --- a/tests/test_topn_rbp.py +++ b/tests/test_topn_rbp.py @@ -15,82 +15,86 @@ def test_rbp_empty(): - recs = pd.DataFrame({'item': []}) - truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]}) - truth = truth.set_index('item') + recs = pd.DataFrame({"item": []}) + truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]}) + truth = truth.set_index("item") assert rbp(recs, truth) == approx(0.0) def test_rbp_no_match(): - recs = pd.DataFrame({'item': [4]}) - truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]}) - truth = truth.set_index('item') + recs = pd.DataFrame({"item": [4]}) + truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]}) + truth = truth.set_index("item") assert rbp(recs, truth) == approx(0.0) def test_rbp_one_match(): - recs = pd.DataFrame({'item': [1]}) - truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]}) - truth = truth.set_index('item') + recs = pd.DataFrame({"item": [1]}) + truth = 
pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]}) + truth = truth.set_index("item") assert rbp(recs, truth) == approx(0.5) @given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True), st.floats(0.05, 0.95)) def test_rbp_perfect(items, p): n = len(items) - recs = pd.DataFrame({'item': items}) - truth = pd.DataFrame({'item': items, 'rating': 1}) - truth = truth.set_index('item').sort_index() + recs = pd.DataFrame({"item": items}) + truth = pd.DataFrame({"item": items, "rating": 1}) + truth = truth.set_index("item").sort_index() assert rbp(recs, truth, patience=p) == approx(np.sum(p ** np.arange(n)) * (1 - p)) @given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True), st.floats(0.05, 0.95)) def test_rbp_perfect_norm(items, p): - recs = pd.DataFrame({'item': items}) - truth = pd.DataFrame({'item': items, 'rating': 1}) - truth = truth.set_index('item').sort_index() + recs = pd.DataFrame({"item": items}) + truth = pd.DataFrame({"item": items, "rating": 1}) + truth = truth.set_index("item").sort_index() assert rbp(recs, truth, patience=p, normalize=True) == approx(1.0) -@given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True), - st.integers(1, 100), st.floats(0.05, 0.95)) +@given( + st.lists(st.integers(1), min_size=1, max_size=100, unique=True), + st.integers(1, 100), + st.floats(0.05, 0.95), +) def test_rbp_perfect_k(items, k, p): n = len(items) eff_n = min(n, k) - recs = pd.DataFrame({'item': items}) - truth = pd.DataFrame({'item': items, 'rating': 1}) - truth = truth.set_index('item').sort_index() + recs = pd.DataFrame({"item": items}) + truth = pd.DataFrame({"item": items, "rating": 1}) + truth = truth.set_index("item").sort_index() assert rbp(recs, truth, k=k, patience=p) == approx(np.sum(p ** np.arange(eff_n)) * (1 - p)) -@given(st.lists(st.integers(1), min_size=1, max_size=100, unique=True), - st.integers(1, 100), st.floats(0.05, 0.95)) +@given( + st.lists(st.integers(1), min_size=1, max_size=100, unique=True), + st.integers(1, 100), + st.floats(0.05, 0.95), +) def test_rbp_perfect_k_norm(items, k, p): - recs = pd.DataFrame({'item': items}) - truth = pd.DataFrame({'item': items, 'rating': 1}) - truth = truth.set_index('item').sort_index() + recs = pd.DataFrame({"item": items}) + truth = pd.DataFrame({"item": items, "rating": 1}) + truth = truth.set_index("item").sort_index() assert rbp(recs, truth, k=k, patience=p, normalize=True) == approx(1.0) def test_rbp_missing(): - recs = pd.DataFrame({'item': [1, 2]}) - truth = pd.DataFrame({'item': [1, 2, 3], 'rating': [3.0, 5.0, 4.0]}) - truth = truth.set_index('item').sort_index() + recs = pd.DataFrame({"item": [1, 2]}) + truth = pd.DataFrame({"item": [1, 2, 3], "rating": [3.0, 5.0, 4.0]}) + truth = truth.set_index("item").sort_index() # (1 + 0.5) * 0.5 assert rbp(recs, truth) == approx(0.75) def test_rbp_bulk_at_top(): - truth = pd.DataFrame.from_records([ - (1, 50, 3.5), - (1, 30, 3.5) - ], columns=['LKTruthID', 'item', 'rating']).set_index(['LKTruthID', 'item']) + truth = pd.DataFrame.from_records( + [(1, 50, 3.5), (1, 30, 3.5)], columns=["LKTruthID", "item", "rating"] + ).set_index(["LKTruthID", "item"]) - recs = pd.DataFrame.from_records([ - (1, 1, 50, 1), - (1, 1, 30, 2), - (1, 1, 72, 3) - ], columns=['LKRecID', 'LKTruthID', 'item', 'rank']) + recs = pd.DataFrame.from_records( + [(1, 1, 50, 1), (1, 1, 30, 2), (1, 1, 72, 3)], + columns=["LKRecID", "LKTruthID", "item", "rank"], + ) rbp = _bulk_rbp(recs, truth) assert len(rbp) == 1 @@ -99,16 +103,14 @@ def test_rbp_bulk_at_top(): def 
test_rbp_bulk_not_at_top(): - truth = pd.DataFrame.from_records([ - (1, 50, 3.5), - (1, 30, 3.5) - ], columns=['LKTruthID', 'item', 'rating']).set_index(['LKTruthID', 'item']) + truth = pd.DataFrame.from_records( + [(1, 50, 3.5), (1, 30, 3.5)], columns=["LKTruthID", "item", "rating"] + ).set_index(["LKTruthID", "item"]) - recs = pd.DataFrame.from_records([ - (1, 1, 50, 1), - (1, 1, 72, 2), - (1, 1, 30, 3) - ], columns=['LKRecID', 'LKTruthID', 'item', 'rank']) + recs = pd.DataFrame.from_records( + [(1, 1, 50, 1), (1, 1, 72, 2), (1, 1, 30, 3)], + columns=["LKRecID", "LKTruthID", "item", "rank"], + ) rbp = _bulk_rbp(recs, truth) assert len(rbp) == 1 @@ -116,27 +118,29 @@ def test_rbp_bulk_not_at_top(): assert rbp.iloc[0] == approx((1 + 0.25) * 0.5) -@mark.parametrize('normalize', [False, True]) +@mark.parametrize("normalize", [False, True]) def test_rbp_bulk_match(demo_recs, normalize): "bulk and normal match" train, test, recs = demo_recs rla = RecListAnalysis() rla.add_metric(rbp, normalize=normalize) - rla.add_metric(rbp, name='rbp_k', k=5, normalize=normalize) + rla.add_metric(rbp, name="rbp_k", k=5, normalize=normalize) # metric without the bulk capabilities - rla.add_metric(lambda *a: rbp(*a, normalize=normalize), name='ind_rbp') - rla.add_metric(lambda *a, **k: rbp(*a, normalize=normalize, **k), name='ind_rbp_k', k=5) + rla.add_metric(lambda *a: rbp(*a, normalize=normalize), name="ind_rbp") + rla.add_metric(lambda *a, **k: rbp(*a, normalize=normalize, **k), name="ind_rbp_k", k=5) res = rla.compute(recs, test) - res['diff'] = np.abs(res.rbp - res.ind_rbp) - rl = res.nlargest(5, 'diff') - _log.info('res:\n%s', rl) + res["diff"] = np.abs(res.rbp - res.ind_rbp) + rl = res.nlargest(5, "diff") + _log.info("res:\n%s", rl) user = rl.index[0] - _log.info('user: %s\n%s', user, rl.iloc[0]) - _log.info('test:\n%s', test[test['user'] == user]) - urecs = recs[recs['user'] == user].join(test.set_index(['user', 'item'])['rating'], on=['user', 'item'], how='left') - _log.info('recs:\n%s', urecs[urecs['rating'].notnull()]) + _log.info("user: %s\n%s", user, rl.iloc[0]) + _log.info("test:\n%s", test[test["user"] == user]) + urecs = recs[recs["user"] == user].join( + test.set_index(["user", "item"])["rating"], on=["user", "item"], how="left" + ) + _log.info("recs:\n%s", urecs[urecs["rating"].notnull()]) assert res.rbp.values == approx(res.ind_rbp.values) assert res.rbp_k.values == approx(res.ind_rbp_k.values) diff --git a/tests/test_topn_recall.py b/tests/test_topn_recall.py index c11a6ad7f..612acdf08 100644 --- a/tests/test_topn_recall.py +++ b/tests/test_topn_recall.py @@ -12,8 +12,8 @@ def _test_recall(items, rel, **kwargs): - recs = pd.DataFrame({'item': items}) - truth = pd.DataFrame({'item': rel}).set_index('item') + recs = pd.DataFrame({"item": items}) + truth = pd.DataFrame({"item": rel}).set_index("item") return recall(recs, truth, **kwargs) @@ -84,7 +84,7 @@ def test_recall_series_array(): prec = _test_recall(pd.Series([1, 2, 3, 4]), np.array([1, 3, 5, 7])) assert prec == approx(0.5) - prec = _test_recall(pd.Series([1, 2, 3, 4]), np.arange(4, 9, 1, 'u4')) + prec = _test_recall(pd.Series([1, 2, 3, 4]), np.arange(4, 9, 1, "u4")) assert prec == approx(0.2) @@ -95,7 +95,7 @@ def test_recall_array(): prec = _test_recall(np.array([1, 2, 3, 4]), np.array([1, 3, 5, 7])) assert prec == approx(0.5) - prec = _test_recall(np.array([1, 2, 3, 4]), np.arange(4, 9, 1, 'u4')) + prec = _test_recall(np.array([1, 2, 3, 4]), np.arange(4, 9, 1, "u4")) assert prec == approx(0.2) @@ -126,19 +126,18 @@ def 
test_recall_partial_rel(): def test_recall_bulk_k(demo_recs): "bulk and normal match" train, test, recs = demo_recs - assert test['user'].value_counts().max() > 5 + assert test["user"].value_counts().max() > 5 rla = topn.RecListAnalysis() - rla.add_metric(recall, name='rk', k=5) + rla.add_metric(recall, name="rk", k=5) rla.add_metric(recall) # metric without the bulk capabilities - rla.add_metric(lambda *a, **k: recall(*a, **k), name='ind_rk', k=5) - rla.add_metric(lambda *a: recall(*a), name='ind_r') + rla.add_metric(lambda *a, **k: recall(*a, **k), name="ind_rk", k=5) + rla.add_metric(lambda *a: recall(*a), name="ind_r") res = rla.compute(recs, test) print(res) - _log.info('recall mismatches:\n%s', - res[res.recall != res.ind_r]) + _log.info("recall mismatches:\n%s", res[res.recall != res.ind_r]) assert res.recall.values == approx(res.ind_r.values) assert res.rk.values == approx(res.ind_rk.values) diff --git a/tests/test_topn_recs.py b/tests/test_topn_recs.py index 07d1840fa..e570f8c1f 100644 --- a/tests/test_topn_recs.py +++ b/tests/test_topn_recs.py @@ -7,9 +7,9 @@ import lenskit.util.test as lktu from pytest import approx -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_topn_recommend(): @@ -37,14 +37,14 @@ def test_topn_config(): rec = basic.TopN(pred) rs = str(rec) - assert rs.startswith('TopN/') + assert rs.startswith("TopN/") def test_topn_big(): ratings = lktu.ml_test.ratings users = ratings.user.unique() items = ratings.item.unique() - user_items = ratings.set_index('user').item + user_items = ratings.set_index("user").item algo = basic.TopN(bias.Bias()) a2 = algo.fit(ratings) @@ -55,7 +55,7 @@ def test_topn_big(): recs = algo.recommend(u, 100) assert len(recs) == 100 rated = user_items.loc[u] - assert all(~recs['item'].isin(rated)) + assert all(~recs["item"].isin(rated)) unrated = np.setdiff1d(items, rated) scores = algo.predictor.predict_for_user(u, unrated) top = scores.nlargest(100) diff --git a/tests/test_topn_utils.py b/tests/test_topn_utils.py index c97f1f496..25257c5df 100644 --- a/tests/test_topn_utils.py +++ b/tests/test_topn_utils.py @@ -6,7 +6,7 @@ def test_cs_rated_items_series(): "rated_items should de-index series" - items = ['a', 'b', 'wombat'] + items = ["a", "b", "wombat"] series = pd.Series(np.random.randn(3), index=items) i2 = CandidateSelector.rated_items(series) @@ -16,7 +16,7 @@ def test_cs_rated_items_series(): def test_cs_rated_items(): "rated_items should return list as array" - items = ['a', 'b', 'wombat'] + items = ["a", "b", "wombat"] i2 = CandidateSelector.rated_items(items) assert isinstance(i2, np.ndarray) @@ -25,7 +25,7 @@ def test_cs_rated_items(): def test_cs_rated_items_array(): "rated_items should return array as itself" - items = ['a', 'b', 'wombat'] + items = ["a", "b", "wombat"] items = np.array(items) i2 = CandidateSelector.rated_items(items) diff --git a/tests/test_util.py b/tests/test_util.py index 63fbc783a..6f134b0a2 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -31,14 +31,14 @@ def test_stopwatch_str(): w = lku.Stopwatch() time.sleep(0.5) s = str(w) - assert s.endswith('ms') + assert s.endswith("ms") def test_stopwatch_long_str(): w = lku.Stopwatch() time.sleep(1.2) s = str(w) - assert s.endswith('s') + assert s.endswith("s") def test_stopwatch_minutes(): @@ -46,7 +46,7 @@ def test_stopwatch_minutes(): w.stop() w.start_time = w.stop_time 
- 62 s = str(w) - p = re.compile(r'1m2.\d\ds') + p = re.compile(r"1m2.\d\ds") assert p.match(s) @@ -55,7 +55,7 @@ def test_stopwatch_hours(): w.stop() w.start_time = w.stop_time - 3663 s = str(w) - p = re.compile(r'1h1m3.\d\ds') + p = re.compile(r"1h1m3.\d\ds") assert p.match(s) @@ -64,6 +64,7 @@ def test_last_memo(): def func(foo): history.append(foo) + cache = lku.LastMemo(func) cache("foo") diff --git a/tests/test_util_algos.py b/tests/test_util_algos.py index 644b0240a..8437ebd35 100644 --- a/tests/test_util_algos.py +++ b/tests/test_util_algos.py @@ -5,9 +5,9 @@ import lenskit.util.test as lktu -simple_df = pd.DataFrame({'item': [1, 1, 2, 3], - 'user': [10, 12, 10, 13], - 'rating': [4.0, 3.0, 5.0, 2.0]}) +simple_df = pd.DataFrame( + {"item": [1, 1, 2, 3], "user": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0]} +) def test_memorized(): @@ -26,9 +26,9 @@ def test_memorized(): def test_memorized_batch(): algo = basic.Memorized(simple_df) - preds = algo.predict(pd.DataFrame({'user': [10, 10, 12], 'item': [1, 2, 1]})) + preds = algo.predict(pd.DataFrame({"user": [10, 10, 12], "item": [1, 2, 1]})) assert isinstance(preds, pd.Series) - assert preds.name == 'prediction' + assert preds.name == "prediction" assert set(preds.index) == set([0, 1, 2]) assert all(preds == [4.0, 5.0, 3.0]) @@ -36,7 +36,7 @@ def test_memorized_batch(): def test_memorized_batch_ord(): algo = basic.Memorized(simple_df) - preds = algo.predict(pd.DataFrame({'user': [10, 12, 10], 'item': [1, 1, 2]})) + preds = algo.predict(pd.DataFrame({"user": [10, 12, 10], "item": [1, 1, 2]})) assert set(preds.index) == set([0, 1, 2]) assert all(preds == [4.0, 3.0, 5.0]) @@ -44,7 +44,7 @@ def test_memorized_batch_ord(): def test_memorized_batch_missing(): algo = basic.Memorized(simple_df) - preds = algo.predict(pd.DataFrame({'user': [10, 12, 12], 'item': [1, 1, 3]})) + preds = algo.predict(pd.DataFrame({"user": [10, 12, 12], "item": [1, 1, 3]})) assert set(preds.index) == set([0, 1, 2]) assert all(preds.iloc[:2] == [4.0, 3.0]) assert np.isnan(preds.iloc[2]) @@ -53,8 +53,9 @@ def test_memorized_batch_missing(): def test_memorized_batch_keep_index(): algo = basic.Memorized(simple_df) - query = pd.DataFrame({'user': [10, 10, 12], 'item': [1, 2, 1]}, - index=np.random.choice(np.arange(10), 3, False)) + query = pd.DataFrame( + {"user": [10, 10, 12], "item": [1, 2, 1]}, index=np.random.choice(np.arange(10), 3, False) + ) preds = algo.predict(query) assert all(preds.index == query.index) assert all(preds == [4.0, 5.0, 3.0]) @@ -64,7 +65,7 @@ def test_random(): # test case: no seed algo = basic.Random() model = algo.fit(lktu.ml_test.ratings) - items = lktu.ml_test.ratings['item'].unique() + items = lktu.ml_test.ratings["item"].unique() nitems = len(items) assert model is not None @@ -74,17 +75,17 @@ def test_random(): assert len(recs1) == 100 assert len(recs2) == 100 # with very high probabilities - assert set(recs1['item']) != set(recs2['item']) + assert set(recs1["item"]) != set(recs2["item"]) recs_all = algo.recommend(2038) assert len(recs_all) == nitems - assert set(items) == set(recs_all['item']) + assert set(items) == set(recs_all["item"]) def test_random_derive_seed(): - algo = basic.Random(rng_spec='user') + algo = basic.Random(rng_spec="user") model = algo.fit(lktu.ml_test.ratings) - items = lktu.ml_test.ratings['item'].unique() + items = lktu.ml_test.ratings["item"].unique() nitems = len(items) assert model is not None @@ -94,17 +95,17 @@ def test_random_derive_seed(): assert len(recs1) == 100 assert len(recs2) == 100 # with very 
high probabilities - assert set(recs1['item']) != set(recs2['item']) + assert set(recs1["item"]) != set(recs2["item"]) recs_all = algo.recommend(2038) assert len(recs_all) == nitems - assert set(items) == set(recs_all['item']) + assert set(items) == set(recs_all["item"]) def test_random_rec_from_candidates(): algo = basic.Random() - items = lktu.ml_test.ratings['item'].unique() - users = lktu.ml_test.ratings['user'].unique() + items = lktu.ml_test.ratings["item"].unique() + users = lktu.ml_test.ratings["user"].unique() user1, user2 = np.random.choice(users, size=2, replace=False) algo.fit(lktu.ml_test.ratings) @@ -134,7 +135,7 @@ def test_knownrating_batch_missing(): algo = basic.KnownRating() algo.fit(simple_df) - preds = algo.predict(pd.DataFrame({'user': [10, 12, 12], 'item': [1, 1, 3]})) + preds = algo.predict(pd.DataFrame({"user": [10, 12, 12], "item": [1, 1, 3]})) assert set(preds.index) == set([0, 1, 2]) assert all(preds.iloc[:2] == [4.0, 3.0]) assert np.isnan(preds.iloc[2]) diff --git a/tests/test_util_random.py b/tests/test_util_random.py index 62b3abe15..d582a2dcb 100644 --- a/tests/test_util_random.py +++ b/tests/test_util_random.py @@ -62,7 +62,7 @@ def test_initialize(): def test_initialize_key(): - random.init_rng(42, 'wombat') + random.init_rng(42, "wombat") assert root_seed().entropy == 42 # assert root_seed().spawn_key == (zlib.crc32(b'wombat'),) @@ -83,6 +83,6 @@ def test_derive_seed_intkey(): def test_derive_seed_str(): random.init_rng(42, propagate=False) - s2 = random.derive_seed(b'wombat') + s2 = random.derive_seed(b"wombat") assert s2.entropy == 42 # assert s2.spawn_key == (zlib.crc32(b'wombat'),)
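
The reformatted bulk-metric tests above (test_pr_bulk_match, test_mrr_bulk, test_ndcg_bulk_match, test_rbp_bulk_match, and the *_bulk_k variants) all exercise the same recipe: register a metric twice with RecListAnalysis, once directly so its bulk implementation is used, and once wrapped in a lambda so it falls back to per-list evaluation, then assert that the two result columns agree. The sketch below illustrates that recipe outside the diff; it is not part of the patch, check_bulk_vs_per_list is a hypothetical helper name, and it assumes recs/truth frames shaped like the ones these tests build (user and item columns, with ratings in the truth frame).

import pandas as pd
from pytest import approx

from lenskit import topn


def check_bulk_vs_per_list(recs: pd.DataFrame, truth: pd.DataFrame) -> pd.DataFrame:
    """Compute precision via the bulk path and the per-list fallback, then compare them."""
    rla = topn.RecListAnalysis()
    rla.add_metric(topn.precision)  # registered directly: bulk-capable implementation
    # wrapping the same metric in a lambda hides its bulk hooks, forcing per-list evaluation
    rla.add_metric(lambda *a: topn.precision(*a), name="ind_p")
    res = rla.compute(recs, truth)
    # the two columns should agree up to floating-point tolerance
    assert res.precision.values == approx(res.ind_p.values)
    return res

In the tests above this comparison is what guards the optimized bulk code paths: any divergence between the directly registered metric and its lambda-wrapped copy shows up as a mismatch between the paired result columns.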