diff --git a/lenskit/lenskit/knn/item.py b/lenskit/lenskit/knn/item.py
index aa5011218..48b340b6a 100644
--- a/lenskit/lenskit/knn/item.py
+++ b/lenskit/lenskit/knn/item.py
@@ -16,7 +16,8 @@
 
 import numpy as np
 import torch
-from typing_extensions import Callable, Optional, TypeAlias, override
+from scipy.sparse import csr_array
+from typing_extensions import Optional, override
 
 from lenskit import util
 from lenskit.data import Dataset, FeedbackType, ItemList, QueryInput, RecQuery, Vocabulary
@@ -25,6 +26,7 @@
 from lenskit.math.sparse import normalize_sparse_rows, safe_spmv
 from lenskit.parallel import ensure_parallel_init
 from lenskit.pipeline import Component, Trainable
+from lenskit.util.torch import inference_mode
 
 _log = logging.getLogger(__name__)
 MAX_BLOCKS = 1024
@@ -39,6 +41,12 @@ class ItemKNNScorer(Component, Trainable):
     explicit-feedback mode, its output is equivalent to that of the Java
     version.
 
+    .. note::
+
+        This component must be used with queries containing the user's history,
+        either directly in the input or by wiring its query input to the output of a
+        user history component (e.g., :class:`~lenskit.basic.UserTrainingHistoryLookup`).
+
     Args:
         nnbrs:
             The maximum number of neighbors for scoring each item (``None`` for
@@ -66,16 +74,14 @@ class ItemKNNScorer(Component, Trainable):
 
     items_: Vocabulary
     "Vocabulary of item IDs."
-    item_means_: torch.Tensor | None
+    item_means_: np.ndarray[int, np.dtype[np.float32]] | None
     "Mean rating for each known item."
-    item_counts_: torch.Tensor
+    item_counts_: np.ndarray[int, np.dtype[np.int32]]
     "Number of saved neighbors for each item."
-    sim_matrix_: torch.Tensor
+    sim_matrix_: csr_array
     "Similarity matrix (sparse CSR tensor)."
     users_: Vocabulary
     "Vocabulary of user IDs."
-    rating_matrix_: torch.Tensor
-    "Normalized rating matrix to look up user ratings at prediction time."
 
     def __init__(
         self,
@@ -111,6 +117,7 @@ def is_trained(self) -> bool:
         return hasattr(self, "items_")
 
     @override
+    @inference_mode
     def train(self, data: Dataset):
         """
         Train a model.
@@ -175,16 +182,15 @@ def train(self, data: Dataset):
         _log.info("[%s] computed %d neighbor pairs", self._timer, len(smat.col_indices()))
 
         self.items_ = data.items
-        self.item_means_ = means
-        self.item_counts_ = torch.diff(smat.crow_indices())
-        self.sim_matrix_ = smat
+        self.item_means_ = means.numpy() if means is not None else None
+        self.item_counts_ = torch.diff(smat.crow_indices()).numpy()
+        self.sim_matrix_ = csr_array(
+            (smat.values(), smat.col_indices(), smat.crow_indices()), smat.shape
+        )
         self.users_ = data.users
-        self.rating_matrix_ = init_rmat
         _log.debug("[%s] done, memory use %s", self._timer, util.max_memory())
 
-        return self
-
-    def _compute_similarities(self, rmat: torch.Tensor):
+    def _compute_similarities(self, rmat: torch.Tensor) -> torch.Tensor:
         nitems, nusers = rmat.shape
 
         bs = max(self.block_size, nitems // MAX_BLOCKS)
@@ -195,72 +201,104 @@ def _compute_similarities(self, rmat: torch.Tensor):
         return smat.to(torch.float32)
 
     @override
+    @inference_mode
     def __call__(self, query: QueryInput, items: ItemList) -> ItemList:
         query = RecQuery.create(query)
         _log.debug("predicting %d items for user %s", len(items), query.user_id)
 
         ratings = query.user_items
-        if ratings is None:
-            if query.user_id is None:
-                warnings.warn(
-                    "cannot recommend without without either user ID or items", DataWarning
-                )
-                return ItemList(items, scores=np.nan)
-
-            upos = self.users_.number(query.user_id, missing=None)
-            if upos is None:
-                _log.debug("user %s missing, returning empty predictions", query.user_id)
-                return ItemList(items, scores=np.nan)
-            row = self.rating_matrix_[upos]  # type: ignore
-            ratings = ItemList(
-                item_nums=row.indices()[0], rating=row.values(), vocabulary=self.items_
-            )
+        if ratings is None or len(ratings) == 0:
+            if ratings is None:
+                warnings.warn("no user history, did you omit a history component?", DataWarning)
+            _log.debug("user has no history, returning")
+            return ItemList(items, scores=np.nan)
 
         # set up rating array
         # get rated item positions & limit to in-model items
-        ri_pos = ratings.numbers(format="torch", vocabulary=self.items_, missing="negative")
-        ri_mask = ri_pos >= 0
-        ri_vpos = ri_pos[ri_mask]
-        n_valid = len(ri_vpos)
+        ri_nums = ratings.numbers(format="torch", vocabulary=self.items_, missing="negative")
+        ri_mask = ri_nums >= 0
+        ri_valid_nums = ri_nums[ri_mask]
+        n_valid = len(ri_valid_nums)
         _log.debug("user %s: %d of %d rated items in model", query.user_id, n_valid, len(ratings))
 
         if self.feedback == "explicit":
-            ri_vals = ratings.field("rating", "torch")
+            ri_vals = ratings.field("rating", "numpy")
             if ri_vals is None:
                 raise RuntimeError("explicit-feedback scorer must have ratings")
-            ri_vals = ri_vals[ri_mask].to(torch.float64)
+            ri_vals = np.require(ri_vals[ri_mask], np.float32)
         else:
-            ri_vals = torch.full((n_valid,), 1.0, dtype=torch.float64)
+            ri_vals = np.full(n_valid, 1.0, dtype=np.float32)
 
         # mean-center the rating array
+        if self.item_means_ is not None:
+            ri_vals -= self.item_means_[ri_valid_nums]
+
+        # convert target item information
+        ti_nums = items.numbers(vocabulary=self.items_, missing="negative")
+        ti_mask = ti_nums >= 0
+        ti_valid_nums = ti_nums[ti_mask]
+
+        # subset the model to rated and target items
+        model = self.sim_matrix_
+        model = model[ri_valid_nums, :]
+        assert isinstance(model, csr_array)
+        model = model[:, ti_valid_nums]
+        assert isinstance(model, csr_array)
+        # convert to CSC so we can count neighbors per target item.
+        model = model.tocsc()
+
+        # count neighborhood sizes
+        sizes = np.diff(model.indptr)
+        # which neighborhoods are usable? (at least min neighbors)
+        scorable = sizes >= self.min_nbrs
+
+        # fast-path neighborhoods that fit within max neighbors
+        fast = sizes <= self.nnbrs
+        ti_fast_mask = ti_mask.copy()
+        ti_fast_mask[ti_mask] = scorable & fast
+
+        scores = np.full(len(items), np.nan, dtype=np.float32)
+        fast_mod = model[:, scorable & fast]
         if self.feedback == "explicit":
-            assert self.item_means_ is not None
-            ri_vals -= self.item_means_[ri_vpos]
-
-        # now compute the predictions
-        if self.feedback == "explicit":
-            sims = _predict_weighted_average(
-                self.sim_matrix_, (self.min_nbrs, self.nnbrs), ri_vals, ri_vpos
-            )
-            sims += self.item_means_
+            scores[ti_fast_mask] = ri_vals @ fast_mod
+            scores[ti_fast_mask] /= fast_mod.sum(axis=0)
         else:
-            sims = _predict_sum(self.sim_matrix_, (self.min_nbrs, self.nnbrs), ri_vals, ri_vpos)
-
-        # and prepare the output
-        scores = torch.full((len(items),), np.nan, dtype=sims.dtype)
-        out_nums = items.numbers("torch", vocabulary=self.items_, missing="negative")
-        out_good = out_nums >= 0
-        scores[out_good] = sims[out_nums[out_nums >= 0]]
-        results = ItemList(items, scores=scores)
+            scores[ti_fast_mask] = fast_mod.sum(axis=0)
+
+        # slow path: neighborhoods that we need to truncate. we will convert to
+        # PyTorch, make a dense matrix (this is usually small enough to be
+        # usable), and use the Torch topk function.
+        slow_mat = model.T[~fast, :]
+        assert isinstance(slow_mat, csr_array)
+        n_slow, _ = slow_mat.shape
+        if n_slow:
+            # mask for the slow items.
+            ti_slow_mask = ti_mask.copy()
+            ti_slow_mask[ti_mask] = ~fast
+
+            slow_mat = torch.from_numpy(slow_mat.toarray())
+            slow_trimmed, slow_inds = torch.topk(slow_mat, self.nnbrs)
+            assert slow_trimmed.shape == (n_slow, self.nnbrs)
+            if self.feedback == "explicit":
+                scores[ti_slow_mask] = torch.sum(
+                    slow_trimmed * torch.from_numpy(ri_vals)[slow_inds], axis=1
+                ).numpy()
+                scores[ti_slow_mask] /= torch.sum(slow_trimmed, axis=1).numpy()
+            else:
+                scores[ti_slow_mask] = torch.sum(slow_trimmed, axis=1).numpy()
+
+        # re-add the mean ratings (only present for mean-centered explicit feedback)
+        if self.item_means_ is not None:
+            scores[ti_mask] += self.item_means_[ti_valid_nums]
 
         _log.debug(
             "user %s: predicted for %d of %d items",
             query.user_id,
-            int(torch.isfinite(scores).sum()),
+            int(np.isfinite(scores).sum()),
             len(items),
         )
 
-        return results
+        return ItemList(items, scores=scores)
 
     def __str__(self):
         return "ItemItem(nnbrs={}, msize={})".format(self.nnbrs, self.save_nbrs)
@@ -361,172 +399,3 @@ def _sim_blocks(
         values=c_values,
         size=(nitems, nitems),
     )
-
-
-def _predict_weighted_average(
-    model: torch.Tensor,
-    nrange: tuple[int, int],
-    rate_v: torch.Tensor,
-    rated: torch.Tensor,
-) -> torch.Tensor:
-    "Weighted average prediction function"
-    nitems, _ni = model.shape
-    assert nitems == _ni
-    min_nbrs, max_nbrs = nrange
-
-    # we proceed rating-by-rating, and accumulate results
-    scores = torch.zeros(nitems)
-    t_sims = torch.zeros(nitems)
-    counts = torch.zeros(nitems, dtype=torch.int32)
-    # these store the similarities and values for neighbors, so we can un-count
-    nbr_sims = torch.empty((nitems, max_nbrs))
-    nbr_vals = torch.empty((nitems, max_nbrs))
-    # and this stores the smallest similarity so far for each item
-    nbr_min = torch.full((nitems,), torch.finfo().max)
-
-    for i, iidx in enumerate(rated):
-        row = model[int(iidx)]
-        row_is = row.indices()[0]
-        row_vs = row.values()
-        assert row_is.shape == row_vs.shape
-
-        row_avs = torch.abs(row_vs)
-        fast = counts[row_is] < max_nbrs
-
-        # save the fast-path items
-        if torch.any(fast):
-            ris_fast = row_is[fast]
-            vs_fast = row_vs[fast]
-            avs_fast = row_avs[fast]
-            vals_fast = vs_fast * rate_v[i]
-            nbr_sims[ris_fast, counts[ris_fast]] = vs_fast
-            nbr_vals[ris_fast, counts[ris_fast]] = vals_fast
-            counts[ris_fast] += 1
-            t_sims[ris_fast] += avs_fast
-            scores[ris_fast] += vals_fast
-            nbr_min[ris_fast] = torch.minimum(nbr_min[ris_fast], vs_fast)
-
-        # skip early if we're done
-        if torch.all(fast):
-            continue
-
-        # now we have the slow-path items
-        slow = torch.logical_not(fast)
-        ris_slow = row_is[slow]
-        rvs_slow = row_vs[slow]
-        # which slow items might actually need an update?
-        exc = rvs_slow > nbr_min[ris_slow]
-        if not torch.any(exc):
-            continue
-
-        ris_slow = ris_slow[exc]
-        rvs_slow = rvs_slow[exc]
-
-        # this is brute-force linear search for simplicity right now
-        # for each, find the neighbor that's the smallest:
-        min_sims, mins = torch.min(nbr_sims[ris_slow], dim=1)
-        assert torch.all(min_sims < rvs_slow)
-
-        # now we need to update values: add in new and remove old
-        min_vals = nbr_vals[ris_slow, mins]
-        ravs_slow = row_avs[slow][exc]
-        slow_vals = rvs_slow * rate_v[i]
-        t_sims[ris_slow] += ravs_slow - min_sims.abs()
-        scores[ris_slow] += slow_vals - min_vals
-        # and save
-        nbr_sims[ris_slow, mins] = ravs_slow
-        nbr_vals[ris_slow, mins] = slow_vals
-        # and now we need to update the saved minimums
-        nm_sims, _nm_is = torch.min(nbr_sims[ris_slow], dim=1)
-        nbr_min[ris_slow] = nm_sims
-
-    # compute averages for items that pass match the threshold
-    mask = counts >= min_nbrs
-    scores[mask] /= t_sims[mask]
-    scores[torch.logical_not(mask)] = torch.nan
-
-    return scores
-
-
-def _predict_sum(
-    model: torch.Tensor,
-    nrange: tuple[int, int],
-    rate_v: torch.Tensor,
-    rated: torch.Tensor,
-) -> torch.Tensor:
-    "Sum-of-similarities prediction function"
-    nitems, _ni = model.shape
-    assert nitems == _ni
-    min_nbrs, max_nbrs = nrange
-    _msg(logging.DEBUG, f"sum-scoring with {len(rated)} items")
-
-    # we proceed rating-by-rating, and accumulate results
-    t_sims = torch.zeros(nitems)
-    counts = torch.zeros(nitems, dtype=torch.int32)
-    nbr_sims = torch.zeros((nitems, max_nbrs))
-    # and this stores the smallest similarity so far for each item
-    nbr_min = torch.full((nitems,), torch.finfo().max)
-
-    for i, iidx in enumerate(rated):
-        iidx = int(iidx)
-        row = model[iidx]
-        row_is = row.indices()[0]
-        row_vs = row.values()
-        assert row_is.shape == row_vs.shape
-
-        fast = counts[row_is] < max_nbrs
-
-        # save the fast-path items
-        if torch.any(fast):
-            ris_fast = row_is[fast]
-            vs_fast = row_vs[fast]
-            nbr_sims[ris_fast, counts[ris_fast]] = vs_fast
-            counts[ris_fast] += 1
-            t_sims[ris_fast] += vs_fast
-            nbr_min[ris_fast] = torch.minimum(nbr_min[ris_fast], vs_fast)
-
-        # skip early if we're done
-        if torch.all(fast):
-            continue
-
-        # now we have the slow-path items
-        slow = torch.logical_not(fast)
-        ris_slow = row_is[slow]
-        rvs_slow = row_vs[slow]
-        # which slow items might actually need an update?
-        exc = rvs_slow > nbr_min[ris_slow]
-        if not torch.any(exc):
-            continue
-
-        ris_slow = ris_slow[exc]
-        rvs_slow = rvs_slow[exc]
-
-        # this is brute-force linear search for simplicity right now
-        # for each, find the neighbor that's the smallest:
-        min_sims, mins = torch.min(nbr_sims[ris_slow], dim=1)
-
-        # now we need to update values: add in new and remove old
-        # anywhere our new neighbor is grater than smallest, replace smallest
-        t_sims[ris_slow] -= min_sims
-        t_sims[ris_slow] += rvs_slow
-        # and save
-        nbr_sims[ris_slow, mins] = rvs_slow
-        # save the minimums
-        nm_sims, _nm_is = torch.min(nbr_sims[ris_slow], dim=1)
-        nbr_min[ris_slow] = nm_sims
-
-    # compute averages for items that pass match the threshold
-    t_sims[counts < min_nbrs] = torch.nan
-
-    return t_sims
-
-
-AggFun: TypeAlias = Callable[
-    [
-        torch.Tensor,
-        tuple[int, int],
-        torch.Tensor,
-        torch.Tensor,
-    ],
-    torch.Tensor,
-]
diff --git a/lenskit/lenskit/util/torch.py b/lenskit/lenskit/util/torch.py
new file mode 100644
index 000000000..88f5bf456
--- /dev/null
+++ b/lenskit/lenskit/util/torch.py
@@ -0,0 +1,20 @@
+"""
+PyTorch utility functions.
+"""
+
+import functools
+
+import torch
+
+
+def inference_mode(func):
+    """
+    Decorator that runs ``func`` inside a ``torch.inference_mode()`` context on every call.
+    """
+
+    def _in_inference_mode(*args, **kwargs):
+        with torch.inference_mode():
+            result = func(*args, **kwargs)
+        return result
+
+    return functools.wraps(func)(_in_inference_mode)
diff --git a/lenskit/tests/models/test_knn_item_item.py b/lenskit/tests/models/test_knn_item_item.py
index 3ef5a3d34..cb0957cca 100644
--- a/lenskit/tests/models/test_knn_item_item.py
+++ b/lenskit/tests/models/test_knn_item_item.py
@@ -11,6 +11,7 @@
 import numpy as np
 import pandas as pd
 import torch
+from numpy.typing import NDArray
 from scipy import linalg as la
 
 import pytest
@@ -18,15 +19,18 @@
 
 from lenskit import batch
 from lenskit.basic import BiasScorer
+from lenskit.basic.history import UserTrainingHistoryLookup
 from lenskit.batch import BatchPipelineRunner
 from lenskit.data import ItemList, ItemListCollection, UserIDKey, Vocabulary, from_interactions_df
 from lenskit.data.bulk import dict_to_df, iter_item_lists
 from lenskit.diagnostics import ConfigWarning, DataWarning
 from lenskit.knn.item import ItemKNNScorer
 from lenskit.metrics import MAE, RBP, RMSE, RecipRank, RunAnalysis, call_metric, quick_measure_model
+from lenskit.operations import score
 from lenskit.pipeline import RecPipelineBuilder, topn_pipeline
 from lenskit.splitting import SampleFrac, crossfold_users
 from lenskit.testing import BasicComponentTests, ScorerTests, wantjit
+from lenskit.util.torch import inference_mode
 
 _log = logging.getLogger(__name__)
 
@@ -79,13 +83,13 @@ def test_ii_train():
     algo = ItemKNNScorer(30, save_nbrs=500)
     algo.train(simple_ds)
 
-    assert isinstance(algo.item_means_, torch.Tensor)
-    assert isinstance(algo.item_counts_, torch.Tensor)
+    assert isinstance(algo.item_means_, np.ndarray)
+    assert isinstance(algo.item_counts_, np.ndarray)
     matrix = algo.sim_matrix_
 
     test_means = simple_ratings.groupby("item")["rating"].mean()
     test_means = test_means.reindex(algo.items_.ids())
-    assert np.all(algo.item_means_.numpy() == test_means.values.astype("f8"))
+    assert np.all(algo.item_means_ == test_means.values.astype("f8"))
 
     # 6 is a neighbor of 7
     six, seven = algo.items_.numbers([6, 7])
@@ -103,20 +107,20 @@ def test_ii_train():
     num = six_v.dot(seven_v)
     assert matrix[six, seven] == approx(num / denom, 0.01)  # type: ignore
 
-    assert all(np.logical_not(np.isnan(algo.sim_matrix_.values().numpy())))
-    assert all(algo.sim_matrix_.values() > 0)
+    assert all(np.logical_not(np.isnan(algo.sim_matrix_.data)))
+    assert all(algo.sim_matrix_.data > 0)
     # a little tolerance
-    assert all(algo.sim_matrix_.values() < 1 + 1.0e-6)
+    assert all(algo.sim_matrix_.data < 1 + 1.0e-6)
 
 
 def test_ii_train_unbounded():
     algo = ItemKNNScorer(30)
     algo.train(simple_ds)
 
-    assert all(np.logical_not(np.isnan(algo.sim_matrix_.values())))
-    assert all(algo.sim_matrix_.values() > 0)
+    assert all(np.logical_not(np.isnan(algo.sim_matrix_.data)))
+    assert all(algo.sim_matrix_.data > 0)
     # a little tolerance
-    assert all(algo.sim_matrix_.values() < 1 + 1.0e-6)
+    assert all(algo.sim_matrix_.data < 1 + 1.0e-6)
 
     # 6 is a neighbor of 7
     matrix = algo.sim_matrix_
@@ -135,10 +139,13 @@ def test_ii_train_unbounded():
 
 
 def test_ii_simple_predict():
+    history = UserTrainingHistoryLookup()
+    history.train(simple_ds)
     algo = ItemKNNScorer(30, save_nbrs=500)
     algo.train(simple_ds)
 
-    res = algo(3, ItemList([6]))
+    q = history(3)
+    res = algo(q, ItemList([6]))
     _log.info("got predictions: %s", res)
     assert res is not None
     assert len(res) == 1
@@ -147,10 +154,13 @@ def test_ii_simple_predict():
 
 
 def test_ii_simple_implicit_predict():
+    history = UserTrainingHistoryLookup()
+    history.train(simple_ds)
     algo = ItemKNNScorer(30, feedback="implicit")
     algo.train(from_interactions_df(simple_ratings.loc[:, ["user", "item"]]))
 
-    res = algo(3, ItemList([6]))
+    q = history(3)
+    res = algo(q, ItemList([6]))
     assert res is not None
     assert len(res) == 1
     assert 6 in res.ids()
@@ -159,10 +169,13 @@ def test_ii_simple_implicit_predict():
 
 
 def test_ii_simple_predict_unknown():
+    history = UserTrainingHistoryLookup()
+    history.train(simple_ds)
     algo = ItemKNNScorer(30, save_nbrs=500)
     algo.train(simple_ds)
 
-    res = algo(3, ItemList([6, 100]))
+    q = history(3)
+    res = algo(q, ItemList([6, 100]))
     _log.info("got predictions: %s", res)
     assert res is not None
     assert len(res) == 2
@@ -181,6 +194,7 @@ def test_ii_warns_center():
 
 @wantjit
 @mark.slow
+@inference_mode
 def test_ii_train_ml100k(tmp_path, ml_100k):
     "Test an unbounded model on ML-100K"
     algo = ItemKNNScorer(30)
@@ -189,13 +203,13 @@ def test_ii_train_ml100k(tmp_path, ml_100k):
 
     _log.info("testing model")
 
-    assert all(np.logical_not(np.isnan(algo.sim_matrix_.values())))
-    assert all(algo.sim_matrix_.values() > 0)
+    assert all(np.logical_not(np.isnan(algo.sim_matrix_.data)))
+    assert all(algo.sim_matrix_.data > 0)
 
     # a little tolerance
-    assert np.max(algo.sim_matrix_.values().numpy()) <= 1
+    assert np.max(algo.sim_matrix_.data) <= 1
 
-    assert algo.item_counts_.sum() == len(algo.sim_matrix_.values())
+    assert algo.item_counts_.sum() == len(algo.sim_matrix_.data)
 
     means = ml_100k.groupby("item_id").rating.mean()
     assert means[algo.items_.ids()].values == approx(algo.item_means_)
@@ -210,16 +224,17 @@ def test_ii_train_ml100k(tmp_path, ml_100k):
     with fn.open("rb") as modf:
         restored = pickle.load(modf)
 
-    assert all(restored.sim_matrix_.values() > 0)
+    assert all(restored.sim_matrix_.data > 0)
 
     r_mat = restored.sim_matrix_
     o_mat = algo.sim_matrix_
 
-    assert all(r_mat.values() == o_mat.values())
+    assert all(r_mat.data == o_mat.data)
 
 
 @wantjit
 @mark.slow
+@inference_mode
 def test_ii_large_models(rng, ml_ratings, ml_ds):
     "Several tests of large trained I-I models"
     _log.info("training limited model")
@@ -232,17 +247,17 @@ def test_ii_large_models(rng, ml_ratings, ml_ds):
     algo_ub.train(ml_ds)
 
     _log.info("testing models")
-    assert all(np.logical_not(np.isnan(algo_lim.sim_matrix_.values())))
-    assert algo_lim.sim_matrix_.values().min() > 0
+    assert all(np.logical_not(np.isnan(algo_lim.sim_matrix_.data)))
+    assert algo_lim.sim_matrix_.data.min() > 0
     # a little tolerance
-    assert algo_lim.sim_matrix_.values().max() <= 1
+    assert algo_lim.sim_matrix_.data.max() <= 1
 
     means = ml_ratings.groupby("item_id").rating.mean()
     assert means[algo_lim.items_.ids()].values == approx(algo_lim.item_means_)
 
-    assert all(np.logical_not(np.isnan(algo_ub.sim_matrix_.values())))
-    assert algo_ub.sim_matrix_.values().min() > 0
-    assert algo_ub.sim_matrix_.values().max() <= 1
+    assert all(np.logical_not(np.isnan(algo_ub.sim_matrix_.data)))
+    assert algo_ub.sim_matrix_.data.min() > 0
+    assert algo_ub.sim_matrix_.data.max() <= 1
 
     means = ml_ratings.groupby("item_id").rating.mean()
     assert means[algo_ub.items_.ids()].values == approx(algo_ub.item_means_)
@@ -258,10 +273,10 @@ def test_ii_large_models(rng, ml_ratings, ml_ds):
 
     _log.info("make sure the similarity matrix is sorted")
     for i in range(algo_lim.items_.size):
-        sp = algo_lim.sim_matrix_.crow_indices()[i]
-        ep = algo_lim.sim_matrix_.crow_indices()[i + 1]
-        cols = algo_lim.sim_matrix_.col_indices()[sp:ep]
-        diffs = np.diff(cols.numpy())
+        sp = algo_lim.sim_matrix_.indptr[i]
+        ep = algo_lim.sim_matrix_.indptr[i + 1]
+        cols = algo_lim.sim_matrix_.indices[sp:ep]
+        diffs = np.diff(cols)
         if np.any(diffs <= 0):
             _log.error("row %d: %d non-sorted indices", i, np.sum(diffs <= 0))
             (bad,) = np.nonzero(diffs <= 0)
@@ -271,18 +286,18 @@ def test_ii_large_models(rng, ml_ratings, ml_ds):
 
     _log.info("checking a sample of neighborhoods")
     items = algo_ub.items_.ids()
-    items = items[algo_ub.item_counts_.numpy() > 0]
+    items = items[algo_ub.item_counts_ > 0]
     for i in rng.choice(items, 50):
         ipos = algo_ub.items_.number(i)
         _log.debug("checking item %d at position %d", i, ipos)
         assert ipos == algo_lim.items_.number(i)
         irates = mc_rates.loc[[i], :].set_index("user_id").rating
 
-        ub_row = mat_ub[ipos]
-        b_row = mat_lim[ipos]
-        assert len(b_row.values()) <= MODEL_SIZE
-        ub_cols = ub_row.indices()[0].numpy()
-        b_cols = b_row.indices()[0].numpy()
+        ub_row = mat_ub[[ipos]]
+        b_row = mat_lim[[ipos]]
+        assert len(b_row.data) <= MODEL_SIZE
+        ub_cols = ub_row.indices
+        b_cols = b_row.indices
         _log.debug("kept %d of %d neighbors", len(b_cols), len(ub_cols))
 
         _log.debug("checking for sorted indices")
@@ -294,7 +309,7 @@ def test_ii_large_models(rng, ml_ratings, ml_ds):
         present = np.isin(b_cols, ub_cols)
         if not np.all(present):
             _log.error("missing items: %s", b_cols[~present])
-            _log.error("scores: %s", b_row.values()[~present])  # type: ignore
+            _log.error("scores: %s", b_row.data[~present])  # type: ignore
             raise AssertionError(f"missing {np.sum(~present)} values from unbounded")
 
         # spot-check some similarities
@@ -304,18 +319,18 @@ def test_ii_large_models(rng, ml_ratings, ml_ds):
             n_rates = mc_rates.loc[n_id, :].set_index("user_id").rating
             ir, nr = irates.align(n_rates, fill_value=0)
             cor = ir.corr(nr)
-            assert mat_ub[ipos, n].item() == approx(cor, abs=1.0e-6)
+            assert mat_ub[ipos, n] == approx(cor, abs=1.0e-6)
 
         # short rows are equal
         if len(b_cols) < MODEL_SIZE:
             _log.debug("short row of length %d", len(b_cols))
             assert len(b_row) == len(ub_row)
-            assert b_row.values().numpy() == approx(ub_row.values().numpy())
+            assert b_row.data == approx(ub_row.data)
             continue
 
         # row is truncated - check that truncation is correct
-        ub_nbrs = pd.Series(ub_row.values().numpy(), algo_ub.items_.ids(ub_cols))
-        b_nbrs = pd.Series(b_row.values().numpy(), algo_lim.items_.ids(b_cols))
+        ub_nbrs = pd.Series(ub_row.data, algo_ub.items_.ids(ub_cols))
+        b_nbrs = pd.Series(b_row.data, algo_lim.items_.ids(b_cols))
 
         assert len(ub_nbrs) >= len(b_nbrs)
         assert len(b_nbrs) <= MODEL_SIZE
@@ -341,6 +356,7 @@ def test_ii_large_models(rng, ml_ratings, ml_ds):
 
 @wantjit
 @mark.slow
+@inference_mode
 def test_ii_implicit_large(rng, ml_ratings):
     "Test that implicit-feedback mode works on full test data."
     _log.info("training model")
@@ -354,7 +370,7 @@ def test_ii_implicit_large(rng, ml_ratings):
     users = rng.choice(ml_ratings["user_id"].unique(), NUSERS)
 
     items: Vocabulary = algo.items_
-    mat: torch.Tensor = algo.sim_matrix_.to_dense()
+    mat: NDArray[np.float32] = algo.sim_matrix_.toarray()
 
     for user in users:
         recs = pipe.run("recommender", query=user, n=NRECS)
@@ -363,10 +379,10 @@ def test_ii_implicit_large(rng, ml_ratings):
         assert len(recs) == NRECS
         urates = ml_ratings[ml_ratings["user_id"] == user]
 
-        smat = mat[torch.from_numpy(items.numbers(urates["item_id"].values)), :]
+        smat = mat[items.numbers(urates["item_id"].values), :]
         for row in recs.to_df().itertuples():
             col = smat[:, items.number(row.item_id)]
-            top, _is = torch.topk(col, NBRS)
+            top, _is = torch.topk(torch.from_numpy(col), NBRS)
             score = top.sum()
             try:
                 assert row.score == approx(score)
@@ -381,6 +397,7 @@ def test_ii_implicit_large(rng, ml_ratings):
 
 
 @wantjit
+@inference_mode
 def test_ii_save_load(tmp_path, ml_ratings, ml_subset):
     "Save and load a model"
     original = ItemKNNScorer(30, save_nbrs=500)
@@ -398,20 +415,20 @@ def test_ii_save_load(tmp_path, ml_ratings, ml_subset):
         algo = pickle.load(modf)
 
     _log.info("checking model")
-    assert all(np.logical_not(np.isnan(algo.sim_matrix_.values())))
-    assert all(algo.sim_matrix_.values() > 0)
+    assert all(np.logical_not(np.isnan(algo.sim_matrix_.data)))
+    assert all(algo.sim_matrix_.data > 0)
     # a little tolerance
-    assert all(algo.sim_matrix_.values() < 1 + 1.0e-6)
+    assert all(algo.sim_matrix_.data < 1 + 1.0e-6)
 
     assert all(algo.item_counts_ == original.item_counts_)
-    assert algo.item_counts_.sum() == len(algo.sim_matrix_.values())
-    assert len(algo.sim_matrix_.values()) == len(algo.sim_matrix_.values())
-    assert all(algo.sim_matrix_.crow_indices() == original.sim_matrix_.crow_indices())
-    assert algo.sim_matrix_.values() == approx(original.sim_matrix_.values())
+    assert algo.item_counts_.sum() == len(algo.sim_matrix_.data)
+    assert len(algo.sim_matrix_.data) == len(algo.sim_matrix_.data)
+    assert all(algo.sim_matrix_.indptr == original.sim_matrix_.indptr)
+    assert algo.sim_matrix_.data == approx(original.sim_matrix_.data)
 
     r_mat = algo.sim_matrix_
     o_mat = original.sim_matrix_
-    assert all(r_mat.crow_indices() == o_mat.crow_indices())
+    assert all(r_mat.indptr == o_mat.indptr)
 
     means = ml_ratings.groupby("item_id").rating.mean()
     assert means[algo.items_.ids()].values == approx(original.item_means_)
@@ -441,8 +458,9 @@ def test_ii_known_preds(ml_ds):
     from lenskit import batch
 
     iknn = ItemKNNScorer(20, min_sim=1.0e-6)
+    pipe = topn_pipeline(iknn)
     _log.info("training %s on ml data", iknn)
-    iknn.train(ml_ds)
+    pipe.train(ml_ds)
     _log.info("model means: %s", iknn.item_means_)
 
     dir = Path(__file__).parent
@@ -451,7 +469,7 @@ def test_ii_known_preds(ml_ds):
     known_preds = pd.read_csv(str(pred_file))
 
     preds = {
-        user: iknn(user, ItemList(kps, prediction=False))
+        user: score(pipe, query=user, items=ItemList(kps, prediction=False))
         for (user, kps) in iter_item_lists(known_preds)
     }
     preds = dict_to_df(preds)