Add the DCG metric

lenskit · Jan 18, 2025 · bec38b9 · bec38b9
1 parent 33c5ec1
commit bec38b9
Show file tree

Hide file tree

Showing 3 changed files with 188 additions and 1 deletion.
diff --git a/lenskit/lenskit/metrics/ranking/__init__.py b/lenskit/lenskit/metrics/ranking/__init__.py
@@ -3,7 +3,7 @@
 """
 
 from ._base import RankingMetricBase
-from ._dcg import NDCG
+from ._dcg import DCG, NDCG
 from ._hit import Hit
 from ._pr import Precision, Recall
 from ._rbp import RBP
@@ -16,5 +16,6 @@
     "Recall",
     "RecipRank",
     "NDCG",
+    "DCG",
     "RBP",
 ]
diff --git a/lenskit/lenskit/metrics/ranking/_dcg.py b/lenskit/lenskit/metrics/ranking/_dcg.py
@@ -91,6 +91,73 @@ def measure_list(self, recs: ItemList, test: ItemList) -> float:
         return realized / ideal
 
 
+class DCG(ListMetric, RankingMetricBase):
+    """
+    Compute the *unnormalized* discounted cumulative gain :cite:p:`ndcg`.
+
+    Discounted cumulative gain is computed as:
+
+    .. math::
+        \\begin{align*}
+        \\mathrm{DCG}(L,u) & = \\sum_{i=1}^{|L|} \\frac{r_{ui}}{d(i)}
+        \\end{align*}
+
+    Items missing from the test data contribute a utility of 0.  When no gain
+    field is configured, every recommended item that appears in the test data
+    contributes a utility of 1.  The discounted sum itself is delegated to
+    :func:`array_dcg`.
+
+    This metric does *not* normalize by ideal DCG. For that, use :class:`NDCG`.
+
+    Args:
+        k:
+            The maximum recommendation list length to consider (longer lists are
+            truncated).
+        discount:
+            The discount function to use.  The default, base-2 logarithm, is the
+            original function used by :cite:t:`ndcg`.
+        gain:
+            The field on the test data to use for gain values.  If ``None`` (the
+            default), all items present in the test data have a gain of 1.  If set
+            to a string, it is the name of a field (e.g. ``'rating'``).  In all
+            cases, items not present in the truth data have a gain of 0.
+
+    Stability:
+        Caller
+    """
+
+    discount: Discount
+    gain: str | None
+
+    def __init__(
+        self, k: int | None = None, *, discount: Discount = np.log2, gain: str | None = None
+    ):
+        super().__init__(k=k)
+        self.discount = discount
+        self.gain = gain
+
+    @property
+    def label(self):
+        # the list-length limit, when set, is appended as "@k"
+        return "DCG" if self.k is None else f"DCG@{self.k}"
+
+    @override
+    def measure_list(self, recs: ItemList, test: ItemList) -> float:
+        rec_ids = self.truncate(recs).ids()
+
+        if not self.gain:
+            # binary gains: 1 for each recommended item found in the test data
+            scores = np.zeros_like(rec_ids, dtype=np.float32)
+            scores[np.isin(rec_ids, test.ids())] = 1.0
+            return array_dcg(np.require(scores, np.float32), self.discount)
+
+        # field-based gains, looked up by item ID
+        gains = test.field(self.gain, "pandas", index="ids")
+        if gains is None:
+            raise KeyError(f"test items have no field {self.gain}")
+
+        # recommended items absent from the test data get a gain of 0
+        scores = gains.reindex(rec_ids, fill_value=0).values
+        return array_dcg(np.require(scores, np.float32), self.discount)
+
+
 def array_dcg(scores: NDArray[np.number], discount: Discount = np.log2):
     """
     Compute the Discounted Cumulative Gain of a series of recommended items with rating scores.

diff --git a/lenskit/tests/eval/test_rank_dcg.py b/lenskit/tests/eval/test_rank_dcg.py
@@ -0,0 +1,119 @@
+# This file is part of LensKit.
+# Copyright (C) 2018-2023 Boise State University
+# Copyright (C) 2023-2024 Drexel University
+# Licensed under the MIT license, see LICENSE.md for details.
+# SPDX-License-Identifier: MIT
+
+import numpy as np
+import pandas as pd
+
+from pytest import approx, mark
+
+from lenskit.data import ItemList
+from lenskit.metrics import call_metric
+from lenskit.metrics.ranking import DCG
+from lenskit.metrics.ranking._dcg import array_dcg, fixed_dcg
+
+
+def test_dcg_empty():
+    "array_dcg of a zero-length score array should be zero"
+    no_scores = np.array([])
+    assert array_dcg(no_scores) == approx(0)
+
+
+def test_dcg_zeros():
+    "ten zero gains should accumulate to zero"
+    all_zero = np.zeros(10)
+    assert array_dcg(all_zero) == approx(0)
+
+
+def test_dcg_single():
+    "one nonzero gain: undiscounted at ranks 1 and 2, divided by log2(3) at rank 3"
+    at_rank_3 = 0.5 / np.log2(3)
+    cases = [
+        ([0.5], 0.5),
+        ([0, 0.5], 0.5),
+        ([0, 0, 0.5], at_rank_3),
+        ([0, 0, 0.5, 0], at_rank_3),
+    ]
+    for scores, expected in cases:
+        assert array_dcg(np.array(scores)) == approx(expected)
+
+
+def test_dcg_mult():
+    "several nonzero gains should each be discounted by rank and then summed"
+    adjacent = np.array([np.e, np.pi])
+    assert array_dcg(adjacent) == approx(np.e + np.pi)
+
+    spread = np.array([np.e, 0, 0, np.pi])
+    assert array_dcg(spread) == approx(np.e + np.pi / np.log2(4))
+
+
+def test_dcg_empty2():
+    "empty input should score zero (same check as test_dcg_empty)"
+    nothing = np.array([])
+    assert array_dcg(nothing) == approx(0)
+
+
+def test_dcg_zeros2():
+    "all-zero gains should score zero (same check as test_dcg_zeros)"
+    zero_gains = np.zeros(10)
+    assert array_dcg(zero_gains) == approx(0)
+
+
+def test_dcg_single2():
+    "one nonzero gain at each of ranks 1-3 (same checks as test_dcg_single)"
+    discounted = 0.5 / np.log2(3)
+    expectations = {
+        (0.5,): 0.5,
+        (0, 0.5): 0.5,
+        (0, 0, 0.5): discounted,
+        (0, 0, 0.5, 0): discounted,
+    }
+    for gains, want in expectations.items():
+        assert array_dcg(np.array(gains)) == approx(want)
+
+
+def test_dcg_nan():
+    "a NaN gain should contribute nothing to the total"
+    with_nan = np.array([np.nan, 0.5])
+    assert array_dcg(with_nan) == approx(0.5)
+
+
+def test_dcg_mult2():
+    "multiple nonzero gains are discounted and summed (same checks as test_dcg_mult)"
+    first_two = array_dcg(np.array([np.e, np.pi]))
+    assert first_two == approx(np.e + np.pi)
+
+    first_and_fourth = array_dcg(np.array([np.e, 0, 0, np.pi]))
+    assert first_and_fourth == approx(np.e + np.pi / np.log2(4))
+
+
+def test_ndcg_empty():
+    "the DCG metric should be zero for an empty recommendation list"
+    nothing_recommended = ItemList(ordered=True)
+    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
+    score = call_metric(DCG, nothing_recommended, truth)
+    assert score == approx(0.0)
+
+
+def test_ndcg_no_match():
+    "the DCG metric should be zero when the only recommended item is not in the truth"
+    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
+    miss = ItemList([4], ordered=True)
+    score = call_metric(DCG, miss, truth)
+    assert score == approx(0.0)
+
+
+def test_ndcg_perfect():
+    "DCG with binary gains when all three recommended items are in the truth"
+    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
+    recs = ItemList([2, 3, 1], ordered=True)
+    # each hit has gain 1; its weight is the reciprocal of the discount at its rank
+    weights = np.reciprocal(np.log2([2, 2, 3]))
+    assert call_metric(DCG, recs, truth) == approx(np.sum(weights))
+
+
+def test_ndcg_perfect_k_short():
+    "DCG with k=2 should give 2.0 for a three-item list and for its first two items"
+    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
+    recs = ItemList([2, 3, 1], ordered=True)
+
+    from_full_list = call_metric(DCG, recs, truth, k=2)
+    assert from_full_list == approx(2.0)
+
+    from_sliced_list = call_metric(DCG, recs[:2], truth, k=2)
+    assert from_sliced_list == approx(2.0)
+
+
+def test_ndcg_shorter_not_best():
+    "DCG of a two-item list: binary gains, binary gains with k=2, and rating gains"
+    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
+    recs = ItemList([1, 2], ordered=True)
+
+    unlimited = call_metric(DCG, recs, truth)
+    assert unlimited == approx(fixed_dcg(2))
+
+    limited = call_metric(DCG, recs, truth, k=2)
+    assert limited == approx(2.0)
+
+    # with gain="rating", items 1 and 2 carry their ratings 3.0 and 5.0
+    rated = call_metric(DCG, recs, truth, gain="rating")
+    assert rated == approx(array_dcg(np.array([3.0, 5.0])))
+
+
+def test_ndcg_perfect_k():
+    "DCG with k=2 and binary gains for two recommended items that are both in the truth"
+    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
+    top_two = ItemList([2, 3], ordered=True)
+    score = call_metric(DCG, top_two, truth, k=2)
+    assert score == approx(2.0)
+
+
+def test_ndcg_perfect_k_norate():
+    "DCG with k=2 and no gain field: both recommended items are in the truth, so the score is 2.0"
+    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
+    pair = ItemList([1, 3], ordered=True)
+    score = call_metric(DCG, pair, truth, k=2)
+    assert score == approx(2.0)
+
+
+def test_ndcg_almost_perfect_k_gain():
+    "DCG with k=2 and gain='rating' should use the ratings of the recommended items"
+    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
+    recs = ItemList([1, 3], ordered=True)
+    # items 1 and 3 are rated 3.0 and 4.0 in the truth
+    expected = array_dcg(np.array([3.0, 4.0]))
+    actual = call_metric(DCG, recs, truth, k=2, gain="rating")
+    assert actual == approx(expected)