Skip to content

Commit

Permalink
Add the DCG metric
Browse files Browse the repository at this point in the history
  • Loading branch information
mdekstrand committed Jan 18, 2025
1 parent 33c5ec1 commit bec38b9
Show file tree
Hide file tree
Showing 3 changed files with 188 additions and 1 deletion.
3 changes: 2 additions & 1 deletion lenskit/lenskit/metrics/ranking/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

from ._base import RankingMetricBase
from ._dcg import NDCG
from ._dcg import DCG, NDCG
from ._hit import Hit
from ._pr import Precision, Recall
from ._rbp import RBP
Expand All @@ -16,5 +16,6 @@
"Recall",
"RecipRank",
"NDCG",
"DCG",
"RBP",
]
67 changes: 67 additions & 0 deletions lenskit/lenskit/metrics/ranking/_dcg.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,73 @@ def measure_list(self, recs: ItemList, test: ItemList) -> float:
return realized / ideal


class DCG(ListMetric, RankingMetricBase):
    """
    Compute the *unnormalized* discounted cumulative gain :cite:p:`ndcg`.

    Discounted cumulative gain is computed as:

    .. math::
        \\begin{align*}
        \\mathrm{DCG}(L,u) & = \\sum_{i=1}^{|L|} \\frac{r_{ui}}{d(i)}
        \\end{align*}

    Unrated items are assumed to have a utility of 0; if no rating values are
    provided in the truth frame, item ratings are assumed to be 1.

    This metric does *not* normalize by ideal DCG.  For that, use :class:`NDCG`.

    Args:
        k:
            The maximum recommendation list length to consider (longer lists are
            truncated).
        discount:
            The discount function to use.  The default, base-2 logarithm, is the
            original function used by :cite:t:`ndcg`.
        gain:
            The field on the test data to use for gain values.  If ``None`` (the
            default), all items present in the test data have a gain of 1.  If set
            to a string, it is the name of a field (e.g. ``'rating'``).  In all
            cases, items not present in the truth data have a gain of 0.

    Stability:
        Caller
    """

    # Discount function handed to ``array_dcg`` together with the gain vector.
    discount: Discount
    # Name of the test-data field supplying gains; a falsy value selects
    # binary (0/1) gains based on membership in the test items.
    gain: str | None

    def __init__(
        self, k: int | None = None, *, discount: Discount = np.log2, gain: str | None = None
    ):
        super().__init__(k=k)
        self.discount = discount
        self.gain = gain

    @property
    def label(self) -> str:
        """Metric label: ``DCG@k`` when a list length limit is set, else ``DCG``."""
        if self.k is not None:
            return f"DCG@{self.k}"
        return "DCG"

    @override
    def measure_list(self, recs: ItemList, test: ItemList) -> float:
        """
        Compute the DCG of one recommendation list against its test items.

        Args:
            recs:
                The recommendation list; it is passed through ``self.truncate``
                before scoring.
            test:
                The test (truth) items.

        Returns:
            The value returned by ``array_dcg`` for the per-position gains.

        Raises:
            KeyError:
                If :attr:`gain` names a field that the test items do not have.
        """
        recs = self.truncate(recs)
        items = recs.ids()

        if self.gain:
            gains = test.field(self.gain, "pandas", index="ids")
            if gains is None:
                raise KeyError(f"test items have no field {self.gain}")
            # Align gains to recommendation order; recommended items that are
            # absent from the test data contribute a gain of 0.
            scores = gains.reindex(items, fill_value=0).values
        else:
            # Binary relevance: 1.0 where the recommended item is a test item.
            scores = np.isin(items, test.ids()).astype(np.float32)

        return array_dcg(np.require(scores, np.float32), self.discount)


def array_dcg(scores: NDArray[np.number], discount: Discount = np.log2):
"""
Compute the Discounted Cumulative Gain of a series of recommended items with rating scores.
Expand Down
119 changes: 119 additions & 0 deletions lenskit/tests/eval/test_rank_dcg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# This file is part of LensKit.
# Copyright (C) 2018-2023 Boise State University
# Copyright (C) 2023-2024 Drexel University
# Licensed under the MIT license, see LICENSE.md for details.
# SPDX-License-Identifier: MIT

import numpy as np
import pandas as pd

from pytest import approx, mark

from lenskit.data import ItemList
from lenskit.metrics import call_metric
from lenskit.metrics.ranking import DCG
from lenskit.metrics.ranking._dcg import array_dcg, fixed_dcg


def test_dcg_empty():
    """An empty score array has a DCG of zero."""
    no_scores = np.array([])
    assert array_dcg(no_scores) == approx(0)


def test_dcg_zeros():
    """Ten zero-valued scores accumulate no gain."""
    all_zero = np.zeros(10)
    assert array_dcg(all_zero) == approx(0)


def test_dcg_single():
    """A lone nonzero score is credited according to the position it occupies."""
    third_place = 0.5 / np.log2(3)
    # (score list, expected DCG): positions 1 and 2 expect the raw score,
    # position 3 expects the score divided by log2(3); a trailing zero adds nothing.
    cases = [
        ([0.5], 0.5),
        ([0, 0.5], 0.5),
        ([0, 0, 0.5], third_place),
        ([0, 0, 0.5, 0], third_place),
    ]
    for scores, expected in cases:
        assert array_dcg(np.array(scores)) == approx(expected)


def test_dcg_mult():
    """Several nonzero scores each contribute their own discounted gain."""
    adjacent = np.array([np.e, np.pi])
    assert array_dcg(adjacent) == approx(np.e + np.pi)

    # The second score sits at position 4, so it is divided by log2(4).
    spread = np.array([np.e, 0, 0, np.pi])
    expected = np.e + np.pi / np.log2(4)
    assert array_dcg(spread) == approx(expected)


def test_dcg_empty2():
    """DCG over zero scores is zero (same assertion as ``test_dcg_empty``)."""
    result = array_dcg(np.array([]))
    assert result == approx(0)


def test_dcg_zeros2():
    """A length-10 all-zero score vector yields a DCG of zero."""
    result = array_dcg(np.zeros(10))
    assert result == approx(0)


def test_dcg_single2():
    """One nonzero score must be scored at the right place in the list."""
    # Positions 1 and 2: the expected value is the raw score.
    for undiscounted in ([0.5], [0, 0.5]):
        assert array_dcg(np.array(undiscounted)) == approx(0.5)

    # Position 3: the expected value is divided by log2(3), with or without
    # a trailing zero after it.
    for discounted in ([0, 0, 0.5], [0, 0, 0.5, 0]):
        assert array_dcg(np.array(discounted)) == approx(0.5 / np.log2(3))


def test_dcg_nan():
    """A NaN score is expected to count as zero gain."""
    with_nan = np.array([np.nan, 0.5])
    assert array_dcg(with_nan) == approx(0.5)


def test_dcg_mult2():
    """Multiple nonzero scores must add up to the expected discounted total."""
    expected_adjacent = np.e + np.pi
    expected_spread = np.e + np.pi / np.log2(4)

    assert array_dcg(np.array([np.e, np.pi])) == approx(expected_adjacent)
    assert array_dcg(np.array([np.e, 0, 0, np.pi])) == approx(expected_spread)


def test_ndcg_empty():
    """DCG (the metric under test, despite the name) of an empty list is zero."""
    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
    recs = ItemList(ordered=True)

    score = call_metric(DCG, recs, truth)
    assert score == approx(0.0)


def test_ndcg_no_match():
    """DCG is zero when the only recommended item is absent from the truth list."""
    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
    recs = ItemList([4], ordered=True)

    score = call_metric(DCG, recs, truth)
    assert score == approx(0.0)


def test_ndcg_perfect():
    """With no gain field, three relevant items score the sum of reciprocal discounts."""
    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
    recs = ItemList([2, 3, 1], ordered=True)

    # Expected total is 1/log2(2) + 1/log2(2) + 1/log2(3).
    discounts = np.log2([2, 2, 3])
    expected = np.sum(np.reciprocal(discounts))

    assert call_metric(DCG, recs, truth) == approx(expected)


def test_ndcg_perfect_k_short():
    """DCG at k=2 is 2.0 whether the list is passed whole or pre-sliced to two items."""
    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
    recs = ItemList([2, 3, 1], ordered=True)

    full_list_score = call_metric(DCG, recs, truth, k=2)
    assert full_list_score == approx(2.0)

    sliced_score = call_metric(DCG, recs[:2], truth, k=2)
    assert sliced_score == approx(2.0)


def test_ndcg_shorter_not_best():
    """A two-item list is checked with default gains, with k=2, and with rating gains."""
    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
    recs = ItemList([1, 2], ordered=True)

    # Default (no gain field): expected to match fixed_dcg(2).
    unlimited = call_metric(DCG, recs, truth)
    assert unlimited == approx(fixed_dcg(2))

    at_two = call_metric(DCG, recs, truth, k=2)
    assert at_two == approx(2.0)

    # With gain="rating", the expected gains are the ratings of items 1 and 2
    # in recommendation order.
    rated = call_metric(DCG, recs, truth, gain="rating")
    assert rated == approx(array_dcg(np.array([3.0, 5.0])))


def test_ndcg_perfect_k():
    """Two recommended items that are both in the truth list give DCG@2 of 2.0."""
    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
    recs = ItemList([2, 3], ordered=True)

    score = call_metric(DCG, recs, truth, k=2)
    assert score == approx(2.0)


def test_ndcg_perfect_k_norate():
    """Without a gain field, DCG@2 for items 1 and 3 is 2.0 regardless of their ratings."""
    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
    recs = ItemList([1, 3], ordered=True)

    score = call_metric(DCG, recs, truth, k=2)
    assert score == approx(2.0)


def test_ndcg_almost_perfect_k_gain():
    """With gain="rating", DCG@2 for items 1 and 3 equals the DCG of their ratings."""
    truth = ItemList([1, 2, 3], rating=[3.0, 5.0, 4.0])
    recs = ItemList([1, 3], ordered=True)

    # Ratings of items 1 and 3, in recommendation order.
    expected = array_dcg(np.array([3.0, 4.0]))

    score = call_metric(DCG, recs, truth, k=2, gain="rating")
    assert score == approx(expected)

0 comments on commit bec38b9

Please sign in to comment.