From ceaf9b31570576f21794af1c119f2a92b5f5bba4 Mon Sep 17 00:00:00 2001
From: jteijema
Date: Thu, 31 Oct 2024 10:05:26 +0100
Subject: [PATCH] Simplify denominator

---
 asreviewcontrib/insights/algorithms.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/asreviewcontrib/insights/algorithms.py b/asreviewcontrib/insights/algorithms.py
index 2535ce7..9a3256f 100644
--- a/asreviewcontrib/insights/algorithms.py
+++ b/asreviewcontrib/insights/algorithms.py
@@ -31,18 +31,17 @@ def _loss_value(labels):
     # of +1 as a result of the stepwise curve.
     best_auc = Nx * Ny - ((Ny * (Ny - 1)) / 2)
 
-    # Compute recall values (y) based on the provided labels.
     # The actual AUC is the sum of the recall curve.
     actual_auc = np.cumsum(labels).sum()
 
     # The worst AUC represents the area under the worst-case step curve, which
     # is the area under the recall curve where all positive labels are clumped
-    # at the end.
-    worst_auc = (Ny * (Ny + 1)) / 2
+    # at the end. (Ny * (Ny + 1)) / 2. This is simplified together with the best
+    # auc in the normalized loss.
 
     # The normalized loss is the difference between the best AUC and the actual
     # AUC, normalized by the range between the best and worst AUCs.
-    normalized_loss = (best_auc - actual_auc) / (best_auc - worst_auc)
+    normalized_loss = (best_auc - actual_auc) / (Ny * (Nx - Ny))
 
     return normalized_loss
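
Note (not part of the patch): a minimal sketch of why the old denominator best_auc - worst_auc collapses to Ny * (Nx - Ny), plus an illustrative loss computation on a hypothetical label sequence. The example labels and printed value are assumptions for demonstration only.

    import numpy as np

    # Algebra behind the simplification:
    #   best_auc - worst_auc
    #     = Nx*Ny - Ny*(Ny - 1)/2 - Ny*(Ny + 1)/2
    #     = Nx*Ny - Ny**2
    #     = Ny * (Nx - Ny)
    for Nx, Ny in [(10, 3), (100, 25), (5000, 1)]:
        best_auc = Nx * Ny - ((Ny * (Ny - 1)) / 2)  # perfect recall curve
        worst_auc = (Ny * (Ny + 1)) / 2             # all relevant records found last
        assert best_auc - worst_auc == Ny * (Nx - Ny)

    # Hypothetical label sequence (1 = relevant record found at that step).
    labels = np.array([1, 0, 1, 0, 0, 1, 0, 0])
    Nx, Ny = len(labels), labels.sum()
    actual_auc = np.cumsum(labels).sum()
    best_auc = Nx * Ny - ((Ny * (Ny - 1)) / 2)
    loss = (best_auc - actual_auc) / (Ny * (Nx - Ny))
    print(loss)  # 0.0 for a perfect ranking, 1.0 for the worst possible ranking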