Merge branch rlos2021_cfe to master (#36)
* Initial file structure changes for Package (#16)

* Interface Approach2 (#29)

* Split ips and snips. Make two classes: Estimator and Interval

* Split pseudo_inverse into 2 classes: Estimator and Interval

* Fix test_pi, remove redundant file ips_snips, Remove type argument from get_estimate

* Slates interface implementation

* Cb interface initial commit

* Rename file and change class names

* Edit doc strings

* Change count datatype to float in cb_base

* Added gaussian, clopper_pearson files and removed type from cb interface

* Add newline at the end of file

* Changes for slates

- Renamed file from slates_helper to slates_base
- Added gaussian.py
- Removed type from get_interval
- Removed type from get_estimate
- Change doc strings for the slates interface
- Changed class names
- Changed data type of count
- Fixed data type of p_log and p_pred
- Removed unused imports

* Remove redundant imports and code

* Change method name to get()

* Rename file to base and change class name of ips, snips

* Change doc strings and variable name: slates

* Changes for test_pi

* Cressieread Interval update

* Changes folder name and class names (#31)

* Minimal changes to basic-usage (#32)

* Improvements for setup.py and slates (#33)

* imports fix (#34)

* Adding Tests (#35)

* Unit tests added

* Test for multiple examples

* Added test for checking narrowing intervals

* Combine all unit test functions into one

* Added comments

* Added another example generator

* Fixed Imports

* Change variable names and fix typo

* Added check for correct format of Confidence Interval

* Separate bandit and slates tests

* Move functions to utils

* Added test for correctness (slates)

* Comments added for test_bandits

* Added tests for slates intervals

* Move data generators from helper files to test_* files

* Remove num_slots as a parameter in util functions

* Combine run_estimator function

* Combine SlatesHelper and BanditsHelper

* Move assert statements from run_estimator() to test_*.py

* Move assert statements from Helper() functions to test_*.py file

* Improving code consistency

* Defined static methods and renamed file to utils.py

* Add function assert_is_close to utils

* Variable name changed

* Restructuring of code

* CI improvements (#38)

* Added support for Python version 3.9

* CI: Check test coverage

* Added interface and module for ccb (#37)

* Added ccb estimator (#39)

* Added ccb estimator file

* Removed type and added Interval()

* Added unit test for ccb + code corrections in ccb.py

* Test for correctness and narrowing Intervals added

* Changed module name

* Change variable name

* Removed hard coding for specific alpha values in gaussian files (#44)

* Add tests (#43)

* use random.seed() to make test scenarios reproducible

* Change function names

* Rename variables

* Rename variables listofestimators->estimators and listofintervals->intervals

* Renamed variables for test_narrowing_intervals

* Added test to check alpha value is not hardcoded for bandits

* Renamed to test_different_alpha_CI

* Rlos2021 minor cleanup (#45)

* minor cleanups

* py35 removal

* more type hints

* snake case

* ValueError

* snake case

Co-authored-by: Alexey Taymanov <[email protected]>
Co-authored-by: Alexey Taymanov <[email protected]>
3 people authored Feb 14, 2022
1 parent 993c080 commit bb971f6
Showing 31 changed files with 958 additions and 279 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [3.5, 3.6, 3.7, 3.8]
python-version: [3.6, 3.7, 3.8, 3.9]

steps:
- uses: actions/checkout@v2
@@ -34,7 +34,9 @@ jobs:
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
- name: Test with pytest and check coverage
run: |
pip install pytest
pytest
pip install pytest-cov
pytest --cov=estimators
24 changes: 24 additions & 0 deletions .gitignore
@@ -0,0 +1,24 @@
#Jupyter notebook checkpoints
**/.ipynb_checkpoints/*

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
*.egg-info

# Python build artifacts
build/
dist/

#ignored examples files
examples/*.log

# Editors
.vscode/
.idea/

# Type checking
.mypy_cache

.coverage
Empty file added estimators/__init__.py
Empty file.
Empty file added estimators/bandits/__init__.py
Empty file.
51 changes: 51 additions & 0 deletions estimators/bandits/base.py
@@ -0,0 +1,51 @@
""" Interface for implementation of contextual bandit estimators """

from abc import ABC, abstractmethod
from typing import List

class Estimator(ABC):
""" Interface for implementation of contextual bandit estimators """

@abstractmethod
def add_example(self, p_log: float, r: float, p_pred: float, count: float) -> None:
"""
Args:
p_log: probability of the logging policy
r: reward for choosing an action in the given context
p_pred: predicted probability of making the decision
count: weight
"""
...

@abstractmethod
def get(self) -> float:
""" Calculates the estimate
Returns:
The estimated value
"""
...

class Interval(ABC):
""" Interface for implementation of contextual bandit estimators interval """

@abstractmethod
def add_example(self, p_log: float, r: float, p_pred: float, count: float) -> None:
"""
Args:
p_log: probability of the logging policy
r: reward for choosing an action in the given context
p_pred: predicted probability of making the decision
count: weight
"""
...

@abstractmethod
def get(self, alpha: float) -> List[float]:
""" Calculates the confidence interval
Args:
alpha: significance level of the confidence interval
Returns:
The confidence interval as a list [lower_bound, upper_bound]
"""
...
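A minimal concrete implementation helps show how this contract is meant to be used. The sketch below re-declares the interface so it is self-contained; `SimpleIPS` is a hypothetical name for illustration, not a class in this package:

```python
from abc import ABC, abstractmethod


class Estimator(ABC):
    """Copy of the interface above, repeated so this sketch is self-contained."""

    @abstractmethod
    def add_example(self, p_log: float, r: float, p_pred: float, count: float) -> None:
        ...

    @abstractmethod
    def get(self) -> float:
        ...


class SimpleIPS(Estimator):
    """Hypothetical minimal implementation: an IPS-weighted mean."""

    def __init__(self) -> None:
        self.n = 0.0  # weighted sum of r * (p_pred / p_log)
        self.N = 0.0  # total weight

    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
        self.N += count
        self.n += r * (p_pred / p_log) * count

    def get(self) -> float:
        if self.N == 0:
            raise ValueError('Error: No data point added')
        return self.n / self.N


est = SimpleIPS()
est.add_example(p_log=0.5, r=1.0, p_pred=1.0)
est.add_example(p_log=0.5, r=0.0, p_pred=0.5)
print(est.get())  # 1.0
```

Incremental aggregation via `add_example` followed by a single `get()` call is the usage pattern every estimator and interval in the package follows.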
File renamed without changes.
41 changes: 41 additions & 0 deletions estimators/bandits/clopper_pearson.py
@@ -0,0 +1,41 @@
import math
from scipy.stats import beta
from estimators.bandits import base
from typing import List

class Interval(base.Interval):

def __init__(self):
################################# Aggregates quantities #########################################
#
# 'n': IPS numerator (weighted reward sum)
# 'N': total number of samples in bin from log (IPS = n/N)
# 'c': max abs. value of numerator's items (needed for Clopper-Pearson confidence intervals)
#
#################################################################################################

self.data = {'n':0.,'N':0,'c':0.}

def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
self.data['N'] += count
if p_pred > 0:
p_over_p = p_pred/p_log
if r != 0:
self.data['n'] += r*p_over_p*count
self.data['c'] = max(self.data['c'], r*p_over_p)

def get(self, alpha: float = 0.05) -> List[float]:
bounds = []
num = self.data['n']
den = self.data['N']
max_weighted_cost = self.data['c']

if max_weighted_cost > 0.0:
successes = num / max_weighted_cost
n = den / max_weighted_cost
bounds.append(beta.ppf(alpha / 2, successes, n - successes + 1))
bounds.append(beta.ppf(1 - alpha / 2, successes + 1, n - successes))

if not bounds:
bounds = [0, 0]
return bounds
25 changes: 15 additions & 10 deletions cressieread.py → estimators/bandits/cressieread.py
@@ -1,12 +1,14 @@
# CR(-2) is particularly computationally convenient

from math import fsum, inf
from estimators.bandits import base
from typing import List

class Estimator:
class Estimator(base.Estimator):
# NB: This works better if you use the true wmin and wmax
# which is _not_ the empirical minimum and maximum
# but rather the actual smallest and largest possible values
def __init__(self, wmin=0, wmax=inf):
def __init__(self, wmin: float = 0, wmax: float = inf):
assert wmin < 1
assert wmax > 1

@@ -15,7 +17,7 @@ def __init__(self, wmin=0, wmax=inf):

self.data = []

def add_example(self, p_log, r, p_pred, count=1):
def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
if count > 0:
w = p_pred / p_log
assert w >= 0, 'Error: negative importance weight'
@@ -24,7 +26,7 @@ def add_example(self, p_log, r, p_pred, count=1):
self.wmax = max(self.wmax, w)
self.wmin = min(self.wmin, w)

def get_estimate(self, rmin=0, rmax=1):
def get(self) -> float:
n = fsum(c for c, _, _ in self.data)
assert n > 0, 'Error: No data point added'

@@ -53,20 +55,23 @@ def get_estimate(self, rmin=0, rmax=1):

return vhat

class Interval:
class Interval(base.Interval):
# NB: This works better if you use the true wmin and wmax
# which is _not_ the empirical minimum and maximum
# but rather the actual smallest and largest possible values
def __init__(self, wmin=0, wmax=inf):
def __init__(self, wmin: float = 0, wmax: float = inf, rmin: float = 0, rmax: float = 1):
assert wmin < 1
assert wmax > 1

self.wmin = wmin
self.wmax = wmax

self.rmin = rmin
self.rmax = rmax

self.data = []

def add_example(self, p_log, r, p_pred, count=1):
def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
if count > 0:
w = p_pred / p_log
assert w >= 0, 'Error: negative importance weight'
@@ -75,7 +80,7 @@ def add_example(self, p_log, r, p_pred, count=1):
self.wmax = max(self.wmax, w)
self.wmin = min(self.wmin, w)

def get_interval(self, alpha=0.05, rmin=0, rmax=1):
def get(self, alpha: float = 0.05) -> List[float]:
from math import isclose, sqrt
from scipy.stats import f

@@ -100,7 +105,7 @@ def get_interval(self, alpha=0.05, rmin=0, rmax=1):
phi = (-uncgstar - Delta) / (2 * (1 + n))

bounds = []
for r, sign in ((rmin, 1), (rmax, -1)):
for r, sign in ((self.rmin, 1), (self.rmax, -1)):
candidates = []
for wfake in (self.wmin, self.wmax):
if wfake == inf:
@@ -144,7 +149,7 @@ def get_interval(self, alpha=0.05, rmin=0, rmax=1):
candidates.append(gstar)

best = min(candidates)
vbound = min(rmax, max(rmin, sign*best))
vbound = min(self.rmax, max(self.rmin, sign*best))
bounds.append(vbound)

return bounds
43 changes: 43 additions & 0 deletions estimators/bandits/gaussian.py
@@ -0,0 +1,43 @@
import math
from estimators.bandits import base
from scipy import stats
from typing import List

class Interval(base.Interval):

def __init__(self):
################################# Aggregates quantities #########################################
#
# 'n': IPS numerator (weighted reward sum)
# 'N': total number of samples in bin from log (IPS = n/N)
# 'SoS': sum of squares of numerator's items (needed for Gaussian confidence intervals)
#
#################################################################################################

self.data = {'n':0.,'N':0,'SoS':0}

def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
self.data['N'] += count
if p_pred > 0:
p_over_p = p_pred/p_log
if r != 0:
self.data['n'] += r*p_over_p*count
self.data['SoS'] += ((r*p_over_p)**2)*count

def get(self, alpha: float = 0.05) -> List[float]:
bounds = []
num = self.data['n']
den = self.data['N']
sum_of_sq = self.data['SoS']

if sum_of_sq > 0.0 and den > 1:
z_gaussian_cdf = stats.norm.ppf(1-alpha/2)

variance = (sum_of_sq - num * num / den) / (den - 1)
gauss_delta = z_gaussian_cdf * math.sqrt(variance/den)
bounds.append(num / den - gauss_delta)
bounds.append(num / den + gauss_delta)

if not bounds:
bounds = [0, 0]
return bounds
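The interval logic above can be sanity-checked with a dependency-free sketch: `statistics.NormalDist().inv_cdf` (standard library, Python 3.8+) yields the same quantile as `scipy.stats.norm.ppf`. The function name `gaussian_interval` here is illustrative, not part of the package:

```python
import math
from statistics import NormalDist


def gaussian_interval(examples, alpha=0.05):
    """Standalone sketch of the aggregation above.
    examples: iterable of (p_log, r, p_pred, count) tuples."""
    n, N, sos = 0.0, 0.0, 0.0
    for p_log, r, p_pred, count in examples:
        N += count
        if p_pred > 0 and r != 0:
            x = r * (p_pred / p_log)  # one IPS-weighted reward
            n += x * count
            sos += (x ** 2) * count
    if sos > 0.0 and N > 1:
        z = NormalDist().inv_cdf(1 - alpha / 2)  # same quantile as scipy's norm.ppf
        variance = (sos - n * n / N) / (N - 1)
        delta = z * math.sqrt(variance / N)
        return [n / N - delta, n / N + delta]
    return [0, 0]


print(gaussian_interval([(0.5, 1.0, 0.5, 1.0), (0.5, 1.0, 1.0, 1.0)]))  # roughly [0.52, 2.48]
```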
26 changes: 26 additions & 0 deletions estimators/bandits/ips.py
@@ -0,0 +1,26 @@
from estimators.bandits import base

class Estimator(base.Estimator):

def __init__(self):
################################# Aggregates quantities #########################################
#
# 'n': IPS numerator (weighted reward sum)
# 'N': total number of samples in bin from log (IPS = n/N)
#
#################################################################################################

self.data = {'n':0.,'N':0}

def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
self.data['N'] += count
if p_pred > 0:
p_over_p = p_pred/p_log
if r != 0:
self.data['n'] += r*p_over_p*count

def get(self) -> float:
if self.data['N'] == 0:
raise ValueError('Error: No data point added')

return self.data['n']/self.data['N']
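As a sanity check on the estimator above, a small self-contained simulation (the policies and rewards here are illustrative, not part of the test suite) shows why the IPS weighting recovers the target policy's value from data logged by a different policy:

```python
import random

# Logging policy picks action 0 w.p. 0.9; the target policy always picks
# action 1; action 1 pays reward 1, action 0 pays 0. The target policy's
# true value is therefore 1.0, even though action 1 is rarely logged.
random.seed(0)
n, N = 0.0, 0.0
for _ in range(100_000):
    a = 0 if random.random() < 0.9 else 1
    p_log = 0.9 if a == 0 else 0.1        # logging policy's probability of the logged action
    p_pred = 1.0 if a == 1 else 0.0       # target policy's probability of the logged action
    r = 1.0 if a == 1 else 0.0
    N += 1
    if p_pred > 0 and r != 0:
        n += r * (p_pred / p_log)         # same accumulation as add_example above
estimate = n / N
print(estimate)  # close to 1.0, the target policy's true value
```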
9 changes: 5 additions & 4 deletions mle.py → estimators/bandits/mle.py
@@ -1,12 +1,13 @@
# Empirical likelihood estimator

from math import fsum, inf
from estimators.bandits import base

class Estimator:
class Estimator(base.Estimator):
# NB: This works better if you use the true wmin and wmax
# which is _not_ the empirical minimum and maximum
# but rather the actual smallest and largest possible values
def __init__(self, wmin=0, wmax=inf):
def __init__(self, wmin: float = 0, wmax: float = inf):
assert wmin < 1
assert wmax > 1

@@ -15,7 +16,7 @@ def __init__(self, wmin=0, wmax=inf):

self.data = []

def add_example(self, p_log, r, p_pred, count=1):
def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
if count > 0:
w = p_pred / p_log
assert w >= 0, 'Error: negative importance weight'
@@ -28,7 +29,7 @@ def graddualobjective(self, n, beta):
return fsum(c * (w - 1)/((w - 1) * beta + n)
for c, w, _ in self.data)

def get_estimate(self, rmin=0, rmax=1):
def get(self) -> float:
from scipy.optimize import brentq

n = fsum(c for c, _, _ in self.data)
31 changes: 31 additions & 0 deletions estimators/bandits/snips.py
@@ -0,0 +1,31 @@
from estimators.bandits import base

class Estimator(base.Estimator):

def __init__(self):
################################# Aggregates quantities #########################################
#
# 'n': IPS numerator (weighted reward sum)
# 'N': total number of samples in bin from log (IPS = n/N)
# 'd': IPS of denominator (SNIPS = n/d)
#
#################################################################################################

self.data = {'n':0.,'N':0,'d':0.}

def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
self.data['N'] += count
if p_pred > 0:
p_over_p = p_pred/p_log
self.data['d'] += p_over_p*count
if r != 0:
self.data['n'] += r*p_over_p*count

def get(self) -> float:
if self.data['N'] == 0:
raise ValueError('Error: No data point added')

if self.data['d'] != 0:
return self.data['n']/self.data['d']
else:
return 0
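The self-normalizing denominator `'d'` is what distinguishes SNIPS from plain IPS. A standalone sketch of the same computation (the helper name is illustrative) makes the effect visible on a small example:

```python
def snips_estimate(examples):
    """Illustrative standalone version of the SNIPS computation above.
    examples: iterable of (p_log, r, p_pred, count) tuples."""
    n, d, N = 0.0, 0.0, 0.0
    for p_log, r, p_pred, count in examples:
        N += count
        if p_pred > 0:
            w = p_pred / p_log
            d += w * count          # self-normalizing denominator
            if r != 0:
                n += r * w * count
    if N == 0:
        raise ValueError('Error: No data point added')
    return n / d if d != 0 else 0


data = [(0.5, 1.0, 1.0, 1.0), (0.5, 0.0, 0.25, 1.0)]
print(snips_estimate(data))  # 0.8  (plain IPS on the same data gives 1.0)
```

Dividing by the summed importance weights instead of the raw sample count trades a small bias for lower variance when the weights are noisy.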