diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 0d78aa5..2cdfc3a 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.5, 3.6, 3.7, 3.8] + python-version: [3.6, 3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 @@ -34,7 +34,9 @@ jobs: flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest + - name: Test with pytest and check coverage run: | pip install pytest pytest + pip install pytest-cov + pytest --cov=estimators diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2d981b1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +#Jupyter notebook checkpoints +**/.ipynb_checkpoints/* + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class +*.egg-info + +# Python build artifacts +build/ +dist/ + +#ignored examples files +examples/*.log + +# Editors +.vscode/ +.idea/ + +# Type checking +.mypy_cache + +.coverage diff --git a/estimators/__init__.py b/estimators/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/estimators/bandits/__init__.py b/estimators/bandits/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/estimators/bandits/base.py b/estimators/bandits/base.py new file mode 100644 index 0000000..4b268d4 --- /dev/null +++ b/estimators/bandits/base.py @@ -0,0 +1,51 @@ +""" Interface for implementation of contextual bandit estimators """ + +from abc import ABC, abstractmethod +from typing import List + +class Estimator(ABC): + """ Interface for implementation of contextual bandit estimators """ + + @abstractmethod + def add_example(self, p_log: float, r: float, p_pred: float, count: float) -> None: + """ + Args: + p_log: 
probability of the logging policy + r: reward for choosing an action in the given context + p_pred: predicted probability of making decision + count: weight + """ + ... + + @abstractmethod + def get(self) -> float: + """ Calculates the selected estimator + + Returns: + The estimator value + """ + ... + +class Interval(ABC): + """ Interface for implementation of contextual bandit estimators interval """ + + @abstractmethod + def add_example(self, p_log: float, r: float, p_pred: float, count: float) -> None: + """ + Args: + p_log: probability of the logging policy + r: reward for choosing an action in the given context + p_pred: predicted probability of making decision + count: weight + """ + ... + + @abstractmethod + def get(self, alpha: float) -> List[float]: + """ Calculates the CI + Args: + alpha: alpha value + Returns: + Returns the confidence interval as list[float] + """ + ... diff --git a/cats_utils.py b/estimators/bandits/cats_utils.py similarity index 100% rename from cats_utils.py rename to estimators/bandits/cats_utils.py diff --git a/estimators/bandits/clopper_pearson.py b/estimators/bandits/clopper_pearson.py new file mode 100644 index 0000000..e96c14e --- /dev/null +++ b/estimators/bandits/clopper_pearson.py @@ -0,0 +1,41 @@ +import math +from scipy.stats import beta +from estimators.bandits import base +from typing import List + +class Interval(base.Interval): + + def __init__(self): + ################################# Aggregates quantities ######################################### + # + # 'n': IPS of numerator + # 'N': total number of samples in bin from log (IPS = n/N) + # 'c': max abs. 
value of numerator's items (needed for Clopper-Pearson confidence intervals) + # + ################################################################################################# + + self.data = {'n':0.,'N':0,'c':0.} + + def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None: + self.data['N'] += count + if p_pred > 0: + p_over_p = p_pred/p_log + if r != 0: + self.data['n'] += r*p_over_p*count + self.data['c'] = max(self.data['c'], r*p_over_p) + + def get(self, alpha: float = 0.05) -> List[float]: + bounds = [] + num = self.data['n'] + den = self.data['N'] + max_weighted_cost = self.data['c'] + + if max_weighted_cost > 0.0: + successes = num / max_weighted_cost + n = den / max_weighted_cost + bounds.append(beta.ppf(alpha / 2, successes, n - successes + 1)) + bounds.append(beta.ppf(1 - alpha / 2, successes + 1, n - successes)) + + if not bounds: + bounds = [0, 0] + return bounds diff --git a/cressieread.py b/estimators/bandits/cressieread.py similarity index 87% rename from cressieread.py rename to estimators/bandits/cressieread.py index 2e961a2..015362a 100644 --- a/cressieread.py +++ b/estimators/bandits/cressieread.py @@ -1,12 +1,14 @@ # CR(-2) is particularly computationally convenient from math import fsum, inf +from estimators.bandits import base +from typing import List -class Estimator: +class Estimator(base.Estimator): # NB: This works better you use the true wmin and wmax # which is _not_ the empirical minimum and maximum # but rather the actual smallest and largest possible values - def __init__(self, wmin=0, wmax=inf): + def __init__(self, wmin: float = 0, wmax: float = inf): assert wmin < 1 assert wmax > 1 @@ -15,7 +17,7 @@ def __init__(self, wmin=0, wmax=inf): self.data = [] - def add_example(self, p_log, r, p_pred, count=1): + def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None: if count > 0: w = p_pred / p_log assert w >= 0, 'Error: negative importance weight' @@ -24,7 +26,7 @@ 
def add_example(self, p_log, r, p_pred, count=1): self.wmax = max(self.wmax, w) self.wmin = min(self.wmin, w) - def get_estimate(self, rmin=0, rmax=1): + def get(self) -> float: n = fsum(c for c, _, _ in self.data) assert n > 0, 'Error: No data point added' @@ -53,20 +55,23 @@ def get_estimate(self, rmin=0, rmax=1): return vhat -class Interval: +class Interval(base.Interval): # NB: This works better you use the true wmin and wmax # which is _not_ the empirical minimum and maximum # but rather the actual smallest and largest possible values - def __init__(self, wmin=0, wmax=inf): + def __init__(self, wmin: float = 0, wmax: float = inf, rmin: float = 0, rmax: float = 1): assert wmin < 1 assert wmax > 1 self.wmin = wmin self.wmax = wmax + self.rmin = rmin + self.rmax = rmax + self.data = [] - def add_example(self, p_log, r, p_pred, count=1): + def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None: if count > 0: w = p_pred / p_log assert w >= 0, 'Error: negative importance weight' @@ -75,7 +80,7 @@ def add_example(self, p_log, r, p_pred, count=1): self.wmax = max(self.wmax, w) self.wmin = min(self.wmin, w) - def get_interval(self, alpha=0.05, rmin=0, rmax=1): + def get(self, alpha: float = 0.05) -> List[float]: from math import isclose, sqrt from scipy.stats import f @@ -100,7 +105,7 @@ def get_interval(self, alpha=0.05, rmin=0, rmax=1): phi = (-uncgstar - Delta) / (2 * (1 + n)) bounds = [] - for r, sign in ((rmin, 1), (rmax, -1)): + for r, sign in ((self.rmin, 1), (self.rmax, -1)): candidates = [] for wfake in (self.wmin, self.wmax): if wfake == inf: @@ -144,7 +149,7 @@ def get_interval(self, alpha=0.05, rmin=0, rmax=1): candidates.append(gstar) best = min(candidates) - vbound = min(rmax, max(rmin, sign*best)) + vbound = min(self.rmax, max(self.rmin, sign*best)) bounds.append(vbound) return bounds diff --git a/estimators/bandits/gaussian.py b/estimators/bandits/gaussian.py new file mode 100644 index 0000000..24b6363 --- /dev/null +++ 
b/estimators/bandits/gaussian.py @@ -0,0 +1,43 @@ +import math +from estimators.bandits import base +from scipy import stats +from typing import List + +class Interval(base.Interval): + + def __init__(self): + ################################# Aggregates quantities ######################################### + # + # 'n': IPS of numerator + # 'N': total number of samples in bin from log (IPS = n/N) + # 'SoS': sum of squares of numerator's items (needed for Gaussian confidence intervals) + # + ################################################################################################# + + self.data = {'n':0.,'N':0,'SoS':0} + + def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None: + self.data['N'] += count + if p_pred > 0: + p_over_p = p_pred/p_log + if r != 0: + self.data['n'] += r*p_over_p*count + self.data['SoS'] += ((r*p_over_p)**2)*count + + def get(self, alpha: float = 0.05) -> List[float]: + bounds = [] + num = self.data['n'] + den = self.data['N'] + sum_of_sq = self.data['SoS'] + + if sum_of_sq > 0.0 and den > 1: + z_gaussian_cdf = stats.norm.ppf(1-alpha/2) + + variance = (sum_of_sq - num * num / den) / (den - 1) + gauss_delta = z_gaussian_cdf * math.sqrt(variance/den) + bounds.append(num / den - gauss_delta) + bounds.append(num / den + gauss_delta) + + if not bounds: + bounds = [0, 0] + return bounds diff --git a/estimators/bandits/ips.py b/estimators/bandits/ips.py new file mode 100644 index 0000000..e0ec8e6 --- /dev/null +++ b/estimators/bandits/ips.py @@ -0,0 +1,26 @@ +from estimators.bandits import base + +class Estimator(base.Estimator): + + def __init__(self): + ################################# Aggregates quantities ######################################### + # + # 'n': IPS of numerator + # 'N': total number of samples in bin from log (IPS = n/N) + # + ################################################################################################# + + self.data = {'n':0.,'N':0} + + def add_example(self, p_log: 
float, r: float, p_pred: float, count: float = 1.0) -> None: + self.data['N'] += count + if p_pred > 0: + p_over_p = p_pred/p_log + if r != 0: + self.data['n'] += r*p_over_p*count + + def get(self) -> float: + if self.data['N'] == 0: + raise ValueError('Error: No data point added') + + return self.data['n']/self.data['N'] diff --git a/mle.py b/estimators/bandits/mle.py similarity index 89% rename from mle.py rename to estimators/bandits/mle.py index 74d7009..2ead64e 100644 --- a/mle.py +++ b/estimators/bandits/mle.py @@ -1,12 +1,13 @@ # Empirical likehood estimator from math import fsum, inf +from estimators.bandits import base -class Estimator: +class Estimator(base.Estimator): # NB: This works better you use the true wmin and wmax # which is _not_ the empirical minimum and maximum # but rather the actual smallest and largest possible values - def __init__(self, wmin=0, wmax=inf): + def __init__(self, wmin: float = 0, wmax: float = inf): assert wmin < 1 assert wmax > 1 @@ -15,7 +16,7 @@ def __init__(self, wmin=0, wmax=inf): self.data = [] - def add_example(self, p_log, r, p_pred, count=1): + def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None: if count > 0: w = p_pred / p_log assert w >= 0, 'Error: negative importance weight' @@ -28,7 +29,7 @@ def graddualobjective(self, n, beta): return fsum(c * (w - 1)/((w - 1) * beta + n) for c, w, _ in self.data) - def get_estimate(self, rmin=0, rmax=1): + def get(self) -> float: from scipy.optimize import brentq n = fsum(c for c, _, _ in self.data) diff --git a/estimators/bandits/snips.py b/estimators/bandits/snips.py new file mode 100644 index 0000000..b8001d1 --- /dev/null +++ b/estimators/bandits/snips.py @@ -0,0 +1,31 @@ +from estimators.bandits import base + +class Estimator(base.Estimator): + + def __init__(self): + ################################# Aggregates quantities ######################################### + # + # 'n': IPS of numerator + # 'N': total number of samples in bin 
from log (IPS = n/N) + # 'd': IPS of denominator (SNIPS = n/d) + # + ################################################################################################# + + self.data = {'n':0.,'N':0,'d':0.} + + def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None: + self.data['N'] += count + if p_pred > 0: + p_over_p = p_pred/p_log + self.data['d'] += p_over_p*count + if r != 0: + self.data['n'] += r*p_over_p*count + + def get(self) -> float: + if self.data['N'] == 0: + raise ValueError('Error: No data point added') + + if self.data['d'] != 0: + return self.data['n']/self.data['d'] + else: + return 0 diff --git a/basic-usage.py b/estimators/basic-usage.py similarity index 53% rename from basic-usage.py rename to estimators/basic-usage.py index 51221ac..d1f38c0 100644 --- a/basic-usage.py +++ b/estimators/basic-usage.py @@ -1,23 +1,37 @@ import argparse, os, gzip -import cressieread -import ips_snips -import mle -import ds_parse -import cats_utils +from bandits import cressieread +from bandits import ips +from bandits import snips +from bandits import mle +from bandits import gaussian +from bandits import clopper_pearson +from bandits import cats_utils +from utils import ds_parse def compute_estimates(log_fp, cats_transformer=None): # Init estimators - online = ips_snips.Estimator() - baseline1 = ips_snips.Estimator() - baselineR = ips_snips.Estimator() + online_ips = ips.Estimator() + online_snips = snips.Estimator() online_mle = mle.Estimator() - baseline1_mle = mle.Estimator() - baselineR_mle = mle.Estimator() online_cressieread = cressieread.Estimator() + + baseline1_ips = ips.Estimator() + baseline1_snips = snips.Estimator() + baseline1_mle = mle.Estimator() baseline1_cressieread = cressieread.Estimator() + + baselineR_ips = ips.Estimator() + baselineR_snips = snips.Estimator() + baselineR_mle = mle.Estimator() baselineR_cressieread = cressieread.Estimator() + baseline1_gaussian = gaussian.Interval() + baseline1_clopper_pearson 
= clopper_pearson.Interval() + + baselineR_gaussian = gaussian.Interval() + baselineR_clopper_pearson = clopper_pearson.Interval() + print('Processing: {}'.format(log_fp)) bytes_count = 0 tot_bytes = os.path.getsize(log_fp) @@ -41,9 +55,13 @@ def compute_estimates(log_fp, cats_transformer=None): r = 0 if data['cost'] == b'0' else -float(data['cost']) # Update estimators with tuple (p_log, r, p_pred) - online.add_example(data['p'], r, data['p']) - baseline1.add_example(data['p'], r, 1 if data['a'] == 1 else 0) - baselineR.add_example(data['p'], r, 1/data['num_a']) + online_ips.add_example(data['p'], r, data['p']) + baseline1_ips.add_example(data['p'], r, 1 if data['a'] == 1 else 0) + baselineR_ips.add_example(data['p'], r, 1/data['num_a']) + + online_snips.add_example(data['p'], r, data['p']) + baseline1_snips.add_example(data['p'], r, 1 if data['a'] == 1 else 0) + baselineR_snips.add_example(data['p'], r, 1/data['num_a']) online_mle.add_example(data['p'], r, data['p']) baseline1_mle.add_example(data['p'], r, 1 if data['a'] == 1 else 0) @@ -53,6 +71,12 @@ def compute_estimates(log_fp, cats_transformer=None): baseline1_cressieread.add_example(data['p'], r, 1 if data['a'] == 1 else 0) baselineR_cressieread.add_example(data['p'], r, 1/data['num_a']) + baseline1_gaussian.add_example(data['p'], r, 1 if data['a'] == 1 else 0) + baseline1_clopper_pearson.add_example(data['p'], r, 1 if data['a'] == 1 else 0) + + baselineR_gaussian.add_example(data['p'], r, 1/data['num_a']) + baselineR_clopper_pearson.add_example(data['p'], r, 1/data['num_a']) + evts += 1 if x.startswith(b'{"_label_ca":') and x.strip().endswith(b'}'): @@ -70,9 +94,13 @@ def compute_estimates(log_fp, cats_transformer=None): r = 0 if data['cost'] == b'0' else -float(data['cost']) # Update estimators with tuple (p_log, r, p_pred) - online.add_example(data['p'], r, data['p']) - baseline1.add_example(data['p'], r, data_baseline1['pred_p']) - baselineR.add_example(data['p'], r, 1.0 / 
cats_transformer.continuous_range) + online_ips.add_example(data['p'], r, data['p']) + baseline1_ips.add_example(data['p'], r, data_baseline1['pred_p']) + baselineR_ips.add_example(data['p'], r, 1.0 / cats_transformer.continuous_range) + + online_snips.add_example(data['p'], r, data['p']) + baseline1_snips.add_example(data['p'], r, data_baseline1['pred_p']) + baselineR_snips.add_example(data['p'], r, 1.0 / cats_transformer.continuous_range) online_mle.add_example(data['p'], r, data['p']) baseline1_mle.add_example(data['p'], r, data_baseline1['pred_p']) @@ -82,6 +110,12 @@ def compute_estimates(log_fp, cats_transformer=None): baseline1_cressieread.add_example(data['p'], r, data_baseline1['pred_p']) baselineR_cressieread.add_example(data['p'], r, 1.0 / cats_transformer.continuous_range) + baseline1_gaussian.add_example(data['p'], r, data_baseline1['pred_p']) + baseline1_clopper_pearson.add_example(data['p'], r, data_baseline1['pred_p']) + + baselineR_gaussian.add_example(data['p'], r, 1.0 / cats_transformer.continuous_range) + baselineR_clopper_pearson.add_example(data['p'], r, 1.0 / cats_transformer.continuous_range) + evts += 1 @@ -92,28 +126,28 @@ def compute_estimates(log_fp, cats_transformer=None): print('\nProcessed {} events out of {} lines'.format(evts,i+1)) - print('online_ips:',online.get_estimate('ips')) + print('online_ips:',online_ips.get()) - print('baseline1_ips:', baseline1.get_estimate('ips')) - print('baseline1 gaussian ci:', baseline1.get_interval('gaussian')) - print('baseline1 clopper pearson ci:', baseline1.get_interval('clopper-pearson')) + print('baseline1_ips:', baseline1_ips.get()) + print('baseline1 gaussian ci:', baseline1_gaussian.get()) + print('baseline1 clopper pearson ci:', baseline1_clopper_pearson.get()) - print('baselineR_ips:',baselineR.get_estimate('ips')) - print('baselineR gaussian ci:', baselineR.get_interval('gaussian')) - print('baselineR clopper pearson ci:', baselineR.get_interval('clopper-pearson')) + 
print('baselineR_ips:',baselineR_ips.get()) + print('baselineR gaussian ci:', baselineR_gaussian.get()) + print('baselineR clopper pearson ci:', baselineR_clopper_pearson.get()) - print('online_snips:',online.get_estimate('snips')) - print('baseline1_snips:',baseline1.get_estimate('snips')) - print('baselineR_snips:',baselineR.get_estimate('snips')) + print('online_snips:',online_snips.get()) + print('baseline1_snips:',baseline1_snips.get()) + print('baselineR_snips:',baselineR_snips.get()) - print('online_mle:',online_mle.get_estimate()) - print('baseline1_mle:',baseline1_mle.get_estimate()) - print('baselineR_mle:',baselineR_mle.get_estimate()) + print('online_mle:',online_mle.get()) + print('baseline1_mle:',baseline1_mle.get()) + print('baselineR_mle:',baselineR_mle.get()) - print('online_cressieread:',online_cressieread.get_estimate()) - print('baseline1_cressieread:',baseline1_cressieread.get_estimate()) - print('baselineR_cressieread:',baselineR_cressieread.get_estimate()) + print('online_cressieread:',online_cressieread.get()) + print('baseline1_cressieread:',baseline1_cressieread.get()) + print('baselineR_cressieread:',baselineR_cressieread.get()) if __name__ == '__main__': diff --git a/estimators/ccb/__init__.py b/estimators/ccb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/estimators/ccb/base.py b/estimators/ccb/base.py new file mode 100644 index 0000000..5c80d62 --- /dev/null +++ b/estimators/ccb/base.py @@ -0,0 +1,51 @@ +""" Interface for implementation of conditional contextual bandits estimators """ + +from abc import ABC, abstractmethod +from typing import List + +class Estimator(ABC): + """ Interface for implementation of conditional contextual bandits estimators """ + + @abstractmethod + def add_example(self, p_log: List, r: List, p_pred: List, count: float) -> None: + """ + Args: + p_log: List of probability of the logging policy + r: List of reward for choosing an action in the given context + p_pred: List of predicted 
probability of making decision + count: weight + """ + ... + + @abstractmethod + def get(self) -> float: + """ Calculates the selected estimator + + Returns: + The estimator value + """ + ... + +class Interval(ABC): + """ Interface for implementation of conditional contextual bandits estimators interval """ + + @abstractmethod + def add_example(self, p_log: List[float], r: List[float], p_pred: List[float], count: float) -> None: + """ + Args: + p_log: List of probability of the logging policy + r: List of reward for choosing an action in the given context + p_pred: List of predicted probability of making decision + count: weight + """ + ... + + @abstractmethod + def get(self, alpha: float) -> List[float]: + """ Calculates the CI + Args: + alpha: alpha value + Returns: + Returns the confidence interval as list[float] + """ + ... diff --git a/estimators/ccb/first_slot.py b/estimators/ccb/first_slot.py new file mode 100644 index 0000000..c6f9b5a --- /dev/null +++ b/estimators/ccb/first_slot.py @@ -0,0 +1,38 @@ +from estimators.ccb import base +from typing import List + +class Estimator(base.Estimator): + def __init__(self, bandits_estimator): + self.estimator = bandits_estimator + + def add_example(self, p_logs: List[float], r: List[float], p_preds: List[float], count: float = 1.0) -> None: + """Expects lists for logged probabilities, rewards and predicted probabilities. 
These should correspond to each slot. Only the first slot's values are forwarded to the wrapped bandits estimator, weighted by count.""" + + if not isinstance(p_logs, list) or not isinstance(r, list) or not isinstance(p_preds, list): # reject the example if ANY argument is not a list + raise ValueError('Error: p_logs, r and p_preds must be lists') + + if(len(p_logs) != len(p_preds) or len(p_logs) != len(r) or len(r) != len(p_preds)): # every per-slot list must have the same length + raise ValueError('Error: p_logs, r and p_preds must be the same length, found {}, {} and {} respectively'.format(len(p_logs), len(r), len(p_preds))) + + self.estimator.add_example(p_logs[0], r[0], p_preds[0], count) # forward the weight instead of silently dropping it + + def get(self) -> float: + return self.estimator.get() + +class Interval(base.Interval): + def __init__(self, bandits_interval): + self.interval = bandits_interval + + def add_example(self, p_logs: List[float], r: List[float], p_preds: List[float], count: float = 1.0) -> None: + """Expects lists for logged probabilities, rewards and predicted probabilities. These should correspond to each slot. Only the first slot's values are forwarded to the wrapped bandits interval, weighted by count.""" + + if not isinstance(p_logs, list) or not isinstance(r, list) or not isinstance(p_preds, list): # reject the example if ANY argument is not a list + raise ValueError('Error: p_logs, r and p_preds must be lists') + + if(len(p_logs) != len(p_preds) or len(p_logs) != len(r) or len(r) != len(p_preds)): # every per-slot list must have the same length + raise ValueError('Error: p_logs, r and p_preds must be the same length, found {}, {} and {} respectively'.format(len(p_logs), len(r), len(p_preds))) + + self.interval.add_example(p_logs[0], r[0], p_preds[0], count) # forward the weight instead of silently dropping it + + def get(self, alpha: float = 0.05) -> List[float]: + return self.interval.get(alpha) diff --git a/estimators/slates/__init__.py b/estimators/slates/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/estimators/slates/base.py b/estimators/slates/base.py new file mode 100644 index 0000000..b2a7ccf --- /dev/null +++ b/estimators/slates/base.py @@ -0,0 +1,50 @@ +""" Interface for implementation of slates estimator """ + +from abc import ABC, abstractmethod +from typing import List + +class Estimator(ABC): + """ Interface for implementation of slates estimator """ + + @abstractmethod + def 
add_example(self, p_logs: List[float], r: float, p_preds: List[float], count: float) -> None: + """ + Args: + p_logs: List of probabilities of the logging policy + r: reward for choosing an action in the given context + p_preds: List of predicted probabilities of making decision + count: weight + """ + ... + + @abstractmethod + def get(self) -> float: + """ Calculates the selected estimator + Returns: + The estimator value + """ + ... + +class Interval(ABC): + """ Interface for implementation of slates estimator interval """ + + @abstractmethod + def add_example(self, p_logs: List[float], r: float, p_preds: List[float], count: float) -> None: + """ + Args: + p_logs: List of probabilities of the logging policy + r: reward for choosing an action in the given context + p_preds: List of predicted probabilities of making decision + count: weight + """ + ... + + @abstractmethod + def get(self, alpha: float) -> List[float]: + """ Calculates the CI + Args: + alpha: alpha value + Returns: + Returns the confidence interval as list[float] + """ + ... diff --git a/estimators/slates/gaussian.py b/estimators/slates/gaussian.py new file mode 100644 index 0000000..a135063 --- /dev/null +++ b/estimators/slates/gaussian.py @@ -0,0 +1,49 @@ +import math +from estimators.slates import base +from scipy import stats +from typing import List + +class Interval(base.Interval): + def __init__(self): + self.data = {'n':0.,'N':0, 'SoS':0} + + def add_example(self, p_logs: List[float], r: float, p_preds: List[float], count: float = 1.0) -> None: + """Expects lists for logged probabilities and predicted probabilities. These should correspond to each slot. 
+ This function is implemented under the simplifying assumptions of + example 4 in the paper 'Off-policy evaluation for slate recommendation' + where the slate space is a cartesian product and the logging policy is a + product distribution""" + if not isinstance(p_logs, list) or not isinstance(p_preds, list): + raise ValueError('Error: p_logs and p_preds must be lists') + + if(len(p_logs) != len(p_preds)): + raise ValueError('Error: p_logs and p_preds must be the same length, found {} and {} respectively'.format(len(p_logs), len(p_preds))) + + self.data['N'] += count + p_over_ps = 0 + num_slots = len(p_logs) + for p_log, p_pred in zip(p_logs, p_preds): + p_over_ps += p_pred/p_log + p_over_ps -= num_slots - 1 + + if r != 0: + self.data['n'] += r*p_over_ps*count + self.data['SoS'] += ((r*p_over_ps)**2)*count + + def get(self, alpha: float = 0.05) -> List[float]: + bounds = [] + num = self.data['n'] + den = self.data['N'] + SoS = self.data['SoS'] + + if SoS > 0.0 and den > 1: # the sample variance divides by (den - 1), so it is undefined for den <= 1; fall through to [0, 0] + zGaussianCdf = stats.norm.ppf(1-alpha/2) + + variance = (SoS - num * num / den) / (den - 1) + gaussDelta = zGaussianCdf * math.sqrt(max(variance, 0.0)/den) # clamp tiny negative round-off so sqrt cannot raise + bounds.append(num / den - gaussDelta) + bounds.append(num / den + gaussDelta) + + if not bounds: + bounds = [0, 0] + return bounds diff --git a/estimators/slates/pseudo_inverse.py b/estimators/slates/pseudo_inverse.py new file mode 100644 index 0000000..3f7ad84 --- /dev/null +++ b/estimators/slates/pseudo_inverse.py @@ -0,0 +1,40 @@ +import math +from estimators.slates import base +from typing import List + +# PseudoInverse estimator for slate recommendation. The following implements the +# case for a Cartesian product when mu is a product distribution. This can be +# seen in example 4 of the paper. 
+# https://arxiv.org/abs/1605.04812 + +class Estimator(base.Estimator): + def __init__(self): + self.data = {'n':0.,'N':0} + + def add_example(self, p_logs: List[float], r: float, p_preds: List[float], count: float = 1.0) -> None: + """Expects lists for logged probabilities and predicted probabilities. These should correspond to each slot. + This function is implemented under the simplifying assumptions of + example 4 in the paper 'Off-policy evaluation for slate recommendation' + where the slate space is a cartesian product and the logging policy is a + product distribution""" + if not isinstance(p_logs, list) or not isinstance(p_preds, list): + raise ValueError('Error: p_logs and p_preds must be lists') + + if(len(p_logs) != len(p_preds)): + raise ValueError('Error: p_logs and p_preds must be the same length, found {} and {} respectively'.format(len(p_logs), len(p_preds))) + + self.data['N'] += count + p_over_ps = 0 + num_slots = len(p_logs) + for p_log, p_pred in zip(p_logs, p_preds): + p_over_ps += p_pred/p_log + p_over_ps -= num_slots - 1 + + if r != 0: + self.data['n'] += r*p_over_ps*count + + def get(self) -> float: + if self.data['N'] == 0: + raise ValueError('Error: No data point added') + + return self.data['n']/self.data['N'] diff --git a/estimators/test/__init__.py b/estimators/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/estimators/test/test_bandits.py b/estimators/test/test_bandits.py new file mode 100644 index 0000000..16fecab --- /dev/null +++ b/estimators/test/test_bandits.py @@ -0,0 +1,188 @@ +import os, sys, random, copy +import numpy as np +import pytest +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from estimators.bandits import ips +from estimators.bandits import snips +from estimators.bandits import mle +from estimators.bandits import cressieread +from estimators.bandits import cats_utils +from estimators.bandits import gaussian +from estimators.bandits import clopper_pearson 
+from estimators.test.utils import Helper + +@pytest.fixture +def random_fixture(): + random.seed(0) + np.random.seed(0) + +def test_single_example(): + estimators = [(ips.Estimator(), 2.0), (snips.Estimator(), 1.0), (mle.Estimator(), 1.0), (cressieread.Estimator(), 1.0)] + + p_log = 0.3 + p_pred = 0.6 + reward = 1 + + for Estimator in estimators: + Estimator[0].add_example(p_log, reward, p_pred) + assert Estimator[0].get() == Estimator[1] + + +def test_multiple_examples(): + ''' To test correctness of estimators: Compare the expected value with value returned by Estimator.get()''' + + # The tuple (Estimator, expected value) for each estimator is stored in estimators + estimators = [ + (ips.Estimator(), 1), + (snips.Estimator(), 1), + (mle.Estimator(), 1), + (cressieread.Estimator(), 1), + ] + + def datagen(): + return {'p_log': 1, + 'r': 1, + 'p_pred': 1} + + estimates = Helper.get_estimate(datagen, estimators=[l[0] for l in estimators], num_examples=4) + + for Estimator, estimate in zip(estimators, estimates): + Helper.assert_is_close(Estimator[1], estimate) + + +def test_narrowing_intervals(): + ''' To test if confidence intervals are getting tighter with more data points ''' + + intervals = [ + cressieread.Interval(), + gaussian.Interval(), + clopper_pearson.Interval(), + ] + + def datagen(epsilon, delta=0.5): + # Logged Policy + # 0 - (1-epsilon) : Reward is Bernoulli(delta) + # 1 - epsilon : Reward is Bernoulli(1-delta) + + # p_pred: 1 if action is chosen, 0 if action not chosen + + # policy to estimate + # (delta), (1-delta) reward from a Bernoulli distribution - for probability p_pred + + chosen = int(random.random() < epsilon) + return {'p_log': epsilon if chosen == 1 else 1 - epsilon, + 'r': int(random.random() < 1-delta) if chosen == 1 else int(random.random() < delta), + 'p_pred': int(chosen==1)} + + intervals_less_data = Helper.get_estimate(lambda: datagen(epsilon=0.5), intervals, num_examples=100) + intervals_more_data = Helper.get_estimate(lambda: 
datagen(epsilon=0.5), intervals, num_examples=10000) + + for interval_less_data, interval_more_data in zip(intervals_less_data, intervals_more_data): + width_wider = interval_less_data[1] - interval_less_data[0] + width_narrower = interval_more_data[1] - interval_more_data[0] + assert width_wider > 0 + assert width_narrower > 0 + assert width_narrower < width_wider + + +def test_different_alpha_CI(): + ''' To test that alpha value is not hard coded: get confidence intervals for randomly generated alpha values ''' + + intervals = [ + cressieread.Interval(), + gaussian.Interval(), + clopper_pearson.Interval(), + ] + alphas = np.arange(0.1, 1, 0.1) + + def datagen(epsilon, delta=0.5): + # Logged Policy + # 0 - (1-epsilon) : Reward is Bernoulli(delta) + # 1 - epsilon : Reward is Bernoulli(1-delta) + + # p_pred: 1 if action is chosen, 0 if action not chosen + + # policy to estimate + # (delta), (1-delta) reward from a Bernoulli distribution - for probability p_pred + + chosen = int(random.random() < epsilon) + return {'p_log': epsilon if chosen == 1 else 1 - epsilon, + 'r': int(random.random() < 1-delta) if chosen == 1 else int(random.random() < delta), + 'p_pred': int(chosen==1)} + + for interval in intervals: + interval = Helper.run_add_example(lambda: datagen(epsilon=0.5), interval, num_examples=100) + for alpha in alphas: + assert interval.get(alpha=alpha) + + +def test_cats_ips(): + ips_estimator = ips.Estimator() + snips_estimator = snips.Estimator() + + prob_logs = [0.151704, 0.006250, 0.086, 0.086, 0.086] + action_logs = [15.0, 3.89, 22.3, 17.34, 31] + rewards = [0.1, 0.2, 0, 1.0, 1.0] + + max_value = 32 + bandwidth = 1 + cats_transformer = cats_utils.CatsTransformer(num_actions=8, min_value=0, max_value=max_value, bandwidth=bandwidth) + + for logged_action, r, logged_prob in zip(action_logs, rewards, prob_logs): + data = {} + data['a'] = logged_action + data['cost'] = r + data['p'] = logged_prob + if logged_action < (max_value / 2.0): + pred_action = 
logged_action + 2 * bandwidth + data = cats_transformer.transform(data, pred_action) # pred_action should be too far away, so pred_p should be 0 + assert data['pred_p'] == 0.0 + else: + pred_action = logged_action + data = cats_transformer.transform(data, logged_action) # same action, so pred_p should be 1 + assert data['pred_p'] == 1.0 / (2 * bandwidth) + + ips_estimator.add_example(data['p'], r, data['pred_p']) + snips_estimator.add_example(data['p'], r, data['pred_p']) + assert ips_estimator.get() >= snips_estimator.get() + + +def test_cats_transformer_on_edges(): + prob_logs = [0.151704, 0.006250, 0.086, 0.086] + action_logs = [0, 1, 31, 32] + rewards = [1.0, 1.0, 1.0, 1.0] + + max_value = 32 + bandwidth = 2 + cats_transformer = cats_utils.CatsTransformer(num_actions=8, min_value=0, max_value=max_value, bandwidth=bandwidth) + + for logged_action, r, logged_prob in zip(action_logs, rewards, prob_logs): + data = {} + data['a'] = logged_action + data['cost'] = r + data['p'] = logged_prob + + pred_action = logged_action + data = cats_transformer.transform(data, logged_action) # same action, so pred_p should be 1 + assert data['pred_p'] == 1.0 / (2 * bandwidth) + + +def test_cats_baseline(): + max_value = 32 + min_value = 0 + bandwidth = 1 + num_actions = 8 + cats_transformer = cats_utils.CatsTransformer(num_actions=num_actions, min_value=min_value, max_value=max_value, bandwidth=bandwidth) + baseline = cats_transformer.get_baseline1_prediction() + ## unit range is 4, min_value is 0 so baseline action should be the centre of the first unit range, starting off from min_value i.e. 
2 + assert baseline == 2 + + max_value = 33 + min_value = 1 + bandwidth = 1 + num_actions = 8 + cats_transformer = cats_utils.CatsTransformer(num_actions=num_actions, min_value=min_value, max_value=max_value, bandwidth=bandwidth) + baseline = cats_transformer.get_baseline1_prediction() + ## unit range is 4, min_value is 1 so baseline action should be the centre of the first unit range, starting off from min_value i.e. 3 + assert baseline == 3 diff --git a/estimators/test/test_ccb.py b/estimators/test/test_ccb.py new file mode 100644 index 0000000..0d2cf7d --- /dev/null +++ b/estimators/test/test_ccb.py @@ -0,0 +1,92 @@ +import os, sys, random, copy +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from estimators.bandits import ips +from estimators.bandits import snips +from estimators.bandits import mle +from estimators.bandits import cressieread +from estimators.bandits import gaussian +from estimators.bandits import clopper_pearson +from estimators.ccb import first_slot +from estimators.test.utils import Helper + +random.seed(0) + +def test_single_example(): + estimators = [ + (first_slot.Estimator(ips.Estimator()), 2.0), + (first_slot.Estimator(snips.Estimator()), 1.0), + (first_slot.Estimator(mle.Estimator()), 1.0), + (first_slot.Estimator(cressieread.Estimator()), 1.0), + ] + + p_log = [0.3] + p_pred = [0.6] + reward = [1] + + for Estimator in estimators: + Estimator[0].add_example(p_log, reward, p_pred) + assert Estimator[0].get() == Estimator[1] + +def test_multiple_examples(): + ''' To test correctness of estimators: Compare the expected value with value returned by Estimator.get()''' + + # The tuple (Estimator, expected value) for each estimator is stored in estimators + estimators = [ + (first_slot.Estimator(ips.Estimator()), 1.0), + (first_slot.Estimator(snips.Estimator()), 1.0), + (first_slot.Estimator(mle.Estimator()), 1.0), + (first_slot.Estimator(cressieread.Estimator()), 1.0) + ] + + def datagen_multiple_slot_values(): 
+ return {'p_log': [1, 0.5, 0.7], + 'r': [1, 2, 3], + 'p_pred': [1, 0.7, 0.5]} + + def datagen_single_slot_value(): + return {'p_log': [1], + 'r': [1], + 'p_pred': [1]} + + estimates_multiple = Helper.get_estimate(datagen_multiple_slot_values, estimators=[l[0] for l in estimators], num_examples=4) + estimates_single = Helper.get_estimate(datagen_single_slot_value, estimators=[l[0] for l in estimators], num_examples=4) + + for Estimator, estimate_multiple, estimate_single in zip(estimators, estimates_multiple, estimates_single): + Helper.assert_is_close(Estimator[1], estimate_multiple) + Helper.assert_is_close(Estimator[1], estimate_single) + assert estimate_single == estimate_multiple + +def test_narrowing_intervals(): + ''' To test if confidence intervals are getting tighter with more data points ''' + + intervals = [ + first_slot.Interval(cressieread.Interval()), + first_slot.Interval(gaussian.Interval()), + first_slot.Interval(clopper_pearson.Interval()), + ] + + def datagen(epsilon, delta=0.5): + # Logged Policy + # 0 - (1-epsilon) : Reward is Bernoulli(delta) + # 1 - epsilon : Reward is Bernoulli(1-delta) + + # p_pred: 1 if action is chosen, 0 if action not chosen + + # policy to estimate + # (delta), (1-delta) reward from a Bernoulli distribution - for probability p_pred + + chosen = int(random.random() < epsilon) + return {'p_log': [epsilon if chosen == 1 else 1 - epsilon], + 'r': [int(random.random() < 1-delta) if chosen == 1 else int(random.random() < delta)], + 'p_pred': [int(chosen==1)]} + + intervals_less_data = Helper.get_estimate(lambda: datagen(epsilon=0.5), intervals, num_examples=100) + intervals_more_data = Helper.get_estimate(lambda: datagen(epsilon=0.5), intervals, num_examples=10000) + + for interval_less_data, interval_more_data in zip(intervals_less_data, intervals_more_data): + width_wider = interval_less_data[1] - interval_less_data[0] + width_narrower = interval_more_data[1] - interval_more_data[0] + assert width_wider > 0 + assert 
width_narrower > 0 + assert width_narrower < width_wider diff --git a/estimators/test/test_slates.py b/estimators/test/test_slates.py new file mode 100644 index 0000000..5ffd36b --- /dev/null +++ b/estimators/test/test_slates.py @@ -0,0 +1,88 @@ +import os, sys, random, copy +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from estimators.slates import pseudo_inverse +from estimators.slates import gaussian +from estimators.bandits import ips +from estimators.test.utils import Helper + +random.seed(0) + +def test_single_slot_pi_equivalent_to_ips(): + ''' PI should be equivalent to IPS when there is only a single slot ''' + + pi_estimator = pseudo_inverse.Estimator() + ips_estimator = ips.Estimator() + + p_logs = [0.8, 0.25, 0.5, 0.2] + p_preds = [0.6, 0.4, 0.3, 0.9] + rewards = [0.1, 0.2, 0, 1.0] + + for p_log, r, p_pred in zip(p_logs, rewards, p_preds): + pi_estimator.add_example([p_log], r, [p_pred]) + ips_estimator.add_example(p_log, r, p_pred) + Helper.assert_is_close(pi_estimator.get() , ips_estimator.get()) + +def test_multiple_slots(): + ''' To test correctness of estimators: Compare the expected value with value returned by Estimator.get()''' + + # The tuple (Estimator, expected value) for each estimator is stored in estimators + estimators = [ + (pseudo_inverse.Estimator(), 1), + ] + + def datagen(num_slots): + # num_slots represents the len(p_logs) or len(p_pred) for each example + data = {'p_log': [], 'r': 0.0, 'p_pred': []} + for s in range(num_slots): + data['p_log'].append(1) + data['p_pred'].append(1) + data['r'] = 1 + return data + + # 4 examples; each example of the type-> + # p_logs = [1,1,1,1] + # p_pred = [1,1,1,1] + # reward = 1 + estimates = Helper.get_estimate(lambda: datagen(num_slots=4), estimators=[l[0] for l in estimators], num_examples=4) + + for Estimator, estimate in zip(estimators, estimates): + Helper.assert_is_close(Estimator[1], estimate) + +def test_narrowing_intervals(): + ''' To test for narrowing 
intervals; Number of examples increase => narrowing CI ''' + + intervals = [ + gaussian.Interval(), + ] + + def datagen(num_slots, epsilon, delta=0.5): + + data = {'p_log': [], 'r': 0.0, 'p_pred': []} + + for s in range(num_slots): + # Logged Policy for each slot s + # 0 - (1-epsilon) : Reward is Bernoulli(delta) + # 1 - epsilon : Reward is Bernoulli(1-delta) + + # p_pred: 1 if action is chosen, 0 if action not chosen + + # policy to estimate + # (delta), (1-delta) reward from a Bernoulli distribution - for probability p_pred; looking at the matches per slot s + + chosen = int(random.random() < epsilon) + data['p_log'].append(epsilon if chosen == 1 else 1 - epsilon) + data['r'] += int(random.random() < 1-delta) if chosen == 1 else int(random.random() < delta) + data['p_pred'].append(int(chosen==1)) + + return data + + intervals_less_data = Helper.get_estimate(lambda: datagen(num_slots=4, epsilon=0.5), intervals, num_examples=100) + intervals_more_data = Helper.get_estimate(lambda: datagen(num_slots=4, epsilon=0.5), intervals, num_examples=10000) + + for interval_less_data, interval_more_data in zip(intervals_less_data, intervals_more_data): + width_wider = interval_less_data[1] - interval_less_data[0] + width_narrower = interval_more_data[1] - interval_more_data[0] + assert width_wider > 0 + assert width_narrower > 0 + assert width_narrower < width_wider diff --git a/estimators/test/utils.py b/estimators/test/utils.py new file mode 100644 index 0000000..afbda66 --- /dev/null +++ b/estimators/test/utils.py @@ -0,0 +1,31 @@ +import random, copy + +class Helper(): + ''' Helper Class for tests ''' + + @staticmethod + def assert_is_close(n1, n2): + ''' Function to check if two numbers n1 and n2 are nearly equal''' + + assert abs(n1 - n2) <= 1e-6 * (1 + abs(n1) + abs(n2)) + + @staticmethod + def run_add_example(datagen, estimator, num_examples): + # class_object is the object of class Estimator() or class Interval() + Estimator = copy.deepcopy(estimator) + + for n in 
range(0,num_examples): + data = datagen() + Estimator.add_example(data['p_log'], data['r'], data['p_pred']) + + return Estimator + + @staticmethod + def get_estimate(datagen, estimators, num_examples): + estimates = [] + for Estimator in estimators: + + estimator = Helper.run_add_example(datagen, Estimator, num_examples) + estimates.append(estimator.get()) + + return estimates diff --git a/estimators/utils/__init__.py b/estimators/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ds_parse.py b/estimators/utils/ds_parse.py similarity index 100% rename from ds_parse.py rename to estimators/utils/ds_parse.py diff --git a/ips_snips.py b/ips_snips.py deleted file mode 100644 index 1cba0d1..0000000 --- a/ips_snips.py +++ /dev/null @@ -1,74 +0,0 @@ -import math -from scipy.stats import beta - - -class Estimator: - def __init__(self): - ############################### Aggregates quantities ###################################### - # - # 'n': IPS of numerator - # 'N': total number of samples in bin from log (IPS = n/N) - # 'd': IPS of denominator (SNIPS = n/d) - # 'Ne': number of samples in bin when off-policy agrees with log policy - # 'c': max abs. 
value of numerator's items (needed for Clopper-Pearson confidence intervals) - # 'SoS': sum of squares of numerator's items (needed for Gaussian confidence intervals) - # - ################################################################################################# - - self.data = {'n':0.,'N':0,'d':0.,'Ne':0,'c':0.,'SoS':0} - - def add_example(self, p_log, r, p_pred, count=1): - self.data['N'] += count - if p_pred > 0: - p_over_p = p_pred/p_log - self.data['d'] += p_over_p*count - self.data['Ne'] += count - if r != 0: - self.data['n'] += r*p_over_p*count - self.data['c'] = max(self.data['c'], r*p_over_p) - self.data['SoS'] += ((r*p_over_p)**2)*count - - def get_estimate(self, type): - if self.data['N'] == 0: - raise('Error: No data point added') - - if type == 'ips': - return self.data['n']/self.data['N'] - elif type == 'snips': - if self.data['d'] != 0: - return self.data['n']/self.data['d'] - else: - return 0 - else: - raise('Error: Incorrect estimator type {}. Supported options are ips or snips'.format(type)) - - - def get_interval(self, type, alpha=0.05): - bounds = [] - num = self.data['n'] - den = self.data['N'] - maxWeightedCost = self.data['c'] - SoS = self.data['SoS'] - - if type == "clopper-pearson": - if maxWeightedCost > 0.0: - successes = num / maxWeightedCost - n = den / maxWeightedCost - bounds.append(beta.ppf(alpha / 2, successes, n - successes + 1)) - bounds.append(beta.ppf(1 - alpha / 2, successes + 1, n - successes)) - elif type == "gaussian": - if SoS > 0.0 and den > 1: - zGaussianCdf = { - 0.25: 1.15, - 0.1: 1.645, - 0.05: 1.96 - } - - variance = (SoS - num * num / den) / (den - 1) - gaussDelta = zGaussianCdf[alpha] * math.sqrt(variance/den) - bounds.append(num / den - gaussDelta) - bounds.append(num / den + gaussDelta) - - if not bounds: - bounds = [0, 0] - return bounds diff --git a/pseudo_inverse.py b/pseudo_inverse.py deleted file mode 100644 index fdb392f..0000000 --- a/pseudo_inverse.py +++ /dev/null @@ -1,68 +0,0 @@ -import math 
-from scipy.stats import beta - -# PseudoInverse estimator for slate recommendation. The following implements the -# case for a Cartesian product when mu is a product distribution. This can be -# seen in example 4 of the paper. -# https://arxiv.org/abs/1605.04812 - -class Estimator: - def __init__(self): - self.data = {'n':0.,'N':0, 'SoS':0} - - def add_example(self, p_logs, r, p_preds, count=1): - """Expects lists for logged probabilities and predicted probabilities. These should correspond to each slot. - This function is implemented under the simplifying assumptions of - example 4 in the paper 'Off-policy evaluation for slate recommendation' - where the slate space is a cartesian product and the logging policy is a - product distribution""" - if not isinstance(p_logs, list) or not isinstance(p_preds, list): - raise('Error: p_logs and p_preds must be lists') - - if(len(p_logs) != len(p_preds)): - raise('Error: p_logs and p_preds must be the same length, found {} and {} respectively'.format(len(p_logs), len(p_preds))) - - self.data['N'] += count - p_over_ps = 0 - num_slots = len(p_logs) - for p_log, p_pred in zip(p_logs, p_preds): - p_over_ps += p_pred/p_log - p_over_ps -= num_slots - 1 - - if r != 0: - self.data['n'] += r*p_over_ps*count - self.data['SoS'] += ((r*p_over_ps)**2)*count - - def get_estimate(self, type): - if self.data['N'] == 0: - raise('Error: No data point added') - - if type == 'pi': - return self.data['n']/self.data['N'] - else: - raise('Error: Incorrect estimator type {}. 
Supported options are pi'.format(type)) - - def get_interval(self, type, alpha=0.05): - bounds = [] - num = self.data['n'] - den = self.data['N'] - SoS = self.data['SoS'] - - if type == "gaussian": - if SoS > 0.0: - zGaussianCdf = { - 0.25: 1.15, - 0.1: 1.645, - 0.05: 1.96 - } - - variance = (SoS - num * num / den) / (den - 1) - gaussDelta = zGaussianCdf[alpha] * math.sqrt(variance/den) - bounds.append(num / den - gaussDelta) - bounds.append(num / den + gaussDelta) - else: - raise('Error: Incorrect interval type {}. Supported options are gaussian'.format(type)) - - if not bounds: - bounds = [0, 0] - return bounds diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..ed5554e --- /dev/null +++ b/setup.py @@ -0,0 +1,25 @@ +import setuptools + +with open("README.md", "r") as f: + long_description = f.read() + +setuptools.setup( + name="vw-estimators", + version="0.0.1", + description="Python package of estimators to perform off-policy evaluation", + long_description=long_description, + long_description_content_type="text/markdown", + url="https://github.com/VowpalWabbit/estimators.git", + license="BSD 3-Clause License", + classifiers=[ + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 3.6", + "Operating System :: OS Independent", + "Topic :: Scientific/Engineering" + ], + packages=["estimators", "estimators.bandits", "estimators.ccb", "estimators.slates", "estimators.utils"], + install_requires= ['scipy>=0.9'], + tests_require=['pytest'], + python_requires=">=3.6", +) \ No newline at end of file diff --git a/test/test_pi.py b/test/test_pi.py deleted file mode 100644 index f9d3838..0000000 --- a/test/test_pi.py +++ /dev/null @@ -1,89 +0,0 @@ -import os, sys -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -import pseudo_inverse -import ips_snips -import cats_utils - -def test_single_slot_pi_equivalent_to_ips(): - """PI should be equivalent to IPS when 
there is only a single slot""" - pi_estimator = pseudo_inverse.Estimator() - ips_estimator = ips_snips.Estimator() - is_close = lambda a, b: abs(a - b) <= 1e-6 * (1 + abs(a) + abs(b)) - - p_logs = [0.8, 0.25, 0.5, 0.2] - p_preds = [0.6, 0.4, 0.3, 0.9] - rewards = [0.1, 0.2, 0, 1.0] - - for p_log, r, p_pred in zip(p_logs, rewards, p_preds): - pi_estimator.add_example([p_log], r, [p_pred]) - ips_estimator.add_example(p_log, r, p_pred) - assert is_close(pi_estimator.get_estimate('pi') , ips_estimator.get_estimate('ips')) - - -def test_cats_ips(): - ips_estimator = ips_snips.Estimator() - - prob_logs = [0.151704, 0.006250, 0.086, 0.086, 0.086] - action_logs = [15.0, 3.89, 22.3, 17.34, 31] - rewards = [0.1, 0.2, 0, 1.0, 1.0] - - max_value = 32 - bandwidth = 1 - cats_transformer = cats_utils.CatsTransformer(num_actions=8, min_value=0, max_value=max_value, bandwidth=bandwidth) - - for logged_action, r, logged_prob in zip(action_logs, rewards, prob_logs): - data = {} - data['a'] = logged_action - data['cost'] = r - data['p'] = logged_prob - if logged_action < (max_value / 2.0): - pred_action = logged_action + 2 * bandwidth - data = cats_transformer.transform(data, pred_action) # pred_action should be too far away, so pred_p should be 0 - assert data['pred_p'] == 0.0 - else: - pred_action = logged_action - data = cats_transformer.transform(data, logged_action) # same action, so pred_p should be 1 - assert data['pred_p'] == 1.0 / (2 * bandwidth) - - ips_estimator.add_example(data['p'], r, data['pred_p']) - assert ips_estimator.get_estimate('ips') >= ips_estimator.get_estimate('snips') - -def test_cats_transformer_on_edges(): - prob_logs = [0.151704, 0.006250, 0.086, 0.086] - action_logs = [0, 1, 31, 32] - rewards = [1.0, 1.0, 1.0, 1.0] - - max_value = 32 - bandwidth = 2 - cats_transformer = cats_utils.CatsTransformer(num_actions=8, min_value=0, max_value=max_value, bandwidth=bandwidth) - - for logged_action, r, logged_prob in zip(action_logs, rewards, prob_logs): - data = {} 
- data['a'] = logged_action - data['cost'] = r - data['p'] = logged_prob - - pred_action = logged_action - data = cats_transformer.transform(data, logged_action) # same action, so pred_p should be 1 - assert data['pred_p'] == 1.0 / (2 * bandwidth) - - -def test_cats_baseline(): - max_value = 32 - min_value = 0 - bandwidth = 1 - num_actions = 8 - cats_transformer = cats_utils.CatsTransformer(num_actions=num_actions, min_value=min_value, max_value=max_value, bandwidth=bandwidth) - baseline = cats_transformer.get_baseline1_prediction() - ## unit range is 4, min_value is 0 so baseline action should be the centre of the firt unit range, starting off from min_value i.e. 2 - assert baseline == 2 - - max_value = 33 - min_value = 1 - bandwidth = 1 - num_actions = 8 - cats_transformer = cats_utils.CatsTransformer(num_actions=num_actions, min_value=min_value, max_value=max_value, bandwidth=bandwidth) - baseline = cats_transformer.get_baseline1_prediction() - ## unit range is 4, min_value is 1 so baseline action should be the centre of the firt unit range, starting off from min_value i.e. 3 - assert baseline == 3