diff --git a/estimators/ccb/base.py b/estimators/ccb/base.py
index 065d07e..5daa609 100644
--- a/estimators/ccb/base.py
+++ b/estimators/ccb/base.py
@@ -20,7 +20,7 @@ def add_example(self, p_log: List, r: List, p_pred: List, count: float) -> None:
     @abstractmethod
     def get(self) -> float:
         """ Calculates the selected estimator
-        
+
        Returns:
            The estimator value
        """
diff --git a/estimators/ccb/first_slot.py b/estimators/ccb/first_slot.py
new file mode 100644
index 0000000..c325e50
--- /dev/null
+++ b/estimators/ccb/first_slot.py
@@ -0,0 +1,37 @@
+from estimators.ccb import base
+
+class Estimator(base.Estimator):
+    def __init__(self, bandits_estimator):
+        self.estimator = bandits_estimator
+
+    def add_example(self, p_logs, r, p_preds, count=1):
+        """Expects lists of logged probabilities, rewards and predicted probabilities, with one entry per slot."""
+
+        if not isinstance(p_logs, list) or not isinstance(r, list) or not isinstance(p_preds, list):
+            raise ValueError('p_logs, r and p_preds must be lists')
+
+        if len(p_logs) != len(p_preds) or len(p_logs) != len(r) or len(r) != len(p_preds):
+            raise ValueError('p_logs, r and p_preds must be the same length, found {}, {} and {} respectively'.format(len(p_logs), len(r), len(p_preds)))
+
+        self.estimator.add_example(p_logs[0], r[0], p_preds[0])
+
+    def get(self):
+        return self.estimator.get()
+
+class Interval(base.Estimator):
+    def __init__(self, bandits_interval):
+        self.interval = bandits_interval
+
+    def add_example(self, p_logs, r, p_preds, count=1):
+        """Expects lists of logged probabilities, rewards and predicted probabilities, with one entry per slot."""
+
+        if not isinstance(p_logs, list) or not isinstance(r, list) or not isinstance(p_preds, list):
+            raise ValueError('p_logs, r and p_preds must be lists')
+
+        if len(p_logs) != len(p_preds) or len(p_logs) != len(r) or len(r) != len(p_preds):
+            raise ValueError('p_logs, r and p_preds must be the same length, found {}, {} and {} respectively'.format(len(p_logs), len(r), len(p_preds)))
+
+        self.interval.add_example(p_logs[0], r[0], p_preds[0])
+
+    def get(self):
+        return self.interval.get()
diff --git a/estimators/test/test_ccb.py b/estimators/test/test_ccb.py
new file mode 100644
index 0000000..406b610
--- /dev/null
+++ b/estimators/test/test_ccb.py
@@ -0,0 +1,82 @@
+import os, sys, random, copy
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from estimators.bandits import ips
+from estimators.bandits import snips
+from estimators.bandits import mle
+from estimators.bandits import cressieread
+from estimators.bandits import gaussian
+from estimators.bandits import clopper_pearson
+from estimators.ccb import first_slot
+from estimators.test.utils import Helper
+
+def test_ccb_unit_test():
+    listofestimators = [(first_slot.Estimator(ips.Estimator()), 2.0),
+                        (first_slot.Estimator(snips.Estimator()), 1.0),
+                        (first_slot.Estimator(mle.Estimator()), 1.0),
+                        (first_slot.Estimator(cressieread.Estimator()), 1.0)]
+
+    p_log = [0.3]
+    p_pred = [0.6]
+    reward = [1]
+
+    for Estimator in listofestimators:
+        Estimator[0].add_example(p_log, reward, p_pred)
+        assert Estimator[0].get() == Estimator[1]
+
+def test_ccb():
+    ''' To test correctness of estimators: compare the expected value with the value returned by Estimator.get() '''
+
+    # The tuple (Estimator, expected value) for each estimator is stored in listofestimators
+    listofestimators = [(first_slot.Estimator(ips.Estimator()), 1.0),
+                        (first_slot.Estimator(snips.Estimator()), 1.0),
+                        (first_slot.Estimator(mle.Estimator()), 1.0),
+                        (first_slot.Estimator(cressieread.Estimator()), 1.0)]
+
+    def datagen_multiple_slot_values():
+        return {'p_log': [1, 0.5, 0.7],
+                'r': [1, 2, 3],
+                'p_pred': [1, 0.7, 0.5]}
+
+    def datagen_single_slot_value():
+        return {'p_log': [1],
+                'r': [1],
+                'p_pred': [1]}
+
+    estimates_multiple = Helper.get_estimate(datagen_multiple_slot_values, listofestimators=[l[0] for l in listofestimators], num_examples=4)
+    estimates_single = Helper.get_estimate(datagen_single_slot_value, listofestimators=[l[0] for l in listofestimators], num_examples=4)
+
+    for Estimator, estimate_multiple, estimate_single in zip(listofestimators, estimates_multiple, estimates_single):
+        Helper.assert_is_close(Estimator[1], estimate_multiple)
+        Helper.assert_is_close(Estimator[1], estimate_single)
+        assert estimate_single == estimate_multiple
+
+def test_narrowing_intervals():
+    ''' To test for narrowing intervals; Number of examples increase => narrowing CI '''
+
+    listofintervals = [first_slot.Interval(cressieread.Interval()), first_slot.Interval(gaussian.Interval()), first_slot.Interval(clopper_pearson.Interval())]
+
+    def datagen(epsilon, delta=0.5):
+        # Logged Policy
+        # 0 - (1-epsilon) : Reward is Bernoulli(delta)
+        # 1 - epsilon : Reward is Bernoulli(1-delta)
+
+        # p_pred: 1 if action is chosen, 0 if action not chosen
+
+        # policy to estimate
+        # (delta), (1-delta) reward from a Bernoulli distribution - for probability p_pred
+
+        chosen = int(random.random() < epsilon)
+        return {'p_log': [epsilon if chosen == 1 else 1 - epsilon],
+                'r': [int(random.random() < 1-delta) if chosen == 1 else int(random.random() < delta)],
+                'p_pred': [int(chosen==1)]}
+
+    intervals_n1 = Helper.get_estimate(lambda: datagen(epsilon=0.5), listofintervals, num_examples=100)
+    intervals_n2 = Helper.get_estimate(lambda: datagen(epsilon=0.5), listofintervals, num_examples=10000)
+
+    for interval_n1, interval_n2 in zip(intervals_n1, intervals_n2):
+        width_n1 = interval_n1[1] - interval_n1[0]
+        width_n2 = interval_n2[1] - interval_n2[0]
+        assert width_n1 > 0
+        assert width_n2 > 0
+        assert width_n2 < width_n1
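
For reviewers, a minimal usage sketch (not part of the diff) of how the new first-slot wrapper is exercised; it mirrors the values asserted in test_ccb_unit_test above and assumes the estimators package from this repository is importable:

    from estimators.bandits import ips
    from estimators.ccb import first_slot

    # Wrap a bandits IPS estimator so that only the first CCB slot is scored.
    est = first_slot.Estimator(ips.Estimator())

    # One example: logged probability 0.3, reward 1, predicted probability 0.6 for slot 0.
    est.add_example([0.3], [1], [0.6])

    # IPS estimate for the first slot: (0.6 / 0.3) * 1 = 2.0
    print(est.get())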