diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index 0d78aa5..2cdfc3a 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -15,7 +15,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.5, 3.6, 3.7, 3.8]
+        python-version: [3.6, 3.7, 3.8, 3.9]
 
     steps:
     - uses: actions/checkout@v2
@@ -34,7 +34,9 @@ jobs:
         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    - name: Test with pytest
+    - name: Test with pytest and check coverage
       run: |
         pip install pytest
         pytest
+        pip install pytest-cov
+        pytest --cov=estimators
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2d981b1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,24 @@
+# Jupyter notebook checkpoints
+**/.ipynb_checkpoints/*
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+*.egg-info
+
+# Python build artifacts
+build/
+dist/
+
+# Ignored example log files
+examples/*.log
+
+# Editors
+.vscode/
+.idea/
+
+# Type checking
+.mypy_cache
+
+.coverage
diff --git a/estimators/__init__.py b/estimators/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/estimators/bandits/__init__.py b/estimators/bandits/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/estimators/bandits/base.py b/estimators/bandits/base.py
new file mode 100644
index 0000000..4b268d4
--- /dev/null
+++ b/estimators/bandits/base.py
@@ -0,0 +1,51 @@
+""" Interface for implementation of contextual bandit estimators """
+
+from abc import ABC, abstractmethod
+from typing import List
+
+class Estimator(ABC):
+	""" Interface for implementation of contextual bandit estimators """
+
+	@abstractmethod
+	def add_example(self, p_log: float, r: float, p_pred: float, count: float) -> None:
+		""" Adds one logged example; each concrete estimator defines how it is accumulated
+		Args:
+			p_log: probability of the logging policy
+			r: reward for choosing an action in the given context
+			p_pred: predicted probability of making decision
+			count: weight of the example
+		"""
+		...
+
+	@abstractmethod
+	def get(self) -> float:
+		""" Calculates the selected estimator
+
+		Returns:
+			The estimator value
+		"""
+		...
+
+class Interval(ABC):
+	""" Interface for implementation of contextual bandit estimators interval """
+
+	@abstractmethod
+	def add_example(self, p_log: float, r: float, p_pred: float, count: float) -> None:
+		""" Adds one logged example; each concrete interval defines how it is accumulated
+		Args:
+			p_log: probability of the logging policy
+			r: reward for choosing an action in the given context
+			p_pred: predicted probability of making decision
+			count: weight of the example
+		"""
+		...
+
+	@abstractmethod
+	def get(self, alpha: float) -> List[float]:
+		""" Calculates the confidence interval (CI)
+		Args:
+			alpha: alpha value
+		Returns:
+			The confidence interval as a list of floats
+		"""
+		...
diff --git a/cats_utils.py b/estimators/bandits/cats_utils.py
similarity index 100%
rename from cats_utils.py
rename to estimators/bandits/cats_utils.py
diff --git a/estimators/bandits/clopper_pearson.py b/estimators/bandits/clopper_pearson.py
new file mode 100644
index 0000000..e96c14e
--- /dev/null
+++ b/estimators/bandits/clopper_pearson.py
@@ -0,0 +1,41 @@
+import math
+from scipy.stats import beta
+from estimators.bandits import base
+from typing import List
+
+class Interval(base.Interval):
+    """Clopper-Pearson confidence interval computed from importance-weighted rewards."""
+
+    def __init__(self):
+        # Running aggregates, keyed by short names:
+        #   'n': sum of r * (p_pred / p_log) * count over the added examples
+        #   'N': sum of count over the added examples
+        #   'c': largest r * (p_pred / p_log) seen so far (starts at 0, so never negative)
+        self.data = {'n':0.,'N':0,'c':0.}
+
+    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
+        """Fold one logged example into the aggregates."""
+        totals = self.data
+        totals['N'] += count
+        if not p_pred > 0:
+            return  # only the sample count changes for such examples
+
+        importance_weight = p_pred/p_log
+        if r == 0:
+            return  # a zero reward adds nothing to 'n' or 'c'
+        weighted_reward = r*importance_weight
+        totals['n'] += weighted_reward*count
+        totals['c'] = max(totals['c'], weighted_reward)
+
+    def get(self, alpha: float = 0.05) -> List[float]:
+        """Return [lower, upper] bounds for the given alpha, or [0, 0] when 'c' is not positive."""
+        scale = self.data['c']
+        if not scale > 0.0:
+            return [0, 0]
+
+        # Both aggregates are divided by 'c' before being passed to the Beta quantile function.
+        successes = self.data['n'] / scale
+        trials = self.data['N'] / scale
+        lower = beta.ppf(alpha / 2, successes, trials - successes + 1)
+        upper = beta.ppf(1 - alpha / 2, successes + 1, trials - successes)
+        return [lower, upper]
diff --git a/cressieread.py b/estimators/bandits/cressieread.py
similarity index 87%
rename from cressieread.py
rename to estimators/bandits/cressieread.py
index 2e961a2..015362a 100644
--- a/cressieread.py
+++ b/estimators/bandits/cressieread.py
@@ -1,12 +1,14 @@
 # CR(-2) is particularly computationally convenient
 
 from math import fsum, inf
+from estimators.bandits import base
+from typing import List
 
-class Estimator:
+class Estimator(base.Estimator):
     # NB: This works better you use the true wmin and wmax
     #     which is _not_ the empirical minimum and maximum
     #     but rather the actual smallest and largest possible values
-    def __init__(self, wmin=0, wmax=inf):
+    def __init__(self, wmin: float = 0, wmax: float = inf):
         assert wmin < 1
         assert wmax > 1
 
@@ -15,7 +17,7 @@ def __init__(self, wmin=0, wmax=inf):
 
         self.data = []
 
-    def add_example(self, p_log, r, p_pred, count=1):
+    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
         if count > 0:
             w = p_pred / p_log
             assert w >= 0, 'Error: negative importance weight'
@@ -24,7 +26,7 @@ def add_example(self, p_log, r, p_pred, count=1):
             self.wmax = max(self.wmax, w)
             self.wmin = min(self.wmin, w)
 
-    def get_estimate(self, rmin=0, rmax=1):
+    def get(self) -> float:
         n = fsum(c for c, _, _ in self.data)
         assert n > 0, 'Error: No data point added'
 
@@ -53,20 +55,23 @@ def get_estimate(self, rmin=0, rmax=1):
 
         return vhat
 
-class Interval:
+class Interval(base.Interval):
     # NB: This works better you use the true wmin and wmax
     #     which is _not_ the empirical minimum and maximum
     #     but rather the actual smallest and largest possible values
-    def __init__(self, wmin=0, wmax=inf):
+    def __init__(self, wmin: float = 0, wmax: float = inf, rmin: float = 0, rmax: float = 1):
         assert wmin < 1
         assert wmax > 1
 
         self.wmin = wmin
         self.wmax = wmax
 
+        self.rmin = rmin
+        self.rmax = rmax
+
         self.data = []
 
-    def add_example(self, p_log, r, p_pred, count=1):
+    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
         if count > 0:
             w = p_pred / p_log
             assert w >= 0, 'Error: negative importance weight'
@@ -75,7 +80,7 @@ def add_example(self, p_log, r, p_pred, count=1):
             self.wmax = max(self.wmax, w)
             self.wmin = min(self.wmin, w)
 
-    def get_interval(self, alpha=0.05, rmin=0, rmax=1):
+    def get(self, alpha: float = 0.05) -> List[float]:
         from math import isclose, sqrt
         from scipy.stats import f
 
@@ -100,7 +105,7 @@ def get_interval(self, alpha=0.05, rmin=0, rmax=1):
         phi = (-uncgstar - Delta) / (2 * (1 + n))
 
         bounds = []
-        for r, sign in ((rmin, 1), (rmax, -1)):
+        for r, sign in ((self.rmin, 1), (self.rmax, -1)):
             candidates = []
             for wfake in (self.wmin, self.wmax):
                 if wfake == inf:
@@ -144,7 +149,7 @@ def get_interval(self, alpha=0.05, rmin=0, rmax=1):
                                 candidates.append(gstar)
 
             best = min(candidates)
-            vbound = min(rmax, max(rmin, sign*best))
+            vbound = min(self.rmax, max(self.rmin, sign*best))
             bounds.append(vbound)
 
         return bounds
diff --git a/estimators/bandits/gaussian.py b/estimators/bandits/gaussian.py
new file mode 100644
index 0000000..24b6363
--- /dev/null
+++ b/estimators/bandits/gaussian.py
@@ -0,0 +1,43 @@
+import math
+from estimators.bandits import base
+from scipy import stats
+from typing import List
+
+class Interval(base.Interval):
+    """Gaussian (normal-approximation) confidence interval around the IPS estimate n/N."""
+
+    def __init__(self):
+        ################################# Aggregates quantities #########################################
+        #
+        # 'n':   IPS of numerator
+        # 'N':   total number of samples in bin from log (IPS = n/N)
+        # 'SoS': sum of squares of numerator's items (needed for Gaussian confidence intervals)
+        #
+        #################################################################################################
+
+        self.data = {'n':0.,'N':0,'SoS':0}
+
+    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
+        """Accumulate one logged example, weighted by count."""
+        self.data['N'] += count
+        if p_pred > 0:
+            p_over_p = p_pred/p_log
+            if r != 0:
+                self.data['n'] += r*p_over_p*count
+                self.data['SoS'] += ((r*p_over_p)**2)*count
+
+    def get(self, alpha: float = 0.05) -> List[float]:
+        """Return [lower, upper] for the given alpha, or [0, 0] when SoS is not positive or N <= 1."""
+        num = self.data['n']
+        den = self.data['N']
+        sum_of_sq = self.data['SoS']
+
+        if not (sum_of_sq > 0.0 and den > 1):
+            return [0, 0]
+
+        z_gaussian_cdf = stats.norm.ppf(1-alpha/2)
+        # Rounding can push the sample variance slightly below zero (e.g. identical
+        # samples); clamp it so math.sqrt does not raise a ValueError.
+        variance = max(0.0, (sum_of_sq - num * num / den) / (den - 1))
+        gauss_delta = z_gaussian_cdf * math.sqrt(variance/den)
+        return [num / den - gauss_delta, num / den + gauss_delta]
diff --git a/estimators/bandits/ips.py b/estimators/bandits/ips.py
new file mode 100644
index 0000000..e0ec8e6
--- /dev/null
+++ b/estimators/bandits/ips.py
@@ -0,0 +1,26 @@
+from estimators.bandits import base
+
+class Estimator(base.Estimator):
+    """Inverse propensity scoring (IPS) estimator: get() returns n/N."""
+
+    def __init__(self):
+        # 'n': running sum of r * (p_pred / p_log) * count over the added examples
+        # 'N': running sum of count over the added examples
+        self.data = {'n':0.,'N':0}
+
+    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
+        """Fold one logged example into the aggregates."""
+        totals = self.data
+        totals['N'] += count
+        if not p_pred > 0:
+            return  # only the sample count changes for such examples
+        importance_weight = p_pred/p_log
+        if r != 0:
+            totals['n'] += r*importance_weight*count
+
+    def get(self) -> float:
+        """Return n/N; raises ValueError if the total count N is 0."""
+        total_count = self.data['N']
+        if total_count == 0:
+            raise ValueError('Error: No data point added')
+        return self.data['n']/total_count
diff --git a/mle.py b/estimators/bandits/mle.py
similarity index 89%
rename from mle.py
rename to estimators/bandits/mle.py
index 74d7009..2ead64e 100644
--- a/mle.py
+++ b/estimators/bandits/mle.py
@@ -1,12 +1,13 @@
 # Empirical likehood estimator
 
 from math import fsum, inf
+from estimators.bandits import base
 
-class Estimator:
+class Estimator(base.Estimator):
     # NB: This works better you use the true wmin and wmax
     #     which is _not_ the empirical minimum and maximum
     #     but rather the actual smallest and largest possible values
-    def __init__(self, wmin=0, wmax=inf):
+    def __init__(self, wmin: float = 0, wmax: float = inf):
         assert wmin < 1
         assert wmax > 1
 
@@ -15,7 +16,7 @@ def __init__(self, wmin=0, wmax=inf):
 
         self.data = []
 
-    def add_example(self, p_log, r, p_pred, count=1):
+    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
         if count > 0:
             w = p_pred / p_log
             assert w >= 0, 'Error: negative importance weight'
@@ -28,7 +29,7 @@ def graddualobjective(self, n, beta):
        return fsum(c * (w - 1)/((w - 1) * beta + n)
                   for c, w, _ in self.data)
 
-    def get_estimate(self, rmin=0, rmax=1):
+    def get(self) -> float:
         from scipy.optimize import brentq
 
         n = fsum(c for c, _, _ in self.data)
diff --git a/estimators/bandits/snips.py b/estimators/bandits/snips.py
new file mode 100644
index 0000000..b8001d1
--- /dev/null
+++ b/estimators/bandits/snips.py
@@ -0,0 +1,31 @@
+from estimators.bandits import base
+
+class Estimator(base.Estimator):
+    """Self-normalized IPS (SNIPS) estimator: get() returns n/d."""
+
+    def __init__(self):
+        # Running aggregates over the added examples:
+        #   'n': sum of r * (p_pred / p_log) * count
+        #   'N': sum of count
+        #   'd': sum of (p_pred / p_log) * count
+        self.data = {'n':0.,'N':0,'d':0.}
+
+    def add_example(self, p_log: float, r: float, p_pred: float, count: float = 1.0) -> None:
+        """Fold one logged example into the aggregates."""
+        totals = self.data
+        totals['N'] += count
+        if not p_pred > 0:
+            return  # only the sample count changes for such examples
+
+        importance_weight = p_pred/p_log
+        totals['d'] += importance_weight*count
+        if r != 0:
+            totals['n'] += r*importance_weight*count
+
+    def get(self) -> float:
+        """Return n/d, or 0 when d is 0; raises ValueError if the total count N is 0."""
+        totals = self.data
+        if totals['N'] == 0:
+            raise ValueError('Error: No data point added')
+
+        return totals['n']/totals['d'] if totals['d'] != 0 else 0
diff --git a/basic-usage.py b/estimators/basic-usage.py
similarity index 53%
rename from basic-usage.py
rename to estimators/basic-usage.py
index 51221ac..d1f38c0 100644
--- a/basic-usage.py
+++ b/estimators/basic-usage.py
@@ -1,23 +1,37 @@
 import argparse, os, gzip
-import cressieread
-import ips_snips
-import mle
-import ds_parse
-import cats_utils
+from bandits import cressieread
+from bandits import ips
+from bandits import snips
+from bandits import mle
+from bandits import gaussian
+from bandits import clopper_pearson
+from bandits import cats_utils
+from utils import ds_parse
 
 
 def compute_estimates(log_fp, cats_transformer=None):
     # Init estimators
-    online = ips_snips.Estimator()
-    baseline1 = ips_snips.Estimator()
-    baselineR = ips_snips.Estimator()
+    online_ips = ips.Estimator()
+    online_snips = snips.Estimator()
     online_mle = mle.Estimator()
-    baseline1_mle = mle.Estimator()
-    baselineR_mle = mle.Estimator()
     online_cressieread = cressieread.Estimator()
+
+    baseline1_ips = ips.Estimator()
+    baseline1_snips = snips.Estimator()
+    baseline1_mle = mle.Estimator()
     baseline1_cressieread = cressieread.Estimator()
+
+    baselineR_ips = ips.Estimator()
+    baselineR_snips = snips.Estimator()
+    baselineR_mle = mle.Estimator()
     baselineR_cressieread = cressieread.Estimator()
 
+    baseline1_gaussian = gaussian.Interval()
+    baseline1_clopper_pearson = clopper_pearson.Interval()
+
+    baselineR_gaussian = gaussian.Interval()
+    baselineR_clopper_pearson = clopper_pearson.Interval()
+
     print('Processing: {}'.format(log_fp))
     bytes_count = 0
     tot_bytes = os.path.getsize(log_fp)
@@ -41,9 +55,13 @@ def compute_estimates(log_fp, cats_transformer=None):
             r = 0 if data['cost'] == b'0' else -float(data['cost'])
 
             # Update estimators with tuple (p_log, r, p_pred)
-            online.add_example(data['p'], r, data['p'])
-            baseline1.add_example(data['p'], r, 1 if data['a'] == 1 else 0)
-            baselineR.add_example(data['p'], r, 1/data['num_a'])
+            online_ips.add_example(data['p'], r, data['p'])
+            baseline1_ips.add_example(data['p'], r, 1 if data['a'] == 1 else 0)
+            baselineR_ips.add_example(data['p'], r, 1/data['num_a'])
+
+            online_snips.add_example(data['p'], r, data['p'])
+            baseline1_snips.add_example(data['p'], r, 1 if data['a'] == 1 else 0)
+            baselineR_snips.add_example(data['p'], r, 1/data['num_a'])
 
             online_mle.add_example(data['p'], r, data['p'])
             baseline1_mle.add_example(data['p'], r, 1 if data['a'] == 1 else 0)
@@ -53,6 +71,12 @@ def compute_estimates(log_fp, cats_transformer=None):
             baseline1_cressieread.add_example(data['p'], r, 1 if data['a'] == 1 else 0)
             baselineR_cressieread.add_example(data['p'], r, 1/data['num_a'])
 
+            baseline1_gaussian.add_example(data['p'], r, 1 if data['a'] == 1 else 0)
+            baseline1_clopper_pearson.add_example(data['p'], r, 1 if data['a'] == 1 else 0)
+
+            baselineR_gaussian.add_example(data['p'], r, 1/data['num_a'])
+            baselineR_clopper_pearson.add_example(data['p'], r, 1/data['num_a'])
+
             evts += 1
 
         if x.startswith(b'{"_label_ca":') and x.strip().endswith(b'}'):
@@ -70,9 +94,13 @@ def compute_estimates(log_fp, cats_transformer=None):
             r = 0 if data['cost'] == b'0' else -float(data['cost'])
 
             # Update estimators with tuple (p_log, r, p_pred)
-            online.add_example(data['p'], r, data['p'])
-            baseline1.add_example(data['p'], r, data_baseline1['pred_p'])
-            baselineR.add_example(data['p'], r, 1.0 / cats_transformer.continuous_range)
+            online_ips.add_example(data['p'], r, data['p'])
+            baseline1_ips.add_example(data['p'], r, data_baseline1['pred_p'])
+            baselineR_ips.add_example(data['p'], r, 1.0 / cats_transformer.continuous_range)
+
+            online_snips.add_example(data['p'], r, data['p'])
+            baseline1_snips.add_example(data['p'], r, data_baseline1['pred_p'])
+            baselineR_snips.add_example(data['p'], r, 1.0 / cats_transformer.continuous_range)
 
             online_mle.add_example(data['p'], r, data['p'])
             baseline1_mle.add_example(data['p'], r, data_baseline1['pred_p'])
@@ -82,6 +110,12 @@ def compute_estimates(log_fp, cats_transformer=None):
             baseline1_cressieread.add_example(data['p'], r, data_baseline1['pred_p'])
             baselineR_cressieread.add_example(data['p'], r, 1.0 / cats_transformer.continuous_range)
 
+            baseline1_gaussian.add_example(data['p'], r, data_baseline1['pred_p'])
+            baseline1_clopper_pearson.add_example(data['p'], r, data_baseline1['pred_p'])
+
+            baselineR_gaussian.add_example(data['p'], r, 1.0 / cats_transformer.continuous_range)
+            baselineR_clopper_pearson.add_example(data['p'], r, 1.0 / cats_transformer.continuous_range)
+            
             evts += 1
 
 
@@ -92,28 +126,28 @@ def compute_estimates(log_fp, cats_transformer=None):
 
     print('\nProcessed {} events out of {} lines'.format(evts,i+1))
 
-    print('online_ips:',online.get_estimate('ips'))
+    print('online_ips:',online_ips.get())
 
-    print('baseline1_ips:', baseline1.get_estimate('ips'))
-    print('baseline1 gaussian ci:', baseline1.get_interval('gaussian'))
-    print('baseline1 clopper pearson ci:', baseline1.get_interval('clopper-pearson'))
+    print('baseline1_ips:', baseline1_ips.get())
+    print('baseline1 gaussian ci:', baseline1_gaussian.get())
+    print('baseline1 clopper pearson ci:', baseline1_clopper_pearson.get())
 
-    print('baselineR_ips:',baselineR.get_estimate('ips'))
-    print('baselineR gaussian ci:', baselineR.get_interval('gaussian'))
-    print('baselineR clopper pearson ci:', baselineR.get_interval('clopper-pearson'))
+    print('baselineR_ips:',baselineR_ips.get())
+    print('baselineR gaussian ci:', baselineR_gaussian.get())
+    print('baselineR clopper pearson ci:', baselineR_clopper_pearson.get())
 
 
-    print('online_snips:',online.get_estimate('snips'))
-    print('baseline1_snips:',baseline1.get_estimate('snips'))
-    print('baselineR_snips:',baselineR.get_estimate('snips'))
+    print('online_snips:',online_snips.get())
+    print('baseline1_snips:',baseline1_snips.get())
+    print('baselineR_snips:',baselineR_snips.get())
 
-    print('online_mle:',online_mle.get_estimate())
-    print('baseline1_mle:',baseline1_mle.get_estimate())
-    print('baselineR_mle:',baselineR_mle.get_estimate())
+    print('online_mle:',online_mle.get())
+    print('baseline1_mle:',baseline1_mle.get())
+    print('baselineR_mle:',baselineR_mle.get())
 
-    print('online_cressieread:',online_cressieread.get_estimate())
-    print('baseline1_cressieread:',baseline1_cressieread.get_estimate())
-    print('baselineR_cressieread:',baselineR_cressieread.get_estimate())
+    print('online_cressieread:',online_cressieread.get())
+    print('baseline1_cressieread:',baseline1_cressieread.get())
+    print('baselineR_cressieread:',baselineR_cressieread.get())
 
 if __name__ == '__main__':
 
diff --git a/estimators/ccb/__init__.py b/estimators/ccb/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/estimators/ccb/base.py b/estimators/ccb/base.py
new file mode 100644
index 0000000..5c80d62
--- /dev/null
+++ b/estimators/ccb/base.py
@@ -0,0 +1,51 @@
+""" Interface for implementation of conditional contextual bandits estimators """
+
+from abc import ABC, abstractmethod
+from typing import List
+
+class Estimator(ABC):
+	""" Interface for implementation of conditional contextual bandits estimators """
+
+	@abstractmethod
+	def add_example(self, p_log: List[float], r: List[float], p_pred: List[float], count: float) -> None:
+		""" Adds one logged example; each concrete estimator defines how it is accumulated
+		Args:
+			p_log: List of probability of the logging policy
+			r: List of reward for choosing an action in the given context
+			p_pred: List of predicted probability of making decision
+			count: weight of the example
+		"""
+		...
+
+	@abstractmethod
+	def get(self) -> float:
+		""" Calculates the selected estimator
+
+		Returns:
+			The estimator value
+		"""
+		...
+
+class Interval(ABC):
+	""" Interface for implementation of conditional contextual bandits estimators interval """
+
+	@abstractmethod
+	def add_example(self, p_log: List[float], r: List[float], p_pred: List[float], count: float) -> None:
+		""" Adds one logged example; each concrete interval defines how it is accumulated
+		Args:
+			p_log: List of probability of the logging policy
+			r: List of reward for choosing an action in the given context
+			p_pred: List of predicted probability of making decision
+			count: weight of the example
+		"""
+		...
+
+	@abstractmethod
+	def get(self, alpha: float) -> List[float]:
+		""" Calculates the confidence interval (CI)
+		Args:
+			alpha: alpha value
+		Returns:
+			The confidence interval as a list of floats
+		"""
+		...
diff --git a/estimators/ccb/first_slot.py b/estimators/ccb/first_slot.py
new file mode 100644
index 0000000..c6f9b5a
--- /dev/null
+++ b/estimators/ccb/first_slot.py
@@ -0,0 +1,38 @@
+from estimators.ccb import base
+from typing import List
+
+class Estimator(base.Estimator):
+    """CCB estimator that scores only the first slot, delegating to a wrapped bandits estimator."""
+    def __init__(self, bandits_estimator):
+        self.estimator = bandits_estimator
+
+    def add_example(self, p_logs: List[float], r: List[float], p_preds: List[float], count: float = 1.0) -> None:
+        """Add one example from per-slot lists of logged probabilities, rewards and predicted probabilities.
+        Raises ValueError if any argument is not a list or the list lengths differ."""
+        if not isinstance(p_logs, list) or not isinstance(r, list) or not isinstance(p_preds, list):
+            raise ValueError('Error: p_logs, r and p_preds must be lists')
+        if not (len(p_logs) == len(r) == len(p_preds)):
+            raise ValueError('Error: p_logs, r and p_preds must be the same length, found {}, {} and {} respectively'.format(len(p_logs), len(r), len(p_preds)))
+        # Only slot 0 is evaluated; count is forwarded so weighted examples are honoured.
+        self.estimator.add_example(p_logs[0], r[0], p_preds[0], count)
+
+    def get(self) -> float:
+        return self.estimator.get()
+
+class Interval(base.Interval):
+    """CCB interval that scores only the first slot, delegating to a wrapped bandits interval."""
+    def __init__(self, bandits_interval):
+        self.interval = bandits_interval
+
+    def add_example(self, p_logs: List[float], r: List[float], p_preds: List[float], count: float = 1.0) -> None:
+        """Add one example from per-slot lists of logged probabilities, rewards and predicted probabilities.
+        Raises ValueError if any argument is not a list or the list lengths differ."""
+        if not isinstance(p_logs, list) or not isinstance(r, list) or not isinstance(p_preds, list):
+            raise ValueError('Error: p_logs, r and p_preds must be lists')
+        if not (len(p_logs) == len(r) == len(p_preds)):
+            raise ValueError('Error: p_logs, r and p_preds must be the same length, found {}, {} and {} respectively'.format(len(p_logs), len(r), len(p_preds)))
+        # Only slot 0 is evaluated; count is forwarded so weighted examples are honoured.
+        self.interval.add_example(p_logs[0], r[0], p_preds[0], count)
+
+    def get(self, alpha: float = 0.05) -> List[float]:
+        return self.interval.get(alpha)
diff --git a/estimators/slates/__init__.py b/estimators/slates/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/estimators/slates/base.py b/estimators/slates/base.py
new file mode 100644
index 0000000..b2a7ccf
--- /dev/null
+++ b/estimators/slates/base.py
@@ -0,0 +1,50 @@
+""" Interface for implementation of slates estimator """
+
+from abc import ABC, abstractmethod
+from typing import List
+
+class Estimator(ABC):
+	""" Interface for implementation of slates estimator """
+
+	@abstractmethod
+	def add_example(self, p_logs: List[float], r: float, p_preds: List[float], count: float) -> None:
+		""" Adds one logged example; each concrete estimator defines how it is accumulated
+		Args:
+			p_logs: List of probabilities of the logging policy
+			r: reward for choosing an action in the given context
+			p_preds: List of predicted probabilities of making decision
+			count: weight of the example
+		"""
+		...
+
+	@abstractmethod
+	def get(self) -> float:
+		""" Calculates the selected estimator
+		Returns:
+			The estimator value
+		"""
+		...
+
+class Interval(ABC):
+	""" Interface for implementation of slates estimator interval """
+
+	@abstractmethod
+	def add_example(self, p_logs: List[float], r: float, p_preds: List[float], count: float) -> None:
+		""" Adds one logged example; each concrete interval defines how it is accumulated
+		Args:
+			p_logs: List of probabilities of the logging policy
+			r: reward for choosing an action in the given context
+			p_preds: List of predicted probabilities of making decision
+			count: weight of the example
+		"""
+		...
+
+	@abstractmethod
+	def get(self, alpha: float) -> List[float]:
+		""" Calculates the confidence interval (CI)
+		Args:
+			alpha: alpha value
+		Returns:
+			The confidence interval as a list of floats
+		"""
+		...
diff --git a/estimators/slates/gaussian.py b/estimators/slates/gaussian.py
new file mode 100644
index 0000000..a135063
--- /dev/null
+++ b/estimators/slates/gaussian.py
@@ -0,0 +1,49 @@
+import math
+from estimators.slates import base
+from scipy import stats
+from typing import List
+
+class Interval(base.Interval):
+    """Gaussian (normal-approximation) confidence interval around the slates estimate n/N."""
+    def __init__(self):
+        # 'n': sum of r * weight * count, 'N': sum of count, 'SoS': sum of (r * weight)**2 * count
+        self.data = {'n':0.,'N':0, 'SoS':0}
+
+    def add_example(self, p_logs: List[float], r: float, p_preds: List[float], count: float = 1.0) -> None:
+        """Expects lists for logged probabilities and predicted probabilities. These should correspond to each slot.
+        This function is implemented under the simplifying assumptions of
+        example 4 in the paper 'Off-policy evaluation for slate recommendation'
+        where the slate space is a cartesian product and the logging policy is a
+        product distribution"""
+        if not isinstance(p_logs, list) or not isinstance(p_preds, list):
+            raise ValueError('Error: p_logs and p_preds must be lists')
+
+        if(len(p_logs) != len(p_preds)):
+            raise ValueError('Error: p_logs and p_preds must be the same length, found {} and {} respectively'.format(len(p_logs), len(p_preds)))
+
+        self.data['N'] += count
+        # weight = sum over slots of p_pred/p_log, minus (number of slots - 1)
+        p_over_ps = 0
+        num_slots = len(p_logs)
+        for p_log, p_pred in zip(p_logs, p_preds):
+            p_over_ps += p_pred/p_log
+        p_over_ps -= num_slots - 1
+
+        if r != 0:
+            self.data['n'] += r*p_over_ps*count
+            self.data['SoS'] += ((r*p_over_ps)**2)*count
+
+    def get(self, alpha: float = 0.05) -> List[float]:
+        """Return [lower, upper] for the given alpha, or [0, 0] when SoS is not positive or N <= 1."""
+        num = self.data['n']
+        den = self.data['N']
+        sum_of_sq = self.data['SoS']
+        # den <= 1 would divide by zero (or by a negative number) in the variance below.
+        if not (sum_of_sq > 0.0 and den > 1):
+            return [0, 0]
+
+        z_gaussian_cdf = stats.norm.ppf(1-alpha/2)
+        # Rounding can push the variance slightly below zero; clamp so math.sqrt cannot raise.
+        variance = max(0.0, (sum_of_sq - num * num / den) / (den - 1))
+        gauss_delta = z_gaussian_cdf * math.sqrt(variance/den)
+        return [num / den - gauss_delta, num / den + gauss_delta]
diff --git a/estimators/slates/pseudo_inverse.py b/estimators/slates/pseudo_inverse.py
new file mode 100644
index 0000000..3f7ad84
--- /dev/null
+++ b/estimators/slates/pseudo_inverse.py
@@ -0,0 +1,40 @@
+import math
+from estimators.slates import base
+from typing import List
+
+# PseudoInverse estimator for slate recommendation. The following implements the
+# case for a Cartesian product when mu is a product distribution. This can be
+# seen in example 4 of the paper.
+# https://arxiv.org/abs/1605.04812
+
+class Estimator(base.Estimator):
+    """Pseudo-inverse estimator for slates: get() returns n/N."""
+    def __init__(self):
+        self.data = {'n':0.,'N':0}
+
+    def add_example(self, p_logs: List[float], r: float, p_preds: List[float], count: float = 1.0) -> None:
+        """Fold one logged slate into the aggregates.
+        p_logs and p_preds hold one probability per slot. The weighting follows
+        example 4 of the paper 'Off-policy evaluation for slate recommendation'
+        (cartesian product slate space, product-distribution logging policy)."""
+        if not (isinstance(p_logs, list) and isinstance(p_preds, list)):
+            raise ValueError('Error: p_logs and p_preds must be lists')
+        slot_count = len(p_logs)
+        if slot_count != len(p_preds):
+            raise ValueError('Error: p_logs and p_preds must be the same length, found {} and {} respectively'.format(len(p_logs), len(p_preds)))
+
+        totals = self.data
+        totals['N'] += count
+        # Sum the per-slot ratios, then subtract (number of slots - 1).
+        slate_weight = 0
+        for logged, predicted in zip(p_logs, p_preds):
+            slate_weight += predicted/logged
+        slate_weight -= slot_count - 1
+        if r != 0:
+            totals['n'] += r*slate_weight*count
+
+    def get(self) -> float:
+        """Return n/N; raises ValueError if the total count N is 0."""
+        if self.data['N'] == 0:
+            raise ValueError('Error: No data point added')
+        return self.data['n']/self.data['N']
diff --git a/estimators/test/__init__.py b/estimators/test/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/estimators/test/test_bandits.py b/estimators/test/test_bandits.py
new file mode 100644
index 0000000..16fecab
--- /dev/null
+++ b/estimators/test/test_bandits.py
@@ -0,0 +1,188 @@
+import os, sys, random, copy
+import numpy as np
+import pytest
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from estimators.bandits import ips
+from estimators.bandits import snips
+from estimators.bandits import mle
+from estimators.bandits import cressieread
+from estimators.bandits import cats_utils
+from estimators.bandits import gaussian
+from estimators.bandits import clopper_pearson
+from estimators.test.utils import Helper
+
+@pytest.fixture(autouse=True)  # autouse: no test requests this fixture by name, so it has to apply itself
+def random_fixture():
+    random.seed(0)  # the datagen() helpers of the interval tests draw from `random`
+    np.random.seed(0)
+
+def test_single_example():
+    ''' A single example with p_log=0.3, p_pred=0.6 and reward 1 must give the listed value for each estimator '''
+    p_log, p_pred, reward = 0.3, 0.6, 1
+
+    # (estimator, expected estimate) pairs
+    cases = [(ips.Estimator(), 2.0), (snips.Estimator(), 1.0), (mle.Estimator(), 1.0), (cressieread.Estimator(), 1.0)]
+
+    for estimator, expected in cases:
+        estimator.add_example(p_log, reward, p_pred)
+        assert estimator.get() == expected
+
+
+def test_multiple_examples():
+    ''' Feed four identical examples (p_log = r = p_pred = 1) and check every estimator returns the expected value '''
+
+    def datagen():
+        return dict(p_log=1, r=1, p_pred=1)
+
+    # every estimator is expected to report 1 on this data
+    expected = 1
+    candidates = [
+        ips.Estimator(),
+        snips.Estimator(),
+        mle.Estimator(),
+        cressieread.Estimator(),
+        ]
+
+    estimates = Helper.get_estimate(datagen, estimators=candidates, num_examples=4)
+
+    # get_estimate returns one estimate per candidate, in the same order
+    for estimate in estimates:
+        Helper.assert_is_close(expected, estimate)
+
+
+def test_narrowing_intervals():
+    ''' Confidence intervals built from 10000 examples must be narrower than those built from 100 '''
+
+    def datagen(epsilon, delta=0.5):
+        # Logging policy: action 1 is chosen with probability epsilon, action 0 otherwise.
+        # The reward is Bernoulli(1-delta) for action 1 and Bernoulli(delta) for action 0.
+        # p_pred is 1 when action 1 was logged and 0 otherwise.
+        chosen = int(random.random() < epsilon)
+        if chosen == 1:
+            p_log, success_prob = epsilon, 1 - delta
+        else:
+            p_log, success_prob = 1 - epsilon, delta
+        reward = int(random.random() < success_prob)
+        return {'p_log': p_log, 'r': reward, 'p_pred': int(chosen==1)}
+
+    def sample():
+        return datagen(epsilon=0.5)
+
+    intervals = [
+        cressieread.Interval(),
+        gaussian.Interval(),
+        clopper_pearson.Interval(),
+        ]
+
+    small_sample = Helper.get_estimate(sample, intervals, num_examples=100)
+    large_sample = Helper.get_estimate(sample, intervals, num_examples=10000)
+
+    for few, many in zip(small_sample, large_sample):
+        width_few = few[1] - few[0]
+        width_many = many[1] - many[0]
+        assert width_few > 0
+        assert width_many > 0
+        assert width_many < width_few
+
+
+def test_different_alpha_CI():
+    ''' To test that alpha is not hard coded: request an interval for every alpha of np.arange(0.1, 1, 0.1) '''
+
+    def datagen(epsilon, delta=0.5):
+        # Logging policy: action 1 is chosen with probability epsilon, action 0 otherwise.
+        # The reward is Bernoulli(1-delta) for action 1 and Bernoulli(delta) for action 0.
+        # p_pred is 1 when action 1 was logged and 0 otherwise.
+        chosen = int(random.random() < epsilon)
+        if chosen == 1:
+            p_log, success_prob = epsilon, 1 - delta
+        else:
+            p_log, success_prob = 1 - epsilon, delta
+        reward = int(random.random() < success_prob)
+        return {'p_log': p_log, 'r': reward, 'p_pred': int(chosen==1)}
+
+    alphas = np.arange(0.1, 1, 0.1)
+
+    intervals = [
+        cressieread.Interval(),
+        gaussian.Interval(),
+        clopper_pearson.Interval(),
+        ]
+
+    for empty_interval in intervals:
+        # run_add_example fills a deep copy with 100 examples and returns that copy
+        fitted = Helper.run_add_example(lambda: datagen(epsilon=0.5), empty_interval, num_examples=100)
+        for alpha in alphas:
+            # get() must return something truthy for every alpha
+            assert fitted.get(alpha=alpha)
+
+
+def test_cats_ips():
+    ''' Transform logged continuous actions with CatsTransformer and check that IPS never falls below SNIPS '''
+    max_value, bandwidth = 32, 1
+    cats_transformer = cats_utils.CatsTransformer(num_actions=8, min_value=0, max_value=max_value, bandwidth=bandwidth)
+    ips_estimator, snips_estimator = ips.Estimator(), snips.Estimator()
+
+    # logged examples as (action, reward, logged probability)
+    logged = [
+        (15.0, 0.1, 0.151704),
+        (3.89, 0.2, 0.006250),
+        (22.3, 0, 0.086),
+        (17.34, 1.0, 0.086),
+        (31, 1.0, 0.086),
+        ]
+
+    for logged_action, r, logged_prob in logged:
+        data = {'a': logged_action, 'cost': r, 'p': logged_prob}
+        if logged_action < (max_value / 2.0):
+            # predict two bandwidths away from the logged action: too far, so pred_p must be 0
+            data = cats_transformer.transform(data, logged_action + 2 * bandwidth)
+            assert data['pred_p'] == 0.0
+        else:
+            # predict exactly the logged action: pred_p must be 1 / (2 * bandwidth)
+            data = cats_transformer.transform(data, logged_action)
+            assert data['pred_p'] == 1.0 / (2 * bandwidth)
+
+        ips_estimator.add_example(data['p'], r, data['pred_p'])
+        snips_estimator.add_example(data['p'], r, data['pred_p'])
+        assert ips_estimator.get() >= snips_estimator.get()
+
+
+def test_cats_transformer_on_edges():
+    ''' Predicting the logged action must give pred_p = 1 / (2 * bandwidth), also at the ends of the action range '''
+    max_value = 32
+    bandwidth = 2
+    cats_transformer = cats_utils.CatsTransformer(num_actions=8, min_value=0, max_value=max_value, bandwidth=bandwidth)
+
+    # logged examples as (action, reward, logged probability); the actions sit on or next to min_value and max_value
+    logged = [
+        (0, 1.0, 0.151704),
+        (1, 1.0, 0.006250),
+        (31, 1.0, 0.086),
+        (32, 1.0, 0.086),
+        ]
+
+    for logged_action, r, logged_prob in logged:
+        data = {'a': logged_action, 'cost': r, 'p': logged_prob}
+        data = cats_transformer.transform(data, logged_action)
+        assert data['pred_p'] == 1.0 / (2 * bandwidth)
+
+
+def test_cats_baseline():
+    ''' The baseline prediction must be the centre of the first unit range, counted from min_value '''
+
+    bandwidth = 1
+    num_actions = 8
+
+    # (min_value, max_value, expected baseline): the unit range is 4 in both cases,
+    # so the centre of the first unit range is min_value + 2
+    cases = [
+        (0, 32, 2),
+        (1, 33, 3),
+        ]
+
+    for min_value, max_value, expected in cases:
+        cats_transformer = cats_utils.CatsTransformer(
+            num_actions=num_actions, min_value=min_value, max_value=max_value, bandwidth=bandwidth)
+        baseline = cats_transformer.get_baseline1_prediction()
+        assert baseline == expected
diff --git a/estimators/test/test_ccb.py b/estimators/test/test_ccb.py
new file mode 100644
index 0000000..0d2cf7d
--- /dev/null
+++ b/estimators/test/test_ccb.py
@@ -0,0 +1,92 @@
+import os, sys, random, copy
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from estimators.bandits import ips
+from estimators.bandits import snips
+from estimators.bandits import mle
+from estimators.bandits import cressieread
+from estimators.bandits import gaussian
+from estimators.bandits import clopper_pearson
+from estimators.ccb import first_slot
+from estimators.test.utils import Helper
+
+random.seed(0)
+
+def test_single_example():
+    ''' One single-slot example (p_log=0.3, p_pred=0.6, reward=1) must give the listed value for each estimator '''
+    p_log, p_pred, reward = [0.3], [0.6], [1]
+
+    # (first_slot estimator wrapping a bandit estimator, expected estimate) pairs
+    cases = [
+        (first_slot.Estimator(ips.Estimator()), 2.0),
+        (first_slot.Estimator(snips.Estimator()), 1.0),
+        (first_slot.Estimator(mle.Estimator()), 1.0),
+        (first_slot.Estimator(cressieread.Estimator()), 1.0),
+        ]
+
+    for estimator, expected in cases:
+        estimator.add_example(p_log, reward, p_pred)
+        assert estimator.get() == expected
+
+def test_multiple_examples():
+    ''' first_slot estimators must return 1 on four identical examples, with or without additional slots '''
+
+    def datagen_multiple_slot_values():
+        # three slots per example; the first slot has p_log = r = p_pred = 1
+        return dict(p_log=[1, 0.5, 0.7], r=[1, 2, 3], p_pred=[1, 0.7, 0.5])
+
+    def datagen_single_slot_value():
+        # the same first slot on its own
+        return dict(p_log=[1], r=[1], p_pred=[1])
+
+    # every estimator is expected to report 1.0 on both data sets
+    expected = 1.0
+    candidates = [
+        first_slot.Estimator(ips.Estimator()),
+        first_slot.Estimator(snips.Estimator()),
+        first_slot.Estimator(mle.Estimator()),
+        first_slot.Estimator(cressieread.Estimator()),
+        ]
+
+    estimates_multiple = Helper.get_estimate(datagen_multiple_slot_values, estimators=candidates, num_examples=4)
+    estimates_single = Helper.get_estimate(datagen_single_slot_value, estimators=candidates, num_examples=4)
+
+    for estimate_multiple, estimate_single in zip(estimates_multiple, estimates_single):
+        Helper.assert_is_close(expected, estimate_multiple)
+        Helper.assert_is_close(expected, estimate_single)
+        # the additional slots must not change the estimate at all
+        assert estimate_single == estimate_multiple
+
+def test_narrowing_intervals():
+    ''' first_slot intervals built from 10000 examples must be narrower than those built from 100 '''
+
+    def datagen(epsilon, delta=0.5):
+        # Logging policy: action 1 is chosen with probability epsilon, action 0 otherwise.
+        # The reward is Bernoulli(1-delta) for action 1 and Bernoulli(delta) for action 0.
+        # p_pred is 1 when action 1 was logged and 0 otherwise; every value is wrapped in a one-slot list.
+        chosen = int(random.random() < epsilon)
+        if chosen == 1:
+            p_log, success_prob = epsilon, 1 - delta
+        else:
+            p_log, success_prob = 1 - epsilon, delta
+        reward = int(random.random() < success_prob)
+        return {'p_log': [p_log], 'r': [reward], 'p_pred': [int(chosen==1)]}
+
+    def sample():
+        return datagen(epsilon=0.5)
+
+    intervals = [
+        first_slot.Interval(cressieread.Interval()),
+        first_slot.Interval(gaussian.Interval()),
+        first_slot.Interval(clopper_pearson.Interval()),
+        ]
+
+    small_sample = Helper.get_estimate(sample, intervals, num_examples=100)
+    large_sample = Helper.get_estimate(sample, intervals, num_examples=10000)
+
+    for few, many in zip(small_sample, large_sample):
+        width_few = few[1] - few[0]
+        width_many = many[1] - many[0]
+        assert width_few > 0
+        assert width_many > 0
+        assert width_many < width_few
diff --git a/estimators/test/test_slates.py b/estimators/test/test_slates.py
new file mode 100644
index 0000000..5ffd36b
--- /dev/null
+++ b/estimators/test/test_slates.py
@@ -0,0 +1,88 @@
+import os, sys, random, copy
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from estimators.slates import pseudo_inverse
+from estimators.slates import gaussian
+from estimators.bandits import ips
+from estimators.test.utils import Helper
+
+random.seed(0)
+
+def test_single_slot_pi_equivalent_to_ips():
+    ''' With a single slot per example, PI must agree with IPS after every added example '''
+
+    pi_estimator = pseudo_inverse.Estimator()
+    ips_estimator = ips.Estimator()
+
+    # logged examples as (p_log, reward, p_pred)
+    examples = [(0.8, 0.1, 0.6), (0.25, 0.2, 0.4), (0.5, 0, 0.3), (0.2, 1.0, 0.9)]
+
+    for p_log, r, p_pred in examples:
+        # PI takes one probability per slot, so the single slot is wrapped in a list
+        pi_estimator.add_example([p_log], r, [p_pred])
+        ips_estimator.add_example(p_log, r, p_pred)
+        Helper.assert_is_close(pi_estimator.get(), ips_estimator.get())
+
+def test_multiple_slots():
+    ''' Four identical four-slot examples with all probabilities and the reward equal to 1 must give the expected value '''
+
+    def datagen(num_slots):
+        # num_slots is len(p_log) and len(p_pred) of the generated example
+        return {
+            'p_log': [1] * num_slots,
+            'r': 1,
+            'p_pred': [1] * num_slots,
+            }
+
+    # (estimator, expected value) pairs
+    cases = [
+        (pseudo_inverse.Estimator(), 1),
+        ]
+    candidates = [estimator for estimator, _ in cases]
+
+    # 4 examples, each of the form
+    #   p_log  = [1, 1, 1, 1]
+    #   p_pred = [1, 1, 1, 1]
+    #   r      = 1
+    estimates = Helper.get_estimate(lambda: datagen(num_slots=4), estimators=candidates, num_examples=4)
+
+    for (_, expected), estimate in zip(cases, estimates):
+        Helper.assert_is_close(expected, estimate)
+
+def test_narrowing_intervals():
+    ''' The slates Gaussian interval built from 10000 examples must be narrower than the one built from 100 '''
+
+    def datagen(num_slots, epsilon, delta=0.5):
+        # Each slot logs action 1 with probability epsilon and action 0 otherwise.
+        # A slot adds a Bernoulli(1-delta) reward for action 1 and a Bernoulli(delta) reward for action 0;
+        # the example reward 'r' is the sum over the slots.
+        # p_pred of a slot is 1 when action 1 was logged there and 0 otherwise.
+        p_logs, p_preds, total_reward = [], [], 0.0
+        for _ in range(num_slots):
+            chosen = int(random.random() < epsilon)
+            if chosen == 1:
+                p_log, success_prob = epsilon, 1 - delta
+            else:
+                p_log, success_prob = 1 - epsilon, delta
+            p_logs.append(p_log)
+            total_reward += int(random.random() < success_prob)
+            p_preds.append(int(chosen==1))
+
+        return {'p_log': p_logs, 'r': total_reward, 'p_pred': p_preds}
+
+    def sample():
+        return datagen(num_slots=4, epsilon=0.5)
+
+    intervals = [
+        gaussian.Interval(),
+        ]
+
+    small_sample = Helper.get_estimate(sample, intervals, num_examples=100)
+    large_sample = Helper.get_estimate(sample, intervals, num_examples=10000)
+
+    for few, many in zip(small_sample, large_sample):
+        width_few = few[1] - few[0]
+        width_many = many[1] - many[0]
+        assert width_few > 0
+        assert width_many > 0
+        assert width_many < width_few
diff --git a/estimators/test/utils.py b/estimators/test/utils.py
new file mode 100644
index 0000000..afbda66
--- /dev/null
+++ b/estimators/test/utils.py
@@ -0,0 +1,31 @@
+import random, copy
+
+class Helper():
+    ''' Static helper functions shared by the estimator tests '''
+
+    @staticmethod
+    def assert_is_close(n1, n2):
+        ''' Assert that n1 and n2 differ by at most 1e-6 * (1 + |n1| + |n2|) '''
+        tolerance = 1e-6 * (1 + abs(n1) + abs(n2))
+        assert abs(n1 - n2) <= tolerance
+
+    @staticmethod
+    def run_add_example(datagen, estimator, num_examples):
+        ''' Feed num_examples examples from datagen() to a deep copy of estimator and return that copy.
+
+        datagen() must return a dict with the keys 'p_log', 'r' and 'p_pred'; estimator itself is left untouched. '''
+        filled = copy.deepcopy(estimator)
+
+        for _ in range(num_examples):
+            example = datagen()
+            filled.add_example(example['p_log'], example['r'], example['p_pred'])
+
+        return filled
+
+    @staticmethod
+    def get_estimate(datagen, estimators, num_examples):
+        ''' Return, for every entry of estimators, get() of a copy filled through run_add_example '''
+        return [
+            Helper.run_add_example(datagen, candidate, num_examples).get()
+            for candidate in estimators
+        ]
diff --git a/estimators/utils/__init__.py b/estimators/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/ds_parse.py b/estimators/utils/ds_parse.py
similarity index 100%
rename from ds_parse.py
rename to estimators/utils/ds_parse.py
diff --git a/ips_snips.py b/ips_snips.py
deleted file mode 100644
index 1cba0d1..0000000
--- a/ips_snips.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import math
-from scipy.stats import beta
-
-
-class Estimator:
-    def __init__(self):
-        ############################### Aggregates quantities ######################################
-        #
-        # 'n':   IPS of numerator
-        # 'N':   total number of samples in bin from log (IPS = n/N)
-        # 'd':   IPS of denominator (SNIPS = n/d)
-        # 'Ne':  number of samples in bin when off-policy agrees with log policy
-        # 'c':   max abs. value of numerator's items (needed for Clopper-Pearson confidence intervals)
-        # 'SoS': sum of squares of numerator's items (needed for Gaussian confidence intervals)
-        #
-        #################################################################################################
-
-        self.data = {'n':0.,'N':0,'d':0.,'Ne':0,'c':0.,'SoS':0}
-
-    def add_example(self, p_log, r, p_pred, count=1):
-        self.data['N'] += count
-        if p_pred > 0:
-            p_over_p = p_pred/p_log
-            self.data['d'] += p_over_p*count
-            self.data['Ne'] += count
-            if r != 0:
-                self.data['n'] += r*p_over_p*count
-                self.data['c'] = max(self.data['c'], r*p_over_p)
-                self.data['SoS'] += ((r*p_over_p)**2)*count
-
-    def get_estimate(self, type):
-        if self.data['N'] == 0:
-            raise('Error: No data point added')
-
-        if type == 'ips':
-            return self.data['n']/self.data['N']
-        elif type == 'snips':
-            if self.data['d'] != 0:
-                return self.data['n']/self.data['d']
-            else:
-                return 0
-        else:
-            raise('Error: Incorrect estimator type {}. Supported options are ips or snips'.format(type))
-
-
-    def get_interval(self, type, alpha=0.05):
-        bounds = []
-        num = self.data['n']
-        den = self.data['N']
-        maxWeightedCost = self.data['c']
-        SoS = self.data['SoS']
-
-        if type == "clopper-pearson":
-            if maxWeightedCost > 0.0:
-                successes = num / maxWeightedCost
-                n = den / maxWeightedCost
-                bounds.append(beta.ppf(alpha / 2, successes, n - successes + 1))
-                bounds.append(beta.ppf(1 - alpha / 2, successes + 1, n - successes))
-        elif type == "gaussian":
-            if SoS > 0.0 and den > 1:
-                zGaussianCdf = {
-                  0.25: 1.15,
-                  0.1: 1.645,
-                  0.05: 1.96
-                }
-
-                variance = (SoS - num * num / den) / (den - 1)
-                gaussDelta = zGaussianCdf[alpha] * math.sqrt(variance/den)
-                bounds.append(num / den - gaussDelta)
-                bounds.append(num / den + gaussDelta)
-
-        if not bounds:
-            bounds = [0, 0]
-        return bounds
diff --git a/pseudo_inverse.py b/pseudo_inverse.py
deleted file mode 100644
index fdb392f..0000000
--- a/pseudo_inverse.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import math
-from scipy.stats import beta
-
-# PseudoInverse estimator for slate recommendation. The following implements the
-# case for a Cartesian product when mu is a product distribution. This can be
-# seen in example 4 of the paper.
-# https://arxiv.org/abs/1605.04812
-
-class Estimator:
-    def __init__(self):
-        self.data = {'n':0.,'N':0, 'SoS':0}
-
-    def add_example(self, p_logs, r, p_preds, count=1):
-        """Expects lists for logged probabilities and predicted probabilities. These should correspond to each slot.
-        This function is implemented under the simplifying assumptions of
-        example 4 in the paper 'Off-policy evaluation for slate recommendation'
-        where the slate space is a cartesian product and the logging policy is a
-        product distribution"""
-        if not isinstance(p_logs, list) or not isinstance(p_preds, list):
-            raise('Error: p_logs and p_preds must be lists')
-
-        if(len(p_logs) != len(p_preds)):
-            raise('Error: p_logs and p_preds must be the same length, found {} and {} respectively'.format(len(p_logs), len(p_preds)))
-
-        self.data['N'] += count
-        p_over_ps = 0
-        num_slots = len(p_logs)
-        for p_log, p_pred in zip(p_logs, p_preds):
-            p_over_ps += p_pred/p_log
-        p_over_ps -= num_slots - 1
-
-        if r != 0:
-            self.data['n'] += r*p_over_ps*count
-            self.data['SoS'] += ((r*p_over_ps)**2)*count
-
-    def get_estimate(self, type):
-        if self.data['N'] == 0:
-            raise('Error: No data point added')
-
-        if type == 'pi':
-            return self.data['n']/self.data['N']
-        else:
-            raise('Error: Incorrect estimator type {}. Supported options are pi'.format(type))
-
-    def get_interval(self, type, alpha=0.05):
-        bounds = []
-        num = self.data['n']
-        den = self.data['N']
-        SoS = self.data['SoS']
-
-        if type == "gaussian":
-            if SoS > 0.0:
-                zGaussianCdf = {
-                  0.25: 1.15,
-                  0.1: 1.645,
-                  0.05: 1.96
-                }
-
-                variance = (SoS - num * num / den) / (den - 1)
-                gaussDelta = zGaussianCdf[alpha] * math.sqrt(variance/den)
-                bounds.append(num / den - gaussDelta)
-                bounds.append(num / den + gaussDelta)
-        else:
-            raise('Error: Incorrect interval type {}. Supported options are gaussian'.format(type))
-
-        if not bounds:
-            bounds = [0, 0]
-        return bounds
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..ed5554e
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,25 @@
+import setuptools
+
+with open("README.md", "r", encoding="utf-8") as f:  # explicit: the default encoding is platform dependent
+    long_description = f.read()
+
+setuptools.setup(
+    name="vw-estimators",
+    version="0.0.1",
+    description="Python package of estimators to perform off-policy evaluation",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    url="https://github.com/VowpalWabbit/estimators.git",
+    license="BSD 3-Clause License",
+    classifiers=[
+        "Intended Audience :: Science/Research",
+        "License :: OSI Approved :: BSD License",
+        "Programming Language :: Python :: 3.6",
+        "Operating System :: OS Independent",
+        "Topic :: Scientific/Engineering"
+    ],
+    packages=["estimators", "estimators.bandits", "estimators.ccb", "estimators.slates", "estimators.utils"],
+    install_requires=['scipy>=0.9'],
+    tests_require=['pytest'],
+    python_requires=">=3.6",
+)
\ No newline at end of file
diff --git a/test/test_pi.py b/test/test_pi.py
deleted file mode 100644
index f9d3838..0000000
--- a/test/test_pi.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import os, sys
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-
-import pseudo_inverse
-import ips_snips
-import cats_utils
-
-def test_single_slot_pi_equivalent_to_ips():
-    """PI should be equivalent to IPS when there is only a single slot"""
-    pi_estimator = pseudo_inverse.Estimator()
-    ips_estimator = ips_snips.Estimator()
-    is_close = lambda a, b: abs(a - b) <= 1e-6 * (1 + abs(a) + abs(b))
-
-    p_logs = [0.8, 0.25, 0.5, 0.2]
-    p_preds = [0.6, 0.4, 0.3, 0.9]
-    rewards = [0.1, 0.2, 0, 1.0]
-
-    for p_log, r, p_pred in zip(p_logs, rewards, p_preds):
-        pi_estimator.add_example([p_log], r, [p_pred])
-        ips_estimator.add_example(p_log, r, p_pred)
-        assert is_close(pi_estimator.get_estimate('pi') , ips_estimator.get_estimate('ips'))
-
-
-def test_cats_ips():
-    ips_estimator = ips_snips.Estimator()
-
-    prob_logs = [0.151704, 0.006250, 0.086, 0.086, 0.086]
-    action_logs = [15.0, 3.89, 22.3, 17.34, 31]
-    rewards = [0.1, 0.2, 0, 1.0, 1.0]
-
-    max_value = 32
-    bandwidth = 1
-    cats_transformer = cats_utils.CatsTransformer(num_actions=8, min_value=0, max_value=max_value, bandwidth=bandwidth)
-
-    for logged_action, r, logged_prob in zip(action_logs, rewards, prob_logs):
-        data = {}
-        data['a'] = logged_action
-        data['cost'] = r
-        data['p'] = logged_prob
-        if logged_action < (max_value / 2.0):
-            pred_action = logged_action + 2 * bandwidth
-            data = cats_transformer.transform(data, pred_action) # pred_action should be too far away, so pred_p should be 0
-            assert data['pred_p'] == 0.0
-        else:
-            pred_action = logged_action
-            data = cats_transformer.transform(data, logged_action) # same action, so pred_p should be 1
-            assert data['pred_p'] == 1.0 / (2 * bandwidth)
-
-        ips_estimator.add_example(data['p'], r, data['pred_p'])
-        assert ips_estimator.get_estimate('ips') >= ips_estimator.get_estimate('snips')
-
-def test_cats_transformer_on_edges():
-    prob_logs = [0.151704, 0.006250, 0.086, 0.086]
-    action_logs = [0, 1, 31, 32]
-    rewards = [1.0, 1.0, 1.0, 1.0]
-
-    max_value = 32
-    bandwidth = 2
-    cats_transformer = cats_utils.CatsTransformer(num_actions=8, min_value=0, max_value=max_value, bandwidth=bandwidth)
-
-    for logged_action, r, logged_prob in zip(action_logs, rewards, prob_logs):
-        data = {}
-        data['a'] = logged_action
-        data['cost'] = r
-        data['p'] = logged_prob
-    
-        pred_action = logged_action
-        data = cats_transformer.transform(data, logged_action) # same action, so pred_p should be 1
-        assert data['pred_p'] == 1.0 / (2 * bandwidth)
-
-
-def test_cats_baseline():
-    max_value = 32
-    min_value = 0
-    bandwidth = 1
-    num_actions = 8
-    cats_transformer = cats_utils.CatsTransformer(num_actions=num_actions, min_value=min_value, max_value=max_value, bandwidth=bandwidth)
-    baseline = cats_transformer.get_baseline1_prediction()
-    ## unit range is 4, min_value is 0 so baseline action should be the centre of the firt unit range, starting off from min_value i.e. 2
-    assert baseline == 2
-
-    max_value = 33
-    min_value = 1
-    bandwidth = 1
-    num_actions = 8
-    cats_transformer = cats_utils.CatsTransformer(num_actions=num_actions, min_value=min_value, max_value=max_value, bandwidth=bandwidth)
-    baseline = cats_transformer.get_baseline1_prediction()
-    ## unit range is 4, min_value is 1 so baseline action should be the centre of the firt unit range, starting off from min_value i.e. 3
-    assert baseline == 3