adaboost.py (forked from eriklindernoren/ML-From-Scratch)
from __future__ import division, print_function
import math
import sys
import os
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
import pandas as pd
# Import helper functions
from mlfromscratch.utils.data_manipulation import train_test_split
from mlfromscratch.utils.data_operation import accuracy_score
from mlfromscratch.utils import Plot
# Decision stump used as weak classifier in Adaboost
class DecisionStump():
def __init__(self):
self.polarity = 1
self.feature_index = None
self.threshold = None
self.alpha = None
class Adaboost():
"""Boosting method that uses a number of weak classifiers in
ensemble to make a strong classifier. This implementation uses decision
    stumps, which are one-level decision trees.
Parameters:
-----------
n_clf: int
The number of weak classifiers that will be used.
"""
def __init__(self, n_clf=5):
self.n_clf = n_clf
# List of weak classifiers
self.clfs = []
def fit(self, X, y):
n_samples, n_features = np.shape(X)
# Initialize weights to 1/N
w = np.full(n_samples, (1 / n_samples))
# Iterate through classifiers
for _ in range(self.n_clf):
clf = DecisionStump()
            # Minimum weighted error found so far for any feature/threshold
            # combination used to predict the sample labels
min_error = 1
            # Iterate through every unique feature value and see what value
# makes the best threshold for predicting y
for feature_i in range(n_features):
feature_values = np.expand_dims(X[:, feature_i], axis=1)
unique_values = np.unique(feature_values)
# Try every unique feature value as threshold
for threshold in unique_values:
p = 1
# Set all predictions to '1' initially
prediction = np.ones(np.shape(y))
# Label the samples whose values are below threshold as '-1'
prediction[X[:, feature_i] < threshold] = -1
# Error = sum of weights of misclassified samples
error = sum(w[y != prediction])
if error > 0.5:
                        # An error above 0.5 means the opposite stump (predict '+1'
                        # below the threshold instead of '-1') does better, with
                        # error (1 - error). E.g. error = 0.8 => (1 - error) = 0.2,
                        # so we flip both the error and the polarity.
error = 1 - error
p = -1
# If this threshold resulted in the smallest error we save the
# configuration
if error < min_error:
clf.polarity = p
clf.threshold = threshold
clf.feature_index = feature_i
min_error = error
            # Calculate the alpha which is used to update the sample weights
            # and is an approximation of this classifier's proficiency
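            # This is the standard AdaBoost weight: alpha = 0.5 * ln((1 - error) / error).
            # E.g. min_error = 0.1 gives alpha ~= 1.10, while min_error = 0.5 (no better
            # than chance) gives alpha = 0; the 1e-10 below only guards against division
            # by zero when min_error is exactly 0.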
clf.alpha = 0.5 * math.log((1.0 - min_error) / (min_error + 1e-10))
# Set all predictions to '1' initially
predictions = np.ones(np.shape(y))
            # The indexes where the sample values fall on the '-1' side of the
            # threshold (the polarity decides which side that is)
negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold)
# Label those as '-1'
predictions[negative_idx] = -1
# Calculate new weights
            # Misclassified samples get larger weights, correctly classified samples smaller ones
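            # Update rule: w_i <- w_i * exp(-alpha * y_i * h(x_i)). The exponent is
            # -alpha for correctly classified samples (y_i * h(x_i) = +1) and +alpha
            # for misclassified ones (y_i * h(x_i) = -1), so mistakes gain weight.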
w *= np.exp(-clf.alpha * y * predictions)
# Normalize to one
w /= np.sum(w)
# Save classifier
self.clfs.append(clf)
def predict(self, X):
n_samples = np.shape(X)[0]
y_pred = np.zeros((n_samples, 1))
# For each classifier => label the samples
for clf in self.clfs:
# Set all predictions to '1' initially
predictions = np.ones(np.shape(y_pred))
            # The indexes where the sample values fall on the '-1' side of the
            # threshold (the polarity decides which side that is)
negative_idx = (clf.polarity * X[:, clf.feature_index] < clf.polarity * clf.threshold)
# Label those as '-1'
predictions[negative_idx] = -1
# Add predictions weighted by the classifiers alpha
            # (alpha is indicative of the classifier's proficiency)
y_pred += clf.alpha * predictions
# Return sign of prediction sum
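        # H(x) = sign(sum_t alpha_t * h_t(x)); note that np.sign maps an exactly
        # balanced vote (a sum of 0) to 0, which matches neither class label.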
y_pred = np.sign(y_pred).flatten()
return y_pred
def main():
data = datasets.load_digits()
X = data.data
y = data.target
digit1 = 1
digit2 = 8
idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
y = data.target[idx]
# Change labels to {-1, 1}
y[y == digit1] = -1
y[y == digit2] = 1
X = data.data[idx]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Adaboost classification
clf = Adaboost(n_clf=5)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
# Reduce dimensions to 2d using pca and plot the results
Plot().plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
if __name__ == "__main__":
main()
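# Example invocation (assumption: a checkout of ML-From-Scratch is on PYTHONPATH so
# that the `mlfromscratch.utils` imports above resolve; the exact command depends on
# where this file lives in the package):
#
#     $ python adaboost.py
#     Accuracy: ...
#
# The printed accuracy will vary between runs if train_test_split shuffles the
# digits data before splitting.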