# airware_svm.py -- SVM baseline for the AirWare gesture data set.
import argparse
import time

import joblib
import numpy as np
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from data import Read_Data
# The star imports below supply the project helpers used in this script:
# MaskedPCA, gridSearch_clf, remove_near_zero_var, eval_model,
# flat_list_of_array, and a classification_report wrapper that accepts a
# file_path argument (unlike sklearn's classification_report).
from utils.model_tuning import *
from utils.generate_report import *

MODEL_PATH = "./baseline_models/svm/"
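
# MaskedPCA is imported via utils.model_tuning above. For orientation, the
# sketch below shows one plausible implementation -- PCA fitted only on the
# masked (doppler) columns, with the remaining columns passed through
# unchanged. This is an illustrative assumption, not the project's actual
# class, and is kept commented out so it does not shadow the real import:
#
#     from sklearn.base import BaseEstimator, TransformerMixin
#     from sklearn.decomposition import PCA
#
#     class MaskedPCA(BaseEstimator, TransformerMixin):
#         def __init__(self, n_components=2, mask=None):
#             self.n_components = n_components
#             self.mask = mask  # boolean column mask; True -> PCA columns
#
#         def fit(self, X, y=None):
#             mask = (self.mask if self.mask is not None
#                     else np.ones(X.shape[1], dtype=bool))
#             self.pca_ = PCA(n_components=self.n_components)
#             self.pca_.fit(X[:, mask])
#             return self
#
#         def transform(self, X):
#             mask = (self.mask if self.mask is not None
#                     else np.ones(X.shape[1], dtype=bool))
#             return np.hstack([self.pca_.transform(X[:, mask]), X[:, ~mask]])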


def run_gridSearch_svm():
    start = time.time()
    # Number of principal components for Masked PCA
    n_components_range = [100, 200]
    # C trades off misclassification of training examples against simplicity
    # of the decision surface: a higher C tries to classify every training
    # example correctly, at the cost of a more complex boundary.
    c_range = np.logspace(-3, 3, 7)
    # gamma defines how far the influence of a single training example
    # reaches (low == far); it is the inverse of the radius of influence of
    # the samples selected as support vectors.
    gamma_range = np.logspace(-3, 3, 7)
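    # Both grids span six decades in decade steps:
    # np.logspace(-3, 3, 7) -> [0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0]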
    kernel_options = ['rbf', 'linear']
    gd = Read_Data.GestureData(gest_set=1)
    x, y, user, lab_enc = gd.compile_data(nfft=4096, overlap=0.5,
                                          brange=8, keras_format=False,
                                          plot_spectogram=False,
                                          baseline_format=True)
    # Delete near-zero-variance columns
    nz_var_ind = remove_near_zero_var(x, thresh=20)
    x = np.delete(x, nz_var_ind, axis=1)
    # Create a mask so that PCA runs only on the doppler signature: every
    # column except the last two is reduced, and the last two pass through.
    mask = np.arange(x.shape[1]) < x.shape[1] - 2
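    # For example, if x had 10 columns the mask would be
    # [True]*8 + [False, False] (illustrative size only).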
    param_grid = [
        {
            'reduce_dim__n_components': n_components_range,
            'reduce_dim__mask': [mask],
            'classify__C': c_range,
            'classify__kernel': kernel_options,
            'classify__gamma': gamma_range,
            'classify__class_weight': ['balanced']
        }
    ]
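    # The grid enumerates 2 * 7 * 7 * 2 = 196 candidate settings; note that
    # SVC ignores gamma when kernel='linear', so the linear half collapses
    # to far fewer distinct models.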
    clf_obj = SVC()
    grid_search_best_estimator = gridSearch_clf(x=x, y=y, groups=user,
                                                clf=clf_obj,
                                                param_grid=param_grid,
                                                file_path=MODEL_PATH)
    print('It took', time.time() - start, 'seconds.')
    return grid_search_best_estimator
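

# gridSearch_clf is provided by utils.model_tuning. For orientation, the
# hypothetical sketch below shows the kind of search it is assumed to run:
# a Pipeline wrapped in GridSearchCV with group-aware folds (so no user's
# samples appear in both train and validation splits), with the best
# parameters persisted for run_eval_svm() and train_svm_loso():
#
#     from sklearn.model_selection import GridSearchCV, GroupKFold
#
#     def gridSearch_clf(x, y, groups, clf, param_grid, file_path):
#         pipe = Pipeline([('normalize', StandardScaler()),
#                          ('reduce_dim', MaskedPCA()),
#                          ('classify', clf)])
#         search = GridSearchCV(pipe, param_grid, cv=GroupKFold(n_splits=5))
#         search.fit(x, y, groups=groups)
#         joblib.dump(search.best_params_, file_path + "clf_gridsearch.pkl")
#         return search.best_estimator_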


def run_eval_svm():
    # clf_gridsearch.pkl is expected to hold the best parameter dict found
    # by run_gridSearch_svm(), keyed by pipeline step (e.g. 'classify__C').
    svm_clf_params = joblib.load(MODEL_PATH + "clf_gridsearch.pkl")
    pipe = Pipeline([
        ('normalize', StandardScaler()),
        ('reduce_dim', MaskedPCA()),
        ('classify', SVC())
    ])
    svm_clf_pipe = pipe.set_params(**svm_clf_params)
    start = time.time()
    gd = Read_Data.GestureData(gest_set=1)
    x, y, user, lab_enc = gd.compile_data(nfft=4096, overlap=0.5,
                                          brange=8, keras_format=False,
                                          plot_spectogram=False,
                                          baseline_format=True)
    # Drop the same near-zero-variance columns as during grid search;
    # without this the saved PCA mask no longer matches the feature count.
    nz_var_ind = remove_near_zero_var(x, thresh=20)
    x = np.delete(x, nz_var_ind, axis=1)
    eval_model(svm_clf_pipe, x, y, MODEL_PATH + "SVM")
    print('It took', time.time() - start, 'seconds.')


def train_svm_loso():
    # Leave-one-subject-out CV: each fold holds out every sample from one
    # user, so the model is always tested on an unseen subject.
    cv_obj = LeaveOneGroupOut()
    train_scores, test_scores = [], []
    y_true, y_hat = [], []
    class_names = []
    i = 1
    svm_clf_params = joblib.load(MODEL_PATH + "clf_gridsearch.pkl")
    pipe = Pipeline([
        ('normalize', StandardScaler()),
        ('reduce_dim', MaskedPCA()),
        ('classify', SVC())
    ])
    start = time.time()
    gd = Read_Data.GestureData(gest_set=1)
    x, y, user, lab_enc = gd.compile_data(nfft=4096, overlap=0.5,
                                          brange=8, keras_format=False,
                                          plot_spectogram=False,
                                          baseline_format=True)
    # Delete near-zero-variance columns
    nz_var_ind = remove_near_zero_var(x, thresh=20)
    x = np.delete(x, nz_var_ind, axis=1)
    for train_idx, test_idx in cv_obj.split(x, y, user):
        print("\nUser:", i)
        i += 1
        # Train and test data - leave one subject out
        x_train, y_train = x[train_idx, :], y[train_idx]
        x_test, y_test = x[test_idx, :], y[test_idx]
        # Create copies of the train and test data sets
        x_train_copy, y_train_copy = x_train.copy(), y_train.copy()
        x_test_copy, y_test_copy = x_test.copy(), y_test.copy()
        # Refit a new model with the tuned parameters for this fold
        clf_pipe = pipe.set_params(**svm_clf_params)
        clf_pipe.fit(x_train_copy, y_train_copy)
        # Evaluate training and test scores
        train_scores.append(clf_pipe.score(x_train_copy, y_train_copy))
        test_scores.append(clf_pipe.score(x_test_copy, y_test_copy))
        # Collect per-user predictions, class names, and true labels
        y_hat_user = clf_pipe.predict(x_test_copy)
        class_names.append(lab_enc.classes_[y_test_copy])
        y_hat.append(y_hat_user)
        y_true.append(y_test_copy)
    y_true = flat_list_of_array(y_true)
    y_hat = flat_list_of_array(y_hat)
    # class_names = flat_list_of_array(class_names)
    print(classification_report(y_true=y_true, y_pred=y_hat,
                                target_names=lab_enc.classes_,
                                file_path=MODEL_PATH))
    print('It took', time.time() - start, 'seconds.')
    return None
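
# flat_list_of_array (from the utils star imports) is assumed to flatten a
# list of per-user arrays into a single 1-D array, roughly equivalent to
# np.concatenate(list_of_arrays).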


if __name__ == '__main__':
    function_map = {'gridSearch': run_gridSearch_svm,
                    'eval': run_eval_svm,
                    'train': train_svm_loso}
    parser = argparse.ArgumentParser(
        description="AirWare grid search and train model using different CV strategies")
    # Single-dash optional argument; marked required so the function_map
    # lookup below can never receive None.
    parser.add_argument('-function_name',
                        required=True,
                        help="Define function to run for SVM",
                        choices=['gridSearch', 'eval', 'train'])
    args = parser.parse_args()
    function = function_map[args.function_name]
    print("Running", args.function_name)
    function()
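
# Example invocations (run the grid search first so that
# clf_gridsearch.pkl exists for 'eval' and 'train'):
#   python airware_svm.py -function_name gridSearch
#   python airware_svm.py -function_name eval
#   python airware_svm.py -function_name train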