-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_net.py
81 lines (66 loc) · 2.67 KB
/
test_net.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import pandas as pd
import numpy as np
# Load the train and test CSVs. The 'id' column is not a feature: it is
# removed from both frames, and the test ids are kept aside so they can be
# written next to the predictions at the end of the script.
train_data = pd.read_csv("data/train.csv").drop(columns=["id"])
raw_test = pd.read_csv("data/kaggle_test_data.csv")
test_ids = raw_test["id"]
test_data = raw_test.drop(columns=["id"])
# Label-encode every column that is not listed in `to_drop` (presumably the
# numeric features plus the label -- confirm against the CSV schema).
# Codes are assigned in order of first appearance in the TRAIN data and the
# same value -> code mapping is applied to the test data. A value that only
# occurs in the test data is never replaced and keeps its original value.
to_drop = ['age', 'fnlwgt', 'education-num', 'capital-gain',
           'capital-loss', 'hours-per-week', 'salary']
coloumn_headers = [name for name in list(train_data) if name not in to_drop]
for column in coloumn_headers:
    categories = train_data[column].unique()
    for code, category in enumerate(categories):
        train_data[column] = train_data[column].replace(category, code)
        test_data[column] = test_data[column].replace(category, code)
# Pull the labels out as a (1, n_samples) row vector. This takes the LAST
# column of the frame -- presumably 'salary'; TODO confirm it is always the
# final column of train.csv.
n_samples = len(train_data)
output = np.array(train_data)[:, -1].reshape(1, n_samples)

# Remove the label from the training features, and remove 'education' from
# both frames and from the list of categorical columns so it is not one-hot
# encoded later.
train_data = train_data.drop(columns=['salary'])
train_data = train_data.drop(columns=['education'])
test_data = test_data.drop(columns=['education'])
coloumn_headers.remove('education')
# One-hot encode the categorical columns. Each column currently holds the
# integer codes 0..n-1 assigned earlier; for every code k a 0/1 indicator
# column named "<column><k>" is added to both frames, then the original
# column is removed. The number of codes comes from the TRAIN data only, so a
# test row whose value was never seen in training gets all-zero indicators.
for column in coloumn_headers:
    n_categories = len(train_data[column].unique())
    for code in range(n_categories):
        indicator = column + str(code)
        train_data[indicator] = (train_data[column] == code) * 1
        test_data[indicator] = (test_data[column] == code) * 1
    train_data = train_data.drop(columns=[column])
    test_data = test_data.drop(columns=[column])
# Feature scaling using statistics taken from the TRAIN data only; the same
# shift and divisor are applied to the test data.
# NOTE(review): this computes (x - min) / max, not the textbook min-max form
# (x - min) / (max - min). It is left as-is because the saved weights were
# presumably fitted on features scaled this way -- confirm against the
# training script before changing it.
min_cols = train_data.min()
max_cols = train_data.max()
for frame in (train_data, test_data):
    # In-place operators mutate the frame objects bound to train_data and
    # test_data, and keep each frame's own column order.
    frame -= min_cols
    frame /= max_cols
# Load the saved network weights.
# Shapes: w1 is (100, n_features + 1), w2 is (25, 101), w3 is (1, 26). The
# extra column in each matrix lines up with the row/column of ones that the
# script appends to the inputs and to each hidden activation.
# The arrays start uninitialised (np.empty): any row that weights.txt does
# not provide keeps arbitrary memory contents.
w1 = np.empty([100, train_data.shape[1]+1])
w2 = np.empty([25, 101])
w3 = np.empty([1, 26])

# Each line of weights.txt has the form "<matrix name>|<row index>|<values>",
# where <values> is a comma-separated list that may be wrapped in brackets.
# Rows are assigned through an explicit name -> array lookup instead of
# exec(), so text from the file is never executed as Python code.
weight_matrices = {'w1': w1, 'w2': w2, 'w3': w3}
with open('weights.txt') as weights:
    for line in weights:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        name, row, values = line.split('|')[:3]
        values = values.rstrip().replace('[', '').replace(']', '')
        # Raises KeyError for an unknown matrix name and ValueError for a
        # non-integer row index or a wrong number of values.
        weight_matrices[name][int(row), :] = np.fromstring(values, sep=",")
# Convert both frames to plain arrays and append a trailing column of ones
# (the constant input that pairs with the last column of w1).
train_data = np.array(train_data)
train_bias = np.ones([train_data.shape[0], 1])
train_data = np.concatenate((train_data, train_bias), axis=1)

test_data = np.array(test_data)
test_bias = np.ones([test_data.shape[0], 1])
test_data = np.concatenate((test_data, test_bias), axis=1)
# Evaluate on test data
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), element-wise.

    Accepts a scalar or an array-like and returns a result of the same
    shape. The value is computed from exp(-|x|), which never exceeds 1, so
    large-magnitude negative inputs no longer overflow inside np.exp (the
    direct formula does, and emits a RuntimeWarning).
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x)        (numerator 1)
    # x <  0: e^x / (1 + e^x)       (numerator e^x == decay)
    return np.where(x >= 0, 1.0, decay) / (1.0 + decay)
def d_sigmoid(x):
    """Return the derivative of the logistic function at x, element-wise.

    The derivative e^-x / (1 + e^-x)^2 is an even function of x, so it is
    evaluated with exp(-|x|). That keeps the exponential in (0, 1]; the
    direct formula overflows to inf / inf = NaN for large negative x.
    """
    decay = np.exp(-np.abs(x))
    return decay / (1 + decay) ** 2
# Forward pass over the test set. Samples are columns here: test_data.T is
# (n_features + 1, n_samples). After each hidden layer a row of ones is
# stacked under the activations to pair with the last column of the next
# weight matrix.
z1 = w1.dot(test_data.T)
y1 = np.vstack((sigmoid(z1), np.ones([1, z1.shape[1]])))

z2 = w2.dot(y1)
y2 = np.vstack((sigmoid(z2), np.ones([1, z2.shape[1]])))

z3 = w3.dot(y2)
y3 = sigmoid(z3)

# Threshold the output at 0.5 to obtain 0/1 integer predictions.
out = (y3 > 0.5) * 1
# Write one "id,salary" row per test sample: the ids as the first column and
# the 0/1 predictions (transposed from a row to a column) as the second.
id_column = test_ids.values.reshape(len(test_ids), 1)
submission = np.concatenate((id_column, out.T), axis=1)
np.savetxt("results/predictions.csv", submission, delimiter=',',
           fmt=['%d', '%d'], header='id,salary', comments='')