# nn_data_structs.jl
# some useful shorthands for complex array types
const T_model_data = Array{Union{Array{Float64},SparseVector{Float64,Int64},SparseMatrixCSC{Float64,Int64}},1}
const T_theta = Array{Array{Float64,2},1}
const T_bias = Array{Array{Float64,1},1}
const T_union_dense_sparse_array = Union{Array{Float64},SparseVector{Float64,Int64},SparseMatrixCSC{Float64,Int64}}
const T_array_subarray = Union{Array{Float64}, SubArray{Float64}}
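
# Illustrative sketch (an assumption; not referenced anywhere else in this file): T_model_data
# holds a mix of dense and sparse per-layer arrays, e.g. a sparse input layer followed by a dense
# hidden layer. It relies on SparseArrays being loaded, which the aliases above already require.
function _example_model_data_alias()    # hypothetical helper, defined only for illustration
    sparse_input = sprand(10, 5, 0.2)   # SparseMatrixCSC{Float64,Int64}: 10 features x 5 examples
    dense_hidden = zeros(8, 5)          # Array{Float64,2}: 8 units x 5 examples
    a = T_union_dense_sparse_array[sparse_input, dense_hidden]   # a Vector with the Union eltype, i.e., a T_model_data
    return a
end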
"""
struct Wgts holds model parameters learned by training and model metadata
"""
mutable struct Wgts # we will use nnw as the struct variable
theta::Array{Array{Float64,2},1}
bias::Array{Array{Float64,1},1}
delta_th::Array{Array{Float64,2},1}
delta_b::Array{Array{Float64,1},1}
# optimization weighted average of gradient: momentum, rmsprop, Adam
delta_v_th::Array{Array{Float64,2},1}
delta_v_b::Array{Array{Float64,1},1}
delta_s_th::Array{Array{Float64,2},1}
delta_s_b::Array{Array{Float64,1},1}
theta_dims::Array{Tuple{Int64, Int64},1}
output_layer::Int64
ks::Array{Int64,1} # number of output units in each layer (e.g., features for input layer)
# = the no. of rows in the weight matrix for each layer
norm_factors::Tuple{Array{Float64,2},Array{Float64,2}} # note: each array is 1 row by 2 cols
# calculate dropout mask for training
dropout_mask::Array{Array{Bool,1}, 1} # boolean filter for dropout--dims of a
Wgts() = new( # empty constructor
Array{Array{Float64,2},1}(undef, 0), # theta::Array{Array{Float64,2}}
Array{Array{Float64,1},1}(undef, 0), # bias::Array{Array{Float64,1}}
Array{Array{Float64,2},1}(undef, 0), # delta_th
Array{Array{Float64,1},1}(undef, 0), # delta_b
Array{Array{Float64,2},1}(undef, 0), # delta_v_th
Array{Array{Float64,1},1}(undef, 0), # delta_v_b
Array{Array{Float64,2},1}(undef, 0), # delta_s_th
Array{Array{Float64,1},1}(undef, 0), # delta_s_b
Array{Tuple{Int64, Int64},1}(undef, 0), # theta_dims::Array{Tuple{Int64,Int64},1}
3, # output_layer
Array{Int64,1}(undef, 0), # ks
([0.0 0.0], [1.0 0.0]), # norm_factors (mean, std)
Array{Array{Bool,1},1}(undef, 0) # dropout_mask::Array{Array{Bool,1},1}
)
end
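
# Illustrative sketch (an assumption; the layer sizes and fill-in convention are made up, and
# nothing else in this file calls it): how the empty Wgts constructor above might be populated
# during layer setup for a network with layer unit counts ks = [4, 8, 3].
function _example_wgts_setup()          # hypothetical helper, defined only for illustration
    nnw = Wgts()
    nnw.ks = [4, 8, 3]                  # input features, hidden units, output units
    nnw.output_layer = 3                # index of the output layer (the input layer is 1)
    nnw.theta_dims = [(8, 4), (3, 8)]   # (units in this layer, units in the prior layer)
    nnw.theta = [zeros(r, c) for (r, c) in nnw.theta_dims]   # one weight matrix per layer past the input
    nnw.bias = [zeros(k) for k in nnw.ks[2:end]]             # one bias vector per layer past the input
    return nnw
end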
"""
struct Hyper_parameters holds hyper_parameters used to control training
"""
mutable struct Hyper_parameters # we will use hp as the struct variable
alpha::Float64 # learning rate
alphamod::Float64 # optionally adjust learning rate with learn_decay
lambda::Float64 # L2 regularization rate
hidden::Array{Tuple{String,Int64},1} # array of ("unit", number) for hidden layers
n_layers::Int64
b1::Float64 # 1st optimization for momentum or Adam
b2::Float64 # 2nd optimization parameter for Adam
ltl_eps::Float64 # added to denominators to prevent overflow when dividing by very small values
dobatch::Bool # simple flag on whether to do minibatch training
do_batch_norm::Bool # true or false
reshuffle::Bool
norm_mode::String # "", "none", "standard", or "minmax"
dropout::Bool # true or false to choose dropout network
droplim::Array{Float64,1} # the probability a node output is kept
reg::String # L1, L2, maxnorm, or "none"
maxnorm_lim::Array{Float64,1}# [] with limits for hidden layers and output layer
opt::String # Adam or momentum or "none" or "" for optimization
opt_output::Bool # apply optimization to output layer
opt_batch_norm::Bool # don't optimize batchnorm params if optimizing training weights
opt_params::Array{Float64,1}# parameters for optimization
classify::String # behavior of output layer: "softmax", "sigmoid", or "regression"
mb_size::Int64 # minibatch size--calculated; last mini-batch may be smaller
mb_size_in::Int64 # requested minibatch size (input); the last actual minibatch may be smaller
epochs::Int64 # number of "outer" loops of training
do_learn_decay::Bool # step down the learning rate across epochs
learn_decay::Array{Float64,1} # reduction factor (fraction) and number of steps
sparse::Bool
initializer::String # "xavier" or "zero"
scale_init::Float64 # varies with initializer method: 2.0 for xavier, around .15 for others
bias_initializer::Float64 # 0.0, 1.0, between them
quiet::Bool # display progress messages or not
stats::Array{String, 1} # not a hyper_parameter, choice of stats data to collect during training
plot_now::Bool
Hyper_parameters() = new( # constructor with defaults--we use hp as the struct variable
0.35, # alpha -- OK for a neural net; way too high for linear regression
0.35, # alphamod
0.01, # lambda
[("none",0)], # hidden
0, # n_layers
0.9, # b1
0.999, # b2
1e-8, # ltl_eps
false, # dobatch
false, # do_batch_norm
false, # reshuffle
"none", # norm_mode
false, # dropout
[], # droplim
"", # reg
Float64[], # maxnorm_lim
"", # opt
false, # opt_output
false, # opt_batch_norm
[], # opt_params
"sigmoid", # classify
0, # mb_size
50, # mb_size_in
1, # epochs
false, # do_learn_decay
[1.0, 1.0], # learn_decay
false, # sparse
"xavier", # initializer
2.0, # scale_init
0.0, # bias_initializer
true, # quiet
["None"], # stats
false # plot_now
)
end
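
# Illustrative sketch (an assumption; the values and unit names are made up, and nothing else in
# this file calls it): overriding a few of the defaults set by the constructor above, the way a
# training setup step might before training begins.
function _example_hyper_parameters()    # hypothetical helper, defined only for illustration
    hp = Hyper_parameters()                     # start from the defaults above
    hp.alpha = 0.02                             # smaller learning rate
    hp.hidden = [("relu", 100), ("relu", 50)]   # two hidden layers as ("unit", number) pairs
    hp.dobatch = true                           # turn on minibatch training
    hp.mb_size_in = 64                          # requested minibatch size
    hp.reg = "L2"                               # L2 regularization...
    hp.lambda = 0.0005                          # ...with a small rate
    return hp
end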
"""
Struct Model_data holds examples and all layer outputs.
Arrays are pre-allocated to reduce memory allocations and improve speed.
Most fields are 1-dimensional arrays (one element per layer) of arrays
(the data values at that layer).
"""
mutable struct Model_data # we will use train for inputs and test for test data
# read from training, test, or production data
inputs::Union{AbstractArray{Float64},SparseVector{Float64,Int64},SparseMatrixCSC{Float64,Int64}} # in_k features by n examples
targets::Union{AbstractArray{Float64},SparseVector{Float64,Int64},SparseMatrixCSC{Float64,Int64}} # labels for each example
# calculated in feedforward pass
a::T_model_data #
z::T_model_data
# calculated in backprop pass
grad::T_model_data
epsilon::T_model_data # dims of a
# calculcated for batch_norm
z_norm::T_model_data # same size as z--for batch_norm
# descriptive
n::Int64 # number of examples
in_k::Int64 # number of input features
out_k::Int64 # number of output features
Model_data() = new( # empty constructor
zeros(0,0), # inputs
zeros(0,0), # targets
[zeros(0,0)], # a
[zeros(0,0)], # z
[zeros(0,0)], # grad
[zeros(0,0)], # epsilon
[zeros(0,0)], # z_norm -- only pre-allocate if batch_norm
0, # n
0, # in_k
0 # out_k
)
end
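
# Illustrative sketch (an assumption; the random data stands in for a real dataset, and nothing
# else in this file calls it): loading a small dense dataset into Model_data and recording the
# descriptive sizes the struct carries.
function _example_model_data()          # hypothetical helper, defined only for illustration
    train = Model_data()
    train.inputs = rand(4, 100)         # in_k = 4 features by n = 100 examples
    train.targets = rand(3, 100)        # out_k = 3 output units by n = 100 examples
    train.in_k, train.n = size(train.inputs)
    train.out_k = size(train.targets, 1)
    return train
end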
"""
Struct Batch_view holds views on all model data that will be broken into minibatches
"""
mutable struct Batch_view # we will use mb as the variable for minibatches
# array of views
a::Array{SubArray{}} #::Array{SubArray{Float64,2,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int64}},UnitRange{Int64}},true},1}
targets::SubArray{} #::SubArray{Float64,2,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int64}},UnitRange{Int64}},true}
z::Array{SubArray{}} #::Array{SubArray{Float64,2,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int64}},UnitRange{Int64}},true},1}
z_norm::Array{SubArray{}} #::Array{SubArray{Float64,2,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int64}},UnitRange{Int64}},true},1}
grad::Array{SubArray{}} #::Array{SubArray{Float64,2,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int64}},UnitRange{Int64}},true},1}
epsilon::Array{SubArray{}} #::Array{SubArray{Float64,2,Array{Float64,2},Tuple{Base.Slice{Base.OneTo{Int64}},UnitRange{Int64}},true},1}
Batch_view() = new( # empty constructor
Array{SubArray,1}(undef, 0), # a
view([0.0],1:1), # targets
Array{SubArray,1}(undef, 0), # z
Array{SubArray,1}(undef, 0), # z_norm
Array{SubArray,1}(undef, 0), # grad
Array{SubArray,1}(undef, 0) # epsilon
)
end
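
# Illustrative sketch (an assumption; the layer sizes and minibatch range are made up, and nothing
# else in this file calls it): filling a Batch_view with column views over dense per-layer data so
# one minibatch can be processed without copying.
function _example_batch_view()          # hypothetical helper, defined only for illustration
    dat = Model_data()
    dat.targets = rand(3, 100)                          # 3 outputs by 100 examples
    dat.a = [rand(4, 100), rand(8, 100), rand(3, 100)]  # one dense activation array per layer
    colrng = 1:32                                       # columns (examples) in this minibatch
    mb = Batch_view()
    mb.targets = view(dat.targets, :, colrng)
    mb.a = [view(x, :, colrng) for x in dat.a]
    return mb
end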
"""
struct Batch_norm_params holds batch normalization parameters for
feedfwd calculations and backprop training.
"""
mutable struct Batch_norm_params # we will use bn as the struct variable
# learned batch parameters to center and scale data
gam::Array{Array{Float64,1},1} # scaling parameter for z_norm
bet::Array{Array{Float64,1},1} # shifting parameter for z_norm (equivalent to bias)
delta_gam::Array{Array{Float64,1},1}
delta_bet::Array{Array{Float64,1},1}
# for optimization updates of bn parameters
delta_v_gam::Array{Array{Float64,1},1}
delta_s_gam::Array{Array{Float64,1},1}
delta_v_bet::Array{Array{Float64,1},1}
delta_s_bet::Array{Array{Float64,1},1}
# for standardizing batch values
mu::Array{Array{Float64,1},1} # mean of z; same size as bias = no. of input layer units
stddev::Array{Array{Float64,1},1} # std dev of z; ditto
mu_run::Array{Array{Float64,1},1} # running average of mu
std_run::Array{Array{Float64,1},1} # running average of stddev
Batch_norm_params() = new( # empty constructor
Array{Array{Float64,1},1}(undef, 0), # gam::Array{Array{Float64,1}}
Array{Array{Float64,1},1}(undef, 0), # bet::Array{Array{Float64,1}}
Array{Array{Float64,1},1}(undef, 0), # delta_gam
Array{Array{Float64,1},1}(undef, 0), # delta_bet
Array{Array{Float64,1},1}(undef, 0), # delta_v_gam
Array{Array{Float64,1},1}(undef, 0), # delta_s_gam
Array{Array{Float64,1},1}(undef, 0), # delta_v_bet
Array{Array{Float64,1},1}(undef, 0), # delta_s_bet
Array{Array{Float64,1},1}(undef, 0), # mu
Array{Array{Float64,1},1}(undef, 0), # stddev
Array{Array{Float64,1},1}(undef, 0), # mu_run
Array{Array{Float64,1},1}(undef, 0) # std_run
)
end
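
# Illustrative sketch (an assumption; the per-layer unit counts are made up, and nothing else in
# this file calls it): allocating the learned batch-norm scale (gam) and shift (bet) parameters,
# one vector per layer, sized like the bias vectors.
function _example_batch_norm_setup(layer_units::Array{Int64,1}=[8, 5])   # hypothetical helper
    bn = Batch_norm_params()
    bn.gam = [ones(k) for k in layer_units]     # scale starts at 1.0 so z_norm passes through unchanged
    bn.bet = [zeros(k) for k in layer_units]    # shift starts at 0.0
    bn.mu = [zeros(k) for k in layer_units]     # per-layer batch means, filled in during feedforward
    bn.stddev = [ones(k) for k in layer_units]  # per-layer batch standard deviations
    return bn
end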
"""
struct Model_def holds the functions that will run in a
model based on the hyper_parameters and data
"""
mutable struct Model_def
ff_strstack::Array{Array{String,1},1}
ff_execstack::Array{Array{Function,1},1}
back_strstack::Array{Array{String,1},1}
back_execstack::Array{Array{Function,1},1}
update_strstack::Array{Array{String,1},1}
update_execstack::Array{Array{Function,1},1}
cost_function::Function
Model_def() = new(
[String[]], # ff_strstack
[Function[]], # ff_execstack
[String[]], # back_strstack
[Function[]], # back_execstack
[String[]], # update_strstack
[Function[]], # update_execstack
noop, # cost_function
)
end
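
# Illustrative sketch (an assumption; the string labels and anonymous functions are placeholders,
# not the package's real layer functions, and nothing else in this file calls it): pairing a
# readable name stack with a matching execution stack, one inner array per layer. Constructing
# Model_def() relies on noop being defined elsewhere in the package, as the constructor above does.
function _example_model_def()           # hypothetical helper, defined only for illustration
    md = Model_def()
    md.ff_strstack = [["affine", "relu"], ["affine", "softmax"]]   # names, one list per layer
    md.ff_execstack = [Function[identity, x -> max.(x, 0.0)], Function[identity, identity]]   # placeholder callables
    return md
end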