# run_traintest2.py
import os
import sys

import numpy as np
import torch
from ray import tune
#from ray.tune import CLIReporter
#from ray.tune.schedulers import ASHAScheduler
#from ray.tune.suggest.hyperopt import HyperOptSearch
# set path
deeprad_dir = os.path.abspath(os.path.join(os.getcwd()))
if deeprad_dir not in sys.path:
sys.path.insert(0, deeprad_dir)
# Import deeprad models
from deeprad.traintest_utils import main
from deeprad import utils
pp, fd = utils.pp, utils.fd
np.random.seed(2) # TODO: confirm right location?
def parse_argv(arg_str, argv, arg_dict, is_bool=False):
    """Return the value for ``--<arg_str>`` found in *argv*, else the default.

    Args:
        arg_str: Argument name without the leading ``--``.
        argv: List of raw command-line tokens to search.
        arg_dict: Defaults; ``arg_dict[arg_str]`` is returned when the flag
            is absent.
        is_bool: When True, interpret the value as ``bool(int(value))``
            (``--flag 0`` -> False, ``--flag 1`` -> True).

    Returns:
        The parsed int (or bool when ``is_bool``), or the default value.

    Raises:
        ValueError: If the flag is present but not followed by a value,
            or the value is not an integer.
    """
    search_arg_str = '--' + arg_str
    if search_arg_str not in argv:
        return arg_dict[arg_str]
    i = argv.index(search_arg_str)
    if i + 1 >= len(argv):
        # Original code raised a bare IndexError here; be explicit instead.
        raise ValueError('{} requires a value'.format(search_arg_str))
    val = int(argv[i + 1])  # ValueError on non-integer input is intentional
    return bool(val) if is_bool else val
def update_arg_dict(arg_dict):
    """Parse user args from ``sys.argv`` and overwrite entries of *arg_dict*.

    Every ``--flag`` token must name an existing key of *arg_dict*; values
    are parsed by :func:`parse_argv`.

    Args:
        arg_dict: Mapping of argument name -> default value; updated and
            returned.

    Returns:
        The same *arg_dict*, updated from the command line when any CLI
        arguments were supplied.

    Raises:
        ValueError: If an unknown ``--flag`` is supplied.
    """
    if len(sys.argv) > 1:
        argv = sys.argv[1:]
        for arg in argv:
            # Only tokens that *start* with '--' are flags; plain values are
            # skipped (the original `'--' not in arg` also misclassified
            # values that merely contained '--').
            if not arg.startswith('--'):
                continue
            if arg[2:] not in arg_dict:
                # Was an `assert`; a real exception survives `python -O`.
                raise ValueError('{} not an arg: {}'.format(arg, arg_dict.keys()))
        arg_dict['max_epochs'] = parse_argv('max_epochs', argv, arg_dict, is_bool=False)
        arg_dict['batch_size'] = parse_argv('batch_size', argv, arg_dict, is_bool=False)
        arg_dict['data_limit'] = parse_argv('data_limit', argv, arg_dict, is_bool=False)
        arg_dict['run_hparam'] = parse_argv('run_hparam', argv, arg_dict, is_bool=True)
        arg_dict['run_gpu'] = parse_argv('run_gpu', argv, arg_dict, is_bool=True)
    return arg_dict
if __name__ == "__main__":
    # Import the module (not just `main`) so traintest_utils.viz_* below
    # resolve — the top-of-file import only binds `main`, so the original
    # `traintest_utils.main(...)` call raised NameError. Imported here,
    # after the sys.path fix at module top.
    from deeprad import traintest_utils

    pretrained_model_fpath = None
    # os.path.join(deeprad_dir, 'models', pretrained_model, pretrained_model + '.pt')

    # Release any cached GPU memory before building the model.
    torch.cuda.empty_cache()

    # Default run options; each can be overridden with a `--flag value` arg.
    arg_dict = {
        'max_epochs': 15,
        'batch_size': 5,
        'data_limit': None,
        'run_hparam': False,
        'run_gpu': True}

    try:
        arg_dict = update_arg_dict(arg_dict)
        print('\nPress Enter to confirm user arg:')
        for k, v in arg_dict.items():
            print('\t{}: {}'.format(k, v))
        input('...')
    except Exception as e:
        # Best-effort: fall back to the defaults if parsing/confirmation fails.
        print('Skip arg dict. {}'.format(e))

    # ---------------------------------------------------------------------------------
    # Hyperparameters
    # ---------------------------------------------------------------------------------
    out_dir = os.path.join(deeprad_dir, "data", "traintest3", "out_data2")
    in_dir = os.path.join(deeprad_dir, "data", "traintest3", "in_data2")

    max_epochs = arg_dict['max_epochs']
    batch_size = arg_dict['batch_size']
    data_limit = arg_dict['data_limit']
    run_hparam = arg_dict['run_hparam']
    run_gpu = arg_dict['run_gpu']

    hparam = {
        'max_epochs': max_epochs,
        'batch_size': batch_size}

    # Define model directory; data-limited runs are throwaway smoke tests,
    # hence the 'delete_me' prefix.
    now = utils.time_str()
    nullify = 'delete_me' if data_limit else 'model'
    model_fname = '{}_target_{}'.format(nullify, now)

    if not run_hparam:
        # Fixed hyperparameters for a single train/test run.
        hparam['learning_rate'] = 0.002  #0.000203228544324115 #0.00184
        hparam['weight_decay'] = 1.2697111322756407e-05  #0.00020957
        hparam['f1'] = 16
        hparam['k1'] = 3
        model, outputs_test, train_loss_arr, test_loss_arr, out_fpaths = traintest_utils.main(
            hparam, model_fname, None, checkpoint_dir=None, in_dir=in_dir, out_dir=out_dir,
            run_hparam=run_hparam, data_limit=data_limit, run_gpu=run_gpu,
            pretrained_model_fpath=pretrained_model_fpath)

        # ---------------------------
        # Save model and test loss
        # ---------------------------
        model_fpath, test_loss_img_fpath, learning_loss_img_fpath = out_fpaths
        torch.save(model, model_fpath)
        # Save test image, losses
        # np.save(test_loss_arr_fpath, test_loss_arr)
        traintest_utils.viz_loss(
            outputs_test, img_fpath=test_loss_img_fpath, img_title='Test Loss', show_plot=False)
        print('Saved testing model, loss image and data: {}'.format(model_fname))

        # ---------------------------
        # Save learning curve
        # ---------------------------
        traintest_utils.viz_learning_curve(
            train_loss_arr, test_loss_arr, model_fname, learning_loss_img_fpath)
    else:
        # Hyperparameter search space: grid over f1 only; the rest fixed
        # (commented-out alternatives retained from prior experiments).
        hparam['f1'] = tune.grid_search([24, 48, 64, 128])
        hparam['k1'] = 3  #tune.grid_search([3, 4, 6])
        hparam['learning_rate'] = 0.000203228544324115  # tune.uniform(1e-5, 1e-2)
        hparam['weight_decay'] = 1.2697111322756407e-05  # tune.uniform(1e-5, 1e-1)

        gpus_per_trial = 1
        cpu_num = 2
        num_samples = 4  # number of times to sample from parameter space
        result = tune.run(
            main,
            stop={"training_iteration": 5},
            config=hparam,
            num_samples=num_samples,
            resources_per_trial={"cpu": cpu_num, "gpu": gpus_per_trial})
        # tensorboard --logdir ~/ray_results
        print(result.dataframe())
        result.dataframe().to_csv(
            os.path.join(deeprad_dir, 'models', '{}_result_df.csv'.format(utils.time_str())))