# train_wineholder_with_slurm.sh
# Forked from facebookresearch/NSVF.
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# just for debugging
# Experiment configuration: scene name, view resolution, model architecture,
# and a suffix appended to the architecture name to identify this run.
DATA="Wineholder"
RES="800x800"
ARCH="nsvf_base"
SUFFIX="v1"

# Input dataset directory for the scene, and the output root for its runs.
DATASET="/private/home/jgu/data/shapenet/release/Synthetic_NSVF/${DATA}"
SAVE="/checkpoint/jgu/space/neuralrendering/new_release/${DATA}"

# Run identifier: architecture name immediately followed by the suffix.
MODEL="${ARCH}${SUFFIX}"

# Create the per-run output directory up front; the Slurm log paths and
# --save-dir used later in this script point into it. The path is quoted so
# it is always passed as a single argument, and `--` ends option parsing.
mkdir -p -- "${SAVE}/${MODEL}"
# Slurm submission settings, handed to the training code through the
# SLURM_ARGS environment variable. When this variable is defined, the training
# code detects it automatically and tries to submit the job on a Slurm-based
# cluster, so the main body of the training command does not need to change.
#
# The value is a brace-delimited list of 'key': value pairs; the 'output' and
# 'error' log paths point into the run directory $SAVE/$MODEL.
SLURM_ARGS="{
'job-name': '${DATA}-${MODEL}',
'partition': 'priority',
'comment': 'NeurIPS2020 open-source',
'nodes': 1,
'gpus': 8,
'output': '${SAVE}/${MODEL}/train.out',
'error': '${SAVE}/${MODEL}/train.stderr.%j',
'constraint': 'volta32gb',
'local': False}
"
export SLURM_ARGS
# Start training. Submission to Slurm is driven by the SLURM_ARGS variable
# exported earlier in this script.
#
# Views "0..100" are used for training and "100..200" for validation, both at
# resolution $RES. TensorBoard logs go to $SAVE/tensorboard/$MODEL and
# checkpoints to $SAVE/$MODEL.
#
# Every variable expansion is double-quoted so that each path or value reaches
# train.py as exactly one argument, with no word splitting or glob expansion.
python train.py "${DATASET}" \
    --user-dir fairnr \
    --task single_object_rendering \
    --train-views "0..100" \
    --view-resolution "${RES}" \
    --max-sentences 1 \
    --view-per-batch 2 \
    --pixel-per-view 2048 \
    --no-preload \
    --sampling-on-mask 1.0 --no-sampling-at-reader \
    --valid-view-resolution "${RES}" \
    --valid-views "100..200" \
    --valid-view-per-batch 1 \
    --transparent-background "1.0,1.0,1.0" \
    --background-stop-gradient \
    --arch "${ARCH}" \
    --initial-boundingbox "${DATASET}/bbox.txt" \
    --raymarching-stepsize-ratio 0.125 \
    --use-octree \
    --discrete-regularization \
    --color-weight 128.0 \
    --alpha-weight 1.0 \
    --optimizer "adam" \
    --adam-betas "(0.9, 0.999)" \
    --lr-scheduler "polynomial_decay" \
    --total-num-update 150000 \
    --lr 0.001 \
    --clip-norm 0.0 \
    --criterion "srn_loss" \
    --num-workers 0 \
    --seed 2 \
    --save-interval-updates 500 --max-update 150000 \
    --virtual-epoch-steps 5000 --save-interval 1 \
    --half-voxel-size-at "5000,25000,75000" \
    --reduce-step-size-at "5000,25000,75000" \
    --pruning-every-steps 2500 \
    --keep-interval-updates 5 \
    --log-format simple --log-interval 1 \
    --tensorboard-logdir "${SAVE}/tensorboard/${MODEL}" \
    --save-dir "${SAVE}/${MODEL}"