working on TED demo
pjreddie committed Apr 12, 2017
1 parent 77ee111 commit addcc4e
Showing 24 changed files with 392 additions and 110 deletions.
205 changes: 205 additions & 0 deletions cfg/darknet9000.cfg
@@ -0,0 +1,205 @@
[net]
# Training
# batch=128
# subdivisions=4
# Testing
batch = 1
subdivisions = 1
height=448
width=448
max_crop=512
channels=3
momentum=0.9
decay=0.0005

learning_rate=0.001
policy=poly
power=4
max_batches=100000

angle=7
hue=.1
saturation=.75
exposure=.75
aspect=.75

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[convolutional]
filters=9418
size=1
stride=1
pad=1
activation=linear

[avgpool]

[softmax]
groups=1
tree=data/9k.tree

[cost]
type=masked
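
For reference, the [net] block above trains with darknet's poly learning-rate policy, which decays the rate polynomially to zero over max_batches. A minimal sketch of the schedule these settings imply, assuming the usual form lr(i) = base_lr * (1 - i/max_batches)^power:

#include <math.h>

/* Hedged sketch of the poly schedule configured above:
 * learning_rate=0.001, power=4, max_batches=100000. */
float poly_lr(int batch_num, float base_lr, int max_batches, float power)
{
    return base_lr * powf(1.0f - (float)batch_num / max_batches, power);
}
/* e.g. poly_lr(50000, 0.001f, 100000, 4) = 0.001 * 0.5^4 = 6.25e-5 */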

9 changes: 9 additions & 0 deletions cfg/imagenet9k.hierarchy.dataset
@@ -0,0 +1,9 @@
classes=9418
train = data/9k.train.list
valid = /data/imagenet/imagenet1k.valid.list
leaves = data/imagenet1k.labels
backup = /home/pjreddie/backup/
labels = data/9k.labels
names = data/9k.names
top=5
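
The tree=data/9k.tree softmax in darknet9000.cfg and this hierarchy dataset together implement YOLO9000-style WordTree classification: softmax runs over siblings at each node, and a leaf's absolute probability is the product of the conditional probabilities along its path to the root. A hedged sketch of that path product (the parent-array layout here is an illustrative assumption, not darknet's actual tree struct):

/* Assumed layout: pred[i] holds P(node i | its parent) after the
 * grouped softmax; parent[i] == -1 marks a root. A leaf's absolute
 * probability multiplies the conditionals up the tree. */
float wordtree_prob(const float *pred, const int *parent, int leaf)
{
    float p = 1.0f;
    int i;
    for (i = leaf; i >= 0; i = parent[i]) {
        p *= pred[i];
    }
    return p;
}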

19 changes: 13 additions & 6 deletions cfg/yolo9000.cfg
@@ -1,17 +1,24 @@
[net]
+# Testing
+# batch=1
+# subdivisions=1
+# Training
-batch=64
-subdivisions=8
+batch=1
+subdivisions=1
-height=416
-width=416
+height=544
+width=544
channels=3
momentum=0.9
decay=0.0005

-learning_rate=0.00001
-max_batches = 242200
+learning_rate=0.001
+burn_in=1000
+max_batches = 500200
policy=steps
-steps=500,200000,240000
-scales=10,.1,.1
+steps=400000,450000
+scales=.1,.1

hue=.1
saturation=.75
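
The retuned schedule adds a burn_in ramp before the usual steps policy: the rate climbs toward learning_rate over the first 1000 batches, then drops by each scale at the listed steps. A minimal sketch, under the assumption that the burn-in ramp is polynomial (darknet's exact ramp shape may differ):

#include <math.h>

/* Hedged sketch: steps policy with burn-in, as configured above
 * (learning_rate=0.001, burn_in=1000, steps=400000,450000, scales=.1,.1). */
float steps_lr(int batch_num, float base_lr, int burn_in,
               const int *steps, const float *scales, int n)
{
    int i;
    if (batch_num < burn_in)                 /* assumed ramp shape */
        return base_lr * powf((float)batch_num / burn_in, 4.0f);
    float lr = base_lr;
    for (i = 0; i < n && steps[i] <= batch_num; ++i)
        lr *= scales[i];                     /* 1e-3 -> 1e-4 -> 1e-5 */
    return lr;
}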
24 changes: 3 additions & 21 deletions src/classifier.c
@@ -1113,27 +1113,9 @@ void run_classifier(int argc, char **argv)
}

    char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
-    int *gpus = 0;
-    int gpu = 0;
-    int ngpus = 0;
-    if(gpu_list){
-        printf("%s\n", gpu_list);
-        int len = strlen(gpu_list);
-        ngpus = 1;
-        int i;
-        for(i = 0; i < len; ++i){
-            if (gpu_list[i] == ',') ++ngpus;
-        }
-        gpus = calloc(ngpus, sizeof(int));
-        for(i = 0; i < ngpus; ++i){
-            gpus[i] = atoi(gpu_list);
-            gpu_list = strchr(gpu_list, ',')+1;
-        }
-    } else {
-        gpu = gpu_index;
-        gpus = &gpu;
-        ngpus = 1;
-    }
+    int ngpus;
+    int *gpus = read_intlist(gpu_list, &ngpus, gpu_index);


    int cam_index = find_int_arg(argc, argv, "-c", 0);
    int top = find_int_arg(argc, argv, "-t", 0);
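
read_intlist is introduced at this call site but not defined in the diff; presumably it packages the comma-splitting logic deleted above. A hedged sketch of what such a helper could look like (signature inferred from the call, behavior inferred from the removed code, not copied from darknet):

#include <stdlib.h>
#include <string.h>

/* Hedged sketch: parse "0,1,3" into a heap-allocated int array and
 * report the count through *n; fall back to a single-element list
 * holding def when s is NULL. */
int *read_intlist(char *s, int *n, int def)
{
    int *a;
    int i;
    if (!s) {
        *n = 1;
        a = calloc(1, sizeof(int));
        a[0] = def;
        return a;
    }
    *n = 1;
    for (char *p = s; *p; ++p) if (*p == ',') ++*n;
    a = calloc(*n, sizeof(int));
    for (i = 0; i < *n; ++i) {
        a[i] = atoi(s);
        char *next = strchr(s, ',');
        if (!next) break;
        s = next + 1;
    }
    return a;
}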
10 changes: 5 additions & 5 deletions src/convolutional_kernels.cu
@@ -286,7 +286,7 @@ void push_convolutional_layer(convolutional_layer layer)
    }
}

-void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch)
+void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t)
{
    scal_ongpu(n, B1, m, 1);
    scal_ongpu(n, B2, v, 1);
@@ -296,7 +296,7 @@ void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2,
    mul_ongpu(n, d, 1, d, 1);
    axpy_ongpu(n, (1-B2), d, 1, v, 1);

-    adam_gpu(n, w, m, v, B1, B2, rate/batch, eps, 1000);
+    adam_gpu(n, w, m, v, B1, B2, rate/batch, eps, t);
    fill_ongpu(n, 0, d, 1);
}

@@ -305,10 +305,10 @@ void update_convolutional_layer_gpu(layer l, int batch, float learning_rate, flo
    int size = l.size*l.size*l.c*l.n;

    if(l.adam){
-        adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, size, batch);
-        adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.n, batch);
+        adam_update_gpu(l.weights_gpu, l.weight_updates_gpu, l.m_gpu, l.v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, size, batch, l.t);
+        adam_update_gpu(l.biases_gpu, l.bias_updates_gpu, l.bias_m_gpu, l.bias_v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.n, batch, l.t);
        if(l.scales_gpu){
-            adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.n, batch);
+            adam_update_gpu(l.scales_gpu, l.scale_updates_gpu, l.scale_m_gpu, l.scale_v_gpu, l.B1, l.B2, l.eps, decay, learning_rate, l.n, batch, l.t);
        }
    }else{
        axpy_ongpu(size, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
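
Threading the layer's step counter l.t through to adam_gpu matters because Adam's bias correction depends on the timestep; with t hardcoded to 1000, the correction factors were effectively constant from the first batch. A hedged CPU sketch of the bias-corrected step adam_gpu presumably applies per weight (sign convention follows darknet, where the accumulated updates already point in the descent direction):

#include <math.h>

/* Hedged sketch: w += rate * sqrt(1 - B2^t) / (1 - B1^t)
 *                      * m / (sqrt(v) + eps),
 * where m, v are the running first/second gradient moments
 * maintained by the calls shown above. */
void adam_step_cpu(float *w, const float *m, const float *v,
                   float B1, float B2, float rate, float eps, int t, int n)
{
    float correction = sqrtf(1.0f - powf(B2, t)) / (1.0f - powf(B1, t));
    int i;
    for (i = 0; i < n; ++i)
        w[i] += rate * correction * m[i] / (sqrtf(v[i]) + eps);
}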
2 changes: 1 addition & 1 deletion src/convolutional_layer.c
@@ -188,7 +188,7 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int

    // float scale = 1./sqrt(size*size*c);
    float scale = sqrt(2./(size*size*c));
-    scale = .02;
+    //scale = .02;
    //for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);
    for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_normal();
    int out_w = convolutional_out_width(l);
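
With the hardcoded override commented out, weights keep the He-style initialization computed just above: a zero-mean Gaussian with standard deviation sqrt(2/fan_in), where fan_in = size*size*c. A quick check of what that scale works out to for the first 3x3, 3-channel layer in darknet9000.cfg:

#include <math.h>
#include <stdio.h>

int main(void)
{
    int size = 3, c = 3;                        /* first conv layer above */
    float scale = sqrtf(2.0f / (size*size*c));  /* He init: sqrt(2/fan_in) */
    printf("scale = %f\n", scale);              /* ~0.272, vs the old 0.02 */
    return 0;
}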
2 changes: 1 addition & 1 deletion src/convolutional_layer.h
@@ -19,7 +19,7 @@ void pull_convolutional_layer(convolutional_layer layer);

void add_bias_gpu(float *output, float *biases, int batch, int n, int size);
void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size);
-void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch);
+void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t);
#ifdef CUDNN
void cudnn_convolutional_setup(layer *l);
#endif
19 changes: 12 additions & 7 deletions src/data.c
@@ -102,7 +102,7 @@ matrix load_image_paths(char **paths, int n, int w, int h)
    return X;
}

-matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
+matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center)
{
    int i;
    matrix X;
@@ -112,7 +112,12 @@ matrix load_image_augment_paths(char **paths, int n, int min, int max, int size,

    for(i = 0; i < n; ++i){
        image im = load_image_color(paths[i], 0, 0);
-        image crop = random_augment_image(im, angle, aspect, min, max, size);
+        image crop;
+        if(center){
+            crop = center_crop_image(im, size, size);
+        } else {
+            crop = random_augment_image(im, angle, aspect, min, max, size);
+        }
        int flip = rand()%2;
        if (flip) flip_image(crop);
        random_distort_image(crop, hue, saturation, exposure);
@@ -742,7 +747,7 @@ void *load_thread(void *ptr)
    } else if (a.type == REGRESSION_DATA){
        *a.d = load_data_regression(a.paths, a.n, a.m, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
    } else if (a.type == CLASSIFICATION_DATA){
-        *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
+        *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.center);
    } else if (a.type == SUPER_DATA){
        *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
    } else if (a.type == WRITING_DATA){
@@ -890,18 +895,18 @@ data load_data_regression(char **paths, int n, int m, int min, int max, int size
    if(m) paths = get_random_paths(paths, n, m);
    data d = {0};
    d.shallow = 0;
-    d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
+    d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0);
    d.y = load_regression_labels_paths(paths, n);
    if(m) free(paths);
    return d;
}

-data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
+data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure, int center)
{
    if(m) paths = get_random_paths(paths, n, m);
    data d = {0};
    d.shallow = 0;
-    d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
+    d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, center);
    d.y = load_labels_paths(paths, n, labels, k, hierarchy);
    if(m) free(paths);
    return d;
@@ -914,7 +919,7 @@ data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size
    d.w = size;
    d.h = size;
    d.shallow = 0;
-    d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
+    d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure, 0);
    d.y = load_tags_paths(paths, n, k);
    if(m) free(paths);
    return d;
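
The new center flag gives evaluation a deterministic path: instead of a random angle/aspect/scale crop, the image is cropped once around its center (the flip and distortion calls still run afterwards in this revision). A hedged sketch of what center_crop_image could reduce to, assuming darknet's crop_image(im, dx, dy, w, h) primitive and ignoring edge cases:

/* Hedged sketch, not darknet's implementation: take a w x h window
 * centered on the image; boundary handling is left to crop_image. */
image my_center_crop(image im, int w, int h)
{
    int dx = (im.w - w) / 2;
    int dy = (im.h - h) / 2;
    return crop_image(im, dx, dy, w, h);
}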