Skip to content
This repository has been archived by the owner on Jun 3, 2020. It is now read-only.

Commit

Permalink
Merge pull request #30 from Oslandia/models
Browse files Browse the repository at this point in the history
Models
  • Loading branch information
delhomer authored Mar 28, 2018
2 parents 05caabf + 91e2213 commit 99456a7
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 13 deletions.
124 changes: 123 additions & 1 deletion sources/feature_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,15 @@ def __init__(self, network_name="mapillary", image_size=512, nb_channels=3,
shape=[None, self._nb_labels])
if netsize == "small":
self.add_layers_3_1()
else:
elif netsize == "medium":
self.add_layers_6_2()
elif netsize == "vgg":
self.add_vgg_layers()
elif netsize == "inception":
self.add_inception_layers()
else:
utils.logger.error("Unsupported network.")
sys.exit(1)
self.compute_loss()
self.optimize()
self._cm = self.compute_dashboard(self._Y, self._Y_pred)
Expand Down Expand Up @@ -112,6 +119,121 @@ def add_layers_6_2(self):
layer = self.fullyconnected_layer(2, self._is_training, layer, 1024, 512, self._dropout)
return self.output_layer(layer, 512)

def add_vgg_layers(self):
"""Build the structure of a convolutional neural network from image data `input_layer`
to the last hidden layer on the model of a similar manner than VGG-net (see Simonyan &
Zisserman, Very Deep Convolutional Networks for Large-Scale Image Recognition, arXiv
technical report, 2014) ; not necessarily the *same* structure, as the input shape is not
necessarily identical
Returns
-------
tensor
Output layer of the neural network, *i.e.* a 1 X 1 X nb_class structure that contains
model predictions
"""
layer = self.convolutional_layer(1, self._is_training, self._X, self._nb_channels, 3, 64)
layer = self.maxpooling_layer(1, layer, 2, 2)
layer = self.convolutional_layer(2, self._is_training, layer, 64, 3, 128)
layer = self.maxpooling_layer(2, layer, 2, 2)
layer = self.convolutional_layer(3, self._is_training, layer, 128, 3, 256)
layer = self.convolutional_layer(4, self._is_training, layer, 256, 3, 256)
layer = self.maxpooling_layer(3, layer, 2, 2)
layer = self.convolutional_layer(5, self._is_training, layer, 256, 3, 512)
layer = self.convolutional_layer(6, self._is_training, layer, 512, 3, 512)
layer = self.maxpooling_layer(4, layer, 2, 2)
layer = self.convolutional_layer(7, self._is_training, layer, 512, 3, 512)
layer = self.convolutional_layer(8, self._is_training, layer, 512, 3, 512)
layer = self.maxpooling_layer(5, layer, 2, 2)
last_layer_dim = self.get_last_conv_layer_dim(32, 512)
layer = self.fullyconnected_layer(1, self._is_training, layer, last_layer_dim, 1024, self._dropout)
return self.output_layer(layer, 1024)

def inception_block(self, counter, input_layer, input_depth, depth_1,
depth_3_reduce, depth_3, depth_5_reduce, depth_5, depth_pool):
"""Apply an Inception block (concatenation of convoluted inputs, see Szegedy et al, 2014)
Concatenation of several filtered outputs:
- 1*1 convoluted image
- 1*1 and 3*3 convoluted images
- 1*1 and 5*5 convoluted images
- 3*3 max-pooled and 1*1 convoluted images
Parameters
----------
counter : integer
Inception block ID
input_layer : tensor
Input layer that has to be transformed in the Inception block
input_depth : integer
Input layer depth
depth_1 : integer
Depth of the 1*1 convoluted output
depth_3_reduce : integer
Hidden layer depth, between 1*1 and 3*3 convolution
depth_3 : integer
Depth of the 3*3 convoluted output
depth_5_reduce : integer
Hidden layer depth, between 1*1 and 5*5 convolution
depth_5 : integer
Depth of the 5*5 convoluted output
depth_pool : integer
Depth of the max-pooled output (after 1*1 convolution)
Returns
-------
tensor
Output layer, after Inception block treatments
"""
filter_1_1 = self.convolutional_layer("i"+str(counter)+"1", self._is_training, input_layer,
input_depth, 1, depth_1)
filter_3_3 = self.convolutional_layer("i"+str(counter)+"3a", self._is_training, input_layer,
input_depth, 1, depth_3_reduce)
filter_3_3 = self.convolutional_layer("i"+str(counter)+"3b", self._is_training, filter_3_3,
depth_3_reduce, 3, depth_3)
filter_5_5 = self.convolutional_layer("i"+str(counter)+"5a", self._is_training, input_layer,
input_depth, 1, depth_5_reduce)
filter_5_5 = self.convolutional_layer("i"+str(counter)+"5b", self._is_training, filter_5_5,
depth_5_reduce, 5, depth_5)
filter_pool = self.maxpooling_layer("i"+str(counter), input_layer, 3, 1)
filter_pool = self.convolutional_layer("i"+str(counter)+"p", self._is_training,
filter_pool, input_depth, 1, depth_pool)
return tf.concat([filter_1_1, filter_3_3, filter_5_5, filter_pool], axis=3)

def add_inception_layers(self):
"""Build the structure of a convolutional neural network from image data `input_layer`
to the last hidden layer on the model of a similar manner than Inception networks (see
Szegedy et al, Going Deeper with Convolutions, arXiv technical report, 2014) ; not
necessarily the *same* structure, as the input shape is not necessarily identical
Returns
-------
tensor
Output layer of the neural network, *i.e.* a 1 X 1 X nb_class structure that contains
model predictions
"""
layer = self.convolutional_layer(1, self._is_training, self._X, self._nb_channels, 7, 64,
2)
layer = self.maxpooling_layer(1, layer, 3, 2)
layer = self.convolutional_layer(2, self._is_training, layer, 64, 3, 192)
layer = self.maxpooling_layer(2, layer, 3, 2)
layer = self.inception_block('3a', layer, 192, 64, 96, 128, 16, 32, 32)
layer = self.inception_block('3b', layer, 256, 128, 128, 192, 32, 96, 64)
layer = self.maxpooling_layer(3, layer, 3, 2)
layer = self.inception_block('4a', layer, 480, 192, 96, 208, 16, 48, 64)
layer = self.inception_block('4b', layer, 512, 160, 112, 224, 24, 64, 64)
layer = self.inception_block('4c', layer, 512, 128, 128, 256, 24, 64, 64)
layer = self.inception_block('4d', layer, 512, 112, 144, 288, 32, 64, 64)
layer = self.inception_block('4e', layer, 528, 256, 160, 320, 32, 128, 128)
layer = self.maxpooling_layer(4, layer, 3, 2)
layer = self.inception_block('5a', layer, 832, 256, 160, 320, 32, 128, 128)
layer = self.inception_block('5b', layer, 832, 384, 192, 384, 48, 128, 128)
layer = tf.nn.avg_pool(layer, ksize=[1, 7, 7, 1], strides=[1, 1, 1, 1],
padding="VALID", name="avg_pool")
layer = tf.reshape(layer, [-1, 1024])
layer = tf.nn.dropout(layer, self._dropout, name="final_dropout")
return self.output_layer(layer, 1024)

def compute_loss(self):
"""Define the loss tensor as well as the optimizer; it uses a decaying
learning rate following the equation
Expand Down
6 changes: 3 additions & 3 deletions sources/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,17 @@
args = parser.parse_args()

# instance name decomposition (instance name = name + image size + network size)
_, image_size, network_size, _, aggregate_value, _, _ = args.name.split('_')
_, image_size, network, _, aggregate_value, _, _ = args.name.split('_')
image_size = int(image_size)

if image_size > 1024:
utils.logger.error(("Unsupported image size. Please provide a "
"reasonable image size (less than 1024)"))
sys.exit(1)

if not network_size in ["small", "medium"]:
if not network in ["small", "medium", "vgg", "inception", "inception-v2", "inception-v3"]:
utils.logger.error(("Unsupported network size. "
"Please choose 'small' or 'medium'."))
"Please choose 'small' or 'medium'.")) # TO UPDATE
sys.exit(1)

if not args.model in ["feature_detection", "semantic_segmentation"]:
Expand Down
20 changes: 11 additions & 9 deletions sources/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,14 @@
parser.add_argument('-n', '--name', default="cnnmapil", nargs='?',
help=("Model name that will be used for results, "
"checkout and graph storage on file system"))
parser.add_argument('-ns', '--network-size', default='small',
help=("Neural network size, either 'small' or 'medium'"
parser.add_argument('-N', '--network', default='small',
help=("Neural network size, either 'small', 'medium',"
" 'vgg' or 'inception' "
"('small' refers to 3 conv/pool blocks and 1 "
"fully-connected layer, and 'medium' refers to 6"
"conv/pool blocks and 2 fully-connected layers)"))
"fully-connected layer; 'medium' refers to 6"
"conv/pool blocks and 2 fully-connected layers;"
" 'vgg' and 'inception' refer to "
"state-of-the-art networks)"))
parser.add_argument('-r', '--learning-rate', required=False, nargs="+",
default=[0.01, 1000, 0.95], type=float,
help=("List of learning rate components (starting LR, "
Expand Down Expand Up @@ -127,10 +130,10 @@
"each batch (convex weights with min at 50%)..."))
sys.exit(1)

if not args.network_size in ["small", "medium"]:
if not args.network in ["small", "medium", "vgg", "inception"]:
utils.logger.error(("Unsupported network size description"
"Please use this parameter with 'small' or "
"'medium' values"))
"Please use this parameter with 'small', "
"'medium', 'vgg' or 'inception' values"))
sys.exit(1)

if not args.model in ["feature_detection", "semantic_segmentation"]:
Expand All @@ -145,7 +148,7 @@

# Instance name (name + image size + network size + batch_size + aggregate? + dropout +
# learning_rate)
instance_args = [args.name, args.image_size, args.network_size, args.batch_size,
instance_args = [args.name, args.image_size, args.network, args.batch_size,
aggregate_value, args.dropout, utils.list_to_str(args.learning_rate)]
instance_name = utils.list_to_str(instance_args, "_")

Expand Down Expand Up @@ -233,4 +236,3 @@
utils.logger.error(("Unknown type of model. Please use "
"'feature_detection' or 'semantic_segmentation'"))
sys.exit(1)

0 comments on commit 99456a7

Please sign in to comment.