forked from sigmaai/semantic-segmentation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
executable file
·622 lines (482 loc) · 27.2 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
#
# utilities for semantic segmentation
# autonomous golf cart project
# (c) Yongyang Nie, Michael Meng
#
import cv2
import configs as configs
import numpy as np
import pandas
from collections import namedtuple
import os
import glob
import random
import json
import gc
from keras.utils import to_categorical
from keras.callbacks import Callback
from keras.utils.data_utils import Sequence
# Immutable record describing one segmentation class. Rows of the label table
# are built positionally, so the order of the fields below is significant.
Label = namedtuple(
    'Label',
    (
        # Identifier of the label, e.g. 'car' or 'person'; used to uniquely
        # name a class.
        'name',

        # Integer ID associated with the label. These IDs represent the label
        # in ground truth images. An ID of -1 means the label has no ID and is
        # ignored when creating ground truth images (e.g. license plate).
        # Do not modify these IDs: the evaluation server expects exactly them.
        'id',

        # Training ID, free to adapt to your method. Ground truth images with
        # train IDs are created with the tools in the 'preparation' folder, but
        # results must still be validated/submitted with the regular IDs above.
        # Several labels may share one train ID, in which case they map to the
        # same class in the ground truth images; the inverse mapping uses the
        # label defined first in the table. Mapping all void-type classes to
        # one training ID can make sense for some approaches.
        # Max value is 255!
        'trainId',

        # Name of the category this label belongs to.
        'category',

        # ID of that category; used to create category-level ground truth.
        'categoryId',

        # Whether this label distinguishes between single instances or not.
        'hasInstances',

        # Whether pixels with this ground truth class are ignored during
        # evaluation or not.
        'ignoreInEval',

        # The color of this label.
        'color',
    ),
)
#--------------------------------------------------------------------------------
# A list of all labels
#--------------------------------------------------------------------------------
# Please adapt the train IDs as appropriate for your approach.
# Note that you might want to ignore labels with ID 255 during training.
# Further note that the current train IDs are only a suggestion. You can use whatever you like.
# Make sure to provide your results using the original IDs and not the training IDs.
# Note that many IDs are ignored in evaluation and thus you never need to predict these!
labels = [
# One row per class. For ids 0..33 the position of a row in this list equals
# its 'id'; the final 'license plate' row has id -1 and sits at index 34.
# In this table every row with trainId 255 (and the -1 row) has ignoreInEval
# set to True, and every other row has it set to False.
# name id trainId category catId hasInstances ignoreInEval color
Label( 'unlabeled' , 0 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ),
Label( 'ego vehicle' , 1 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ),
Label( 'rectification border' , 2 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ),
Label( 'out of roi' , 3 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ),
Label( 'static' , 4 , 255 , 'void' , 0 , False , True , ( 0, 0, 0) ),
Label( 'dynamic' , 5 , 255 , 'void' , 0 , False , True , (111, 74, 0) ),
Label( 'ground' , 6 , 255 , 'void' , 0 , False , True , ( 81, 0, 81) ),
# --- category 'flat' (catId 1) ---
Label( 'road' , 7 , 0 , 'flat' , 1 , False , False , (128, 64,128) ),
Label( 'sidewalk' , 8 , 1 , 'flat' , 1 , False , False , (244, 35,232) ),
Label( 'parking' , 9 , 255 , 'flat' , 1 , False , True , (250,170,160) ),
Label( 'rail track' , 10 , 255 , 'flat' , 1 , False , True , (230,150,140) ),
# --- category 'construction' (catId 2) ---
Label( 'building' , 11 , 2 , 'construction' , 2 , False , False , ( 70, 70, 70) ),
Label( 'wall' , 12 , 3 , 'construction' , 2 , False , False , (102,102,156) ),
Label( 'fence' , 13 , 4 , 'construction' , 2 , False , False , (190,153,153) ),
Label( 'guard rail' , 14 , 255 , 'construction' , 2 , False , True , (180,165,180) ),
Label( 'bridge' , 15 , 255 , 'construction' , 2 , False , True , (150,100,100) ),
Label( 'tunnel' , 16 , 255 , 'construction' , 2 , False , True , (150,120, 90) ),
# --- category 'object' (catId 3) ---
Label( 'pole' , 17 , 5 , 'object' , 3 , False , False , (153,153,153) ),
Label( 'polegroup' , 18 , 255 , 'object' , 3 , False , True , (153,153,153) ),
Label( 'traffic light' , 19 , 6 , 'object' , 3 , False , False , (250,170, 30) ),
Label( 'traffic sign' , 20 , 7 , 'object' , 3 , False , False , (220,220, 0) ),
# --- category 'nature' (catId 4) ---
Label( 'vegetation' , 21 , 8 , 'nature' , 4 , False , False , (107,142, 35) ),
Label( 'terrain' , 22 , 9 , 'nature' , 4 , False , False , (152,251,152) ),
# --- category 'sky' (catId 5) ---
Label( 'sky' , 23 , 10 , 'sky' , 5 , False , False , ( 70,130,180) ),
# --- category 'human' (catId 6); from here on most rows have hasInstances True ---
Label( 'person' , 24 , 11 , 'human' , 6 , True , False , (220, 20, 60) ),
Label( 'rider' , 25 , 12 , 'human' , 6 , True , False , (255, 0, 0) ),
# --- category 'vehicle' (catId 7) ---
Label( 'car' , 26 , 13 , 'vehicle' , 7 , True , False , ( 0, 0,142) ),
Label( 'truck' , 27 , 14 , 'vehicle' , 7 , True , False , ( 0, 0, 70) ),
Label( 'bus' , 28 , 15 , 'vehicle' , 7 , True , False , ( 0, 60,100) ),
Label( 'caravan' , 29 , 255 , 'vehicle' , 7 , True , True , ( 0, 0, 90) ),
Label( 'trailer' , 30 , 255 , 'vehicle' , 7 , True , True , ( 0, 0,110) ),
Label( 'train' , 31 , 16 , 'vehicle' , 7 , True , False , ( 0, 80,100) ),
Label( 'motorcycle' , 32 , 17 , 'vehicle' , 7 , True , False , ( 0, 0,230) ),
Label( 'bicycle' , 33 , 18 , 'vehicle' , 7 , True , False , (119, 11, 32) ),
# The only row without a regular id: both id and trainId are -1.
Label( 'license plate' , -1 , -1 , 'vehicle' , 7 , False , True , ( 0, 0,142) )
]
def load_image(path):
    """
    Read an image from disk, convert it to RGB and resize it to the configured size.

    :param path: path of the image file to read.
    :return: the image with channels converted from OpenCV's BGR order to RGB and
             resized with ``cv2.resize`` to ``(configs.img_width, configs.img_height)``.
    :raises IOError: if OpenCV cannot read the file.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of inside cvtColor.
        raise IOError("could not read image: {}".format(path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return cv2.resize(img, (configs.img_width, configs.img_height))
def convert_class_to_rgb(image_labels, threshold=0.80):
    """
    Render per-class score maps as one colour image.

    Each of the first 34 channels of ``image_labels`` is thresholded into a binary
    mask. The masked pixels are painted with the colour of the entry at the same
    index in the module-level ``labels`` table, and the per-class images are added
    together (saturating uint8 addition via ``cv2.addWeighted``).

    :param image_labels: array indexed as [row, col, class] with at least 34 classes.
                         It is expected to have the spatial size
                         (configs.img_height // 2, configs.img_width // 2) so the masks
                         match the output. The array is not modified.
    :param threshold: scores strictly greater than this value mark the class as present.
    :return: uint8 array of shape (configs.img_height // 2, configs.img_width // 2, 3).
             The colour components of each label are written in reversed order
             (presumably BGR for OpenCV display -- confirm with callers).
    """
    # Integer division: a float shape is rejected by np.zeros on Python 3.
    height = configs.img_height // 2
    width = configs.img_width // 2

    output = np.zeros((height, width, 3), dtype=np.uint8)
    for i in range(34):
        # Build the mask as a fresh array so the caller's scores stay untouched.
        mask = (image_labels[:, :, i] > threshold).astype(np.uint8) * 255

        colour = labels[i].color
        bg = np.zeros((height, width, 3), dtype=np.uint8)
        bg[:, :, 0].fill(colour[2])
        bg[:, :, 1].fill(colour[1])
        bg[:, :, 2].fill(colour[0])

        # Keep the class colour only where the mask is set, then accumulate.
        res = cv2.bitwise_and(bg, bg, mask=mask)
        output = cv2.addWeighted(output, 1.0, res, 1.0, 0)
    return output
# The new training generator
def _augment_sample(image, label, depth=None, horizontal_flip=True, vertical_flip=False,
                    brightness=0.1, rotation=5.0, zoom=0.1):
    """
    Randomly augment one training sample, keeping image, label and depth aligned.

    Every enabled augmentation is applied with probability 1/2. Geometric changes
    (flips, rotation, zoom) are applied to the image, the label and, when given, the
    depth image. The brightness (gamma) change is applied to the colour image only.

    :param image: uint8 colour image (required by the lookup table used for brightness).
    :param label: single-channel label map with the same height/width as ``image``.
    :param depth: optional depth image with the same height/width as ``image``.
    :param horizontal_flip: enable random horizontal flips.
    :param vertical_flip: enable random vertical flips.
    :param brightness: sigma of the random gamma change; falsy disables it.
    :param rotation: sigma (degrees) of the random rotation angle; falsy disables it.
    :param zoom: sigma of the random scale around 1.0; falsy disables it.
    :return: tuple ``(image, label, depth)``; ``depth`` is None when none was given.
    """
    if horizontal_flip and random.randint(0, 1):
        image = cv2.flip(image, 1)
        label = cv2.flip(label, 1)
        if depth is not None:
            depth = cv2.flip(depth, 1)

    if vertical_flip and random.randint(0, 1):
        image = cv2.flip(image, 0)
        label = cv2.flip(label, 0)
        if depth is not None:
            depth = cv2.flip(depth, 0)

    if brightness and random.randint(0, 1):
        factor = 1.0 + abs(random.gauss(mu=0.0, sigma=brightness))
        if random.randint(0, 1):
            factor = 1.0 / factor
        # 256-entry gamma lookup table, computed in one vectorised expression.
        table = (((np.arange(256) / 255.0) ** factor) * 255).astype(np.uint8)
        image = cv2.LUT(image, table)

    # get rotation or zoom
    if rotation and random.randint(0, 1):
        angle = random.gauss(mu=0.0, sigma=rotation)
    else:
        angle = 0.0
    if zoom and random.randint(0, 1):
        scale = random.gauss(mu=1.0, sigma=zoom)
    else:
        scale = 1.0

    # perform rotation or zoom
    if rotation or zoom:
        M = cv2.getRotationMatrix2D((image.shape[1] // 2, image.shape[0] // 2), angle, scale)
        image = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))
        # Nearest-neighbour keeps label values valid class ids; interpolating
        # would blend neighbouring ids into unrelated classes.
        label = cv2.warpAffine(label, M, (label.shape[1], label.shape[0]), flags=cv2.INTER_NEAREST)
        if depth is not None:
            depth = cv2.warpAffine(depth, M, (depth.shape[1], depth.shape[0]))

    return image, label, depth


def _one_hot_downscaled(label, factor, n_classes):
    """Shrink a label map by an integer ``factor`` (nearest-neighbour) and one-hot encode it."""
    size = (label.shape[1] // factor, label.shape[0] // factor)
    small = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST)
    return to_categorical(small, n_classes)


# The new training generator
def fusion_generator(df, resize_shape, n_classes=34, batch_size=1, horizontal_flip=True,
                     vertical_flip=False, brightness=0.1, rotation=5.0, zoom=0.1, training=True):
    """
    the default fusion training generator for mid fusion ICNet

    Yields batches forever. The same preallocated arrays are reused and overwritten
    for every batch. If ``df`` holds fewer than ``batch_size`` rows, the remaining
    batch slots keep whatever they held before.

    :param df: the dataframe. provided by the csv; each row is indexed as
               [image path, label path, depth image path].
    :param resize_shape: (width, height) every image, depth image and label is resized to.
    :param n_classes: number of classes to classify
    :param batch_size: the training batch size.
    :param horizontal_flip: boolean, apply hori flip to image during training.
    :param vertical_flip: boolean, apply vert flip to image during training
    :param brightness: sigma of the random gamma change applied to the colour image.
    :param rotation: sigma (degrees) of the random rotation.
    :param zoom: sigma of the random zoom around 1.0.
    :param training: boolean, if yes, apply augmentation
    :return: generator yielding ``[X_color, X_depth], [Y1, Y2, Y3]`` where the Y arrays
             are one-hot labels at 1/4, 1/8 and 1/16 of the input resolution.
    """
    X_color = np.zeros((batch_size, resize_shape[1], resize_shape[0], 3), dtype='float32')
    X_depth = np.zeros((batch_size, resize_shape[1], resize_shape[0], 3), dtype='float32')
    Y1 = np.zeros((batch_size, resize_shape[1] // 4, resize_shape[0] // 4, n_classes), dtype='float32')
    Y2 = np.zeros((batch_size, resize_shape[1] // 8, resize_shape[0] // 8, n_classes), dtype='float32')
    Y3 = np.zeros((batch_size, resize_shape[1] // 16, resize_shape[0] // 16, n_classes), dtype='float32')

    while 1:
        j = 0
        for index in np.random.permutation(len(df)):
            image, image_depth, label = _load_rgb_depth_image_label(df[index])
            image = cv2.resize(image, resize_shape)
            image_depth = cv2.resize(image_depth, resize_shape)
            label = cv2.resize(label, resize_shape, interpolation=cv2.INTER_NEAREST)

            # Do augmentation (only if training)
            if training:
                image, label, image_depth = _augment_sample(
                    image, label, image_depth,
                    horizontal_flip=horizontal_flip, vertical_flip=vertical_flip,
                    brightness=brightness, rotation=rotation, zoom=zoom)

            X_color[j, :, :, :] = image
            X_depth[j, :, :, :] = image_depth
            Y1[j] = _one_hot_downscaled(label, 4, n_classes)
            Y2[j] = _one_hot_downscaled(label, 8, n_classes)
            Y3[j] = _one_hot_downscaled(label, 16, n_classes)

            j += 1
            if j == batch_size:
                break

        yield [X_color, X_depth], [Y1, Y2, Y3]


# The new training generator
def early_fusion_generator(df, crop_shape, n_classes=34, batch_size=1, resize_shape=None, horizontal_flip=True,
                           vertical_flip=False, brightness=0.1, rotation=5.0, zoom=0.1, training=True):
    """
    Training generator for early fusion: RGB and depth are stacked into one 6-channel input.

    Yields batches forever, reusing the same preallocated arrays. No cropping is
    performed here: ``crop_shape`` only sizes the batch arrays, so the images (after
    the optional resize) are expected to already have that size.

    :param df: rows indexed as [image path, label path, depth image path].
    :param crop_shape: (width, height) of the network input.
    :param n_classes: number of classes to classify
    :param batch_size: the training batch size.
    :param resize_shape: optional (width, height) to resize image, depth and label to.
    :param horizontal_flip: boolean, apply hori flip during training.
    :param vertical_flip: boolean, apply vert flip during training
    :param brightness: sigma of the random gamma change applied to the colour image.
    :param rotation: sigma (degrees) of the random rotation.
    :param zoom: sigma of the random zoom around 1.0.
    :param training: boolean, if yes, apply augmentation
    :return: generator yielding ``X, Y`` with ``Y`` one-hot at 1/4 of the input resolution.
    """
    X = np.zeros((batch_size, crop_shape[1], crop_shape[0], 6), dtype='float32')
    Y = np.zeros((batch_size, crop_shape[1] // 4, crop_shape[0] // 4, n_classes), dtype='float32')

    while 1:
        j = 0
        for index in np.random.permutation(len(df)):
            image, image_depth, label = _load_rgb_depth_image_label(df[index])
            if resize_shape:
                image = cv2.resize(image, resize_shape)
                image_depth = cv2.resize(image_depth, resize_shape)
                label = cv2.resize(label, resize_shape, interpolation=cv2.INTER_NEAREST)

            # Do augmentation (only if training)
            if training:
                image, label, image_depth = _augment_sample(
                    image, label, image_depth,
                    horizontal_flip=horizontal_flip, vertical_flip=vertical_flip,
                    brightness=brightness, rotation=rotation, zoom=zoom)

            X[j] = np.concatenate((image, image_depth), axis=2)
            Y[j] = _one_hot_downscaled(label, 4, n_classes)

            j += 1
            if j == batch_size:
                break

        yield X, Y


# The new training generator
def generator(df, crop_shape, n_classes=34, batch_size=1, resize_shape=None, horizontal_flip=True,
              vertical_flip=False, brightness=0.1, rotation=5.0, zoom=0.1, training=True):
    """
    Training generator for RGB-only models.

    Yields batches forever, reusing the same preallocated arrays. No cropping is
    performed here: ``crop_shape`` only sizes the batch arrays, so the images (after
    the optional resize) are expected to already have that size.

    :param df: rows indexed as [image path, label path].
    :param crop_shape: (width, height) of the network input.
    :param n_classes: number of classes to classify
    :param batch_size: the training batch size.
    :param resize_shape: optional (width, height) to resize image and label to.
    :param horizontal_flip: boolean, apply hori flip during training.
    :param vertical_flip: boolean, apply vert flip during training
    :param brightness: sigma of the random gamma change applied to the image.
    :param rotation: sigma (degrees) of the random rotation.
    :param zoom: sigma of the random zoom around 1.0.
    :param training: boolean, if yes, apply augmentation
    :return: generator yielding ``X, Y`` with ``Y`` one-hot at 1/4 of the input resolution.
    """
    X = np.zeros((batch_size, crop_shape[1], crop_shape[0], 3), dtype='float32')
    Y = np.zeros((batch_size, crop_shape[1] // 4, crop_shape[0] // 4, n_classes), dtype='float32')

    while 1:
        j = 0
        for index in np.random.permutation(len(df)):
            image_path = df[index][0]
            label_path = df[index][1]
            image = cv2.imread(image_path, 1)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            label = cv2.imread(label_path, 0)

            if resize_shape:
                image = cv2.resize(image, resize_shape)
                label = cv2.resize(label, resize_shape, interpolation=cv2.INTER_NEAREST)

            # Do augmentation (only if training)
            if training:
                image, label, _ = _augment_sample(
                    image, label,
                    horizontal_flip=horizontal_flip, vertical_flip=vertical_flip,
                    brightness=brightness, rotation=rotation, zoom=zoom)

            X[j] = image
            Y[j] = _one_hot_downscaled(label, 4, n_classes)

            j += 1
            if j == batch_size:
                break

        yield X, Y
##############################################################
################ City Scape Generator ########################
##############################################################
# *** Not working currently ***
class CityScapeGenerator(Sequence):
    """
    Keras ``Sequence`` producing batches of images and multi-scale one-hot labels.

    Each item is ``X, [Y1, Y2, Y3]`` where the Y arrays hold the labels at 1/4, 1/8
    and 1/16 of the input resolution. Labels are one-hot encoded with ``n_classes``
    classes and then reduced by ``_filter_labels``, so the last axis of the Y arrays
    has as many channels as ``_filter_labels`` keeps (``self.n_outputs``), not
    ``n_classes``. The batch arrays are preallocated once and overwritten per item.
    """

    def __init__(self, csv_path, mode='training', n_classes=34, batch_size=1, resize_shape=None, crop_shape=(640, 320),
                 horizontal_flip=False, vertical_flip=False, brightness=0.1, rotation=0.0, zoom=0.0):
        """
        Init method for the CityScape dataset generator. This can be used for
        Any type of Keras models (Not tested). Currently under development
        for ICNet architecture.
        :param csv_path: the path of the csv file which contains paths to labels
        :param mode: mode of the generator; augmentation and cropping only happen for 'training'
        :param n_classes: number of classes in segmentation. CityScape default 34
        :param batch_size: generator batch size
        :param resize_shape: (width, height); you can either resize the img or crop
        :param crop_shape: (width, height); you can either resize the img or crop. cropping is random
        :param horizontal_flip: whether or not to perform hori flip
        :param vertical_flip: whether or not to perform vert flip
        :param brightness: for data augmentation. If != 0, adjust brightness of image.
        :param rotation: For data augmentation. If != 0, rotate input image.
        :param zoom: For data augmentation. If != 0, zooms in.
        :raises Exception: if neither a usable crop shape nor a resize shape is given.
        """
        self.image_path_list, self.label_path_list = _load_data(csv_path)
        self.mode = mode
        self.n_classes = n_classes
        self.batch_size = batch_size
        self.resize_shape = resize_shape
        self.crop_shape = crop_shape
        self.horizontal_flip = horizontal_flip
        self.vertical_flip = vertical_flip
        self.brightness = brightness
        self.rotation = rotation
        self.zoom = zoom

        # Ask _filter_labels how many channels it keeps, so the label buffers
        # match what __getitem__ actually stores in them.
        probe = np.zeros((1, 1, n_classes), dtype='float32')
        self.n_outputs = _filter_labels(probe).shape[0]

        # Preallocate memory
        if mode == 'training' and self.crop_shape:
            width, height = crop_shape[0], crop_shape[1]
        elif self.resize_shape:
            width, height = resize_shape[0], resize_shape[1]
        else:
            raise Exception('No image dimensions specified!')

        self.X = np.zeros((batch_size, height, width, 3), dtype='float32')
        self.Y1 = np.zeros((batch_size, height // 4, width // 4, self.n_outputs), dtype='float32')
        self.Y2 = np.zeros((batch_size, height // 8, width // 8, self.n_outputs), dtype='float32')
        self.Y3 = np.zeros((batch_size, height // 16, width // 16, self.n_outputs), dtype='float32')

    def __len__(self):
        """Number of full batches per epoch."""
        return len(self.image_path_list) // self.batch_size

    def __getitem__(self, i):
        """
        Build batch number ``i``.

        :param i: batch index.
        :return: ``X, [Y1, Y2, Y3]`` (the preallocated arrays, overwritten in place).
        """
        start = i * self.batch_size
        stop = (i + 1) * self.batch_size
        batch_paths = zip(self.image_path_list[start:stop], self.label_path_list[start:stop])

        for n, (image_path, label_path) in enumerate(batch_paths):
            # NOTE(review): the image stays in OpenCV's BGR order here, while the
            # function-based generators in this file convert to RGB -- confirm
            # which order the model expects.
            image = cv2.imread(image_path, 1)
            label = cv2.imread(label_path, 0)

            if self.resize_shape:
                image = cv2.resize(image, self.resize_shape)
                # Nearest-neighbour keeps label values valid class ids.
                label = cv2.resize(label, self.resize_shape, interpolation=cv2.INTER_NEAREST)

            # Do augmentation (only if training)
            if self.mode == 'training':
                image, label = self._augment(image, label)

            self.X[n] = image
            self.Y1[n] = self._encode_label(label, 4)
            self.Y2[n] = self._encode_label(label, 8)
            self.Y3[n] = self._encode_label(label, 16)

        return self.X, [self.Y1, self.Y2, self.Y3]

    def _augment(self, image, label):
        """Apply the configured flips, brightness change, rotation/zoom and random crop."""
        if self.horizontal_flip and random.randint(0, 1):
            image = cv2.flip(image, 1)
            label = cv2.flip(label, 1)
        if self.vertical_flip and random.randint(0, 1):
            image = cv2.flip(image, 0)
            label = cv2.flip(label, 0)

        if self.brightness:
            factor = 1.0 + abs(random.gauss(mu=0.0, sigma=self.brightness))
            if random.randint(0, 1):
                factor = 1.0 / factor
            # 256-entry gamma lookup table applied to the image only.
            table = (((np.arange(256) / 255.0) ** factor) * 255).astype(np.uint8)
            image = cv2.LUT(image, table)

        angle = random.gauss(mu=0.0, sigma=self.rotation) if self.rotation else 0.0
        scale = random.gauss(mu=1.0, sigma=self.zoom) if self.zoom else 1.0
        if self.rotation or self.zoom:
            M = cv2.getRotationMatrix2D((image.shape[1] // 2, image.shape[0] // 2), angle, scale)
            image = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))
            label = cv2.warpAffine(label, M, (label.shape[1], label.shape[0]), flags=cv2.INTER_NEAREST)

        # Cropping belongs to training only: in every other mode the batch
        # buffers are sized from resize_shape.
        if self.crop_shape:
            image, label = _random_crop(image, label, self.crop_shape)
        return image, label

    def _encode_label(self, label, factor):
        """Downscale ``label`` by ``factor``, one-hot encode it and keep only the useful classes."""
        size = (label.shape[1] // factor, label.shape[0] // factor)
        small = cv2.resize(label, size, interpolation=cv2.INTER_NEAREST)
        filtered = _filter_labels(to_categorical(small, self.n_classes))
        # _filter_labels stacks the kept classes on the first axis; move that
        # axis last to get (rows, cols, classes) without scrambling pixels.
        return filtered.transpose((1, 2, 0))

    def on_epoch_end(self):
        """Shuffle image/label pairs together for the next epoch."""
        # Shuffle dataset for next epoch
        pairs = list(zip(self.image_path_list, self.label_path_list))
        if pairs:
            random.shuffle(pairs)
            shuffled_images, shuffled_labels = zip(*pairs)
            self.image_path_list = list(shuffled_images)
            self.label_path_list = list(shuffled_labels)
        # Fix memory leak (Keras bug)
        gc.collect()
class PolyDecay:
    """Polynomial learning-rate schedule: initial_lr * (1 - epoch / n_epochs) ** power."""

    def __init__(self, initial_lr, power, n_epochs):
        """
        :param initial_lr: learning rate returned for epoch 0.
        :param power: exponent applied to the remaining fraction of training.
        :param n_epochs: epoch count used as the denominator of the progress fraction.
        """
        self.initial_lr, self.power, self.n_epochs = initial_lr, power, n_epochs

    def scheduler(self, epoch):
        """Return the learning rate for ``epoch``."""
        progress = 1.0 * epoch / self.n_epochs
        remaining = 1.0 - progress
        return self.initial_lr * np.power(remaining, self.power)
class ExpDecay:
    """Exponential learning-rate schedule: initial_lr * exp(-decay * epoch)."""

    def __init__(self, initial_lr, decay):
        """
        :param initial_lr: learning rate returned for epoch 0.
        :param decay: decay rate multiplied by the epoch number in the exponent.
        """
        self.initial_lr, self.decay = initial_lr, decay

    def scheduler(self, epoch):
        """Return the learning rate for ``epoch``."""
        exponent = -self.decay * epoch
        return self.initial_lr * np.exp(exponent)
# Taken from Mapillary Vistas demo.py
def apply_color_map(image_array, labels):
    """
    Turn a 2-D array of label indices into a colour image.

    :param image_array: 2-D array whose values are positions in ``labels``.
    :param labels: sequence of label descriptions. Each entry is either a mapping with
                   a ``"color"`` key or a record with a ``color`` attribute (such as
                   the ``Label`` namedtuples of this module).
    :return: uint8 array of shape (rows, cols, 3). Pixels whose value matches no
             position in ``labels`` stay (0, 0, 0).
    """
    color_array = np.zeros((image_array.shape[0], image_array.shape[1], 3), dtype=np.uint8)

    for label_id, label in enumerate(labels):
        try:
            color = label["color"]
        except TypeError:
            # Tuple-like records cannot be indexed by a string key; read the
            # attribute instead.
            color = label.color
        # set all pixels with the current label to the color of the current label
        color_array[image_array == label_id] = color

    return color_array
# =====================
# Public Helper Methods
# =====================
def load_train_data(cv_path):
    """
    Read the training csv and keep the rows whose first two entries are existing files.

    :param cv_path: path of the csv file; the first two columns of every row are
                    checked with ``os.path.isfile``.
    :return: list of the kept rows (as produced by ``pandas.read_csv(...).values``).
    """
    rows = pandas.read_csv(cv_path).values
    df = [row for row in rows if os.path.isfile(row[0]) and os.path.isfile(row[1])]

    print("data processing finished")
    print("data frame size: " + str(len(df)))
    return df
def load_val_data(cv_path):
    """
    Read the validation csv and keep the rows whose first two entries are existing files.

    :param cv_path: path of the csv file; the first two columns of every row are
                    checked with ``os.path.isfile``.
    :return: list of the kept rows (as produced by ``pandas.read_csv(...).values``).
    """
    def _both_files_exist(row):
        return os.path.isfile(row[0]) and os.path.isfile(row[1])

    df = list(filter(_both_files_exist, pandas.read_csv(cv_path).values))
    size = len(df)

    print("data processing finished")
    print("data frame size: " + str(size))
    return df
# ===============
# Private methods
# ===============
def _random_crop(image, label, crop_shape):
    """
    Cut the same randomly placed window out of an image and its label map.

    :param image: array indexed as [row, col, channel].
    :param label: array indexed as [row, col] with the same height/width as ``image``.
    :param crop_shape: (width, height) of the window. A window exactly as large as the
                       image is allowed and returns the whole image.
    :return: tuple ``(image_crop, label_crop)`` (slices of the inputs).
    :raises Exception: if image and label sizes differ, or the window is larger than
                       the image.
    """
    if (image.shape[0] != label.shape[0]) or (image.shape[1] != label.shape[1]):
        raise Exception('Image and label must have the same dimensions!')

    crop_width, crop_height = crop_shape[0], crop_shape[1]
    max_x = image.shape[1] - crop_width
    max_y = image.shape[0] - crop_height
    if max_x < 0 or max_y < 0:
        raise Exception('Crop shape exceeds image dimensions!')

    # randint is inclusive on both ends, so the window can also touch the
    # right/bottom border (offset == max) and max == 0 is valid.
    x = random.randint(0, max_x)
    y = random.randint(0, max_y)

    return (image[y:y + crop_height, x:x + crop_width, :],
            label[y:y + crop_height, x:x + crop_width])
def _load_rgb_depth_image_label(label_row):
    """
    private helper method for loading images and labels
    :param label_row: a row of the label csv file, indexed as
                      [image path, label path, depth image path].
    :return: return the rgb image, the depth image (both converted from BGR to RGB),
             and the label image (read as a single-channel grayscale image)
    :raises IOError: if OpenCV cannot read one of the three files.
    """
    image_path = label_row[0]
    label_path = label_row[1]
    depth_image_path = label_row[2]

    image = cv2.imread(image_path, 1)
    image_depth = cv2.imread(depth_image_path, 1)
    label = cv2.imread(label_path, 0)

    # cv2.imread reports failure by returning None rather than raising; name
    # the offending file instead of failing later inside OpenCV.
    for path, data in ((image_path, image), (depth_image_path, image_depth), (label_path, label)):
        if data is None:
            raise IOError("could not read image: {}".format(path))

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_depth = cv2.cvtColor(image_depth, cv2.COLOR_BGR2RGB)

    return image, image_depth, label
def _load_data(csv_path):
    """
    Read image/label path pairs from a csv file, keeping only pairs that exist on disk.

    :param csv_path: path of the csv file; its first column holds image paths and its
                     second column holds label paths.
    :return: tuple ``(img_list, label_list)`` of equally long lists; entries at the
             same position belong together.
    """
    table = pandas.read_csv(csv_path)
    img_list_initial = table[table.columns[0]].values
    # Labels live in the second column (reading column 0 again would pair every
    # image with itself).
    label_list_initial = table[table.columns[1]].values

    img_list = []
    label_list = []
    # Iterate over the candidates read from the csv, not over the (still empty)
    # output list.
    for img_path, label_path in zip(img_list_initial, label_list_initial):
        if os.path.isfile(img_path) and os.path.isfile(label_path):
            img_list.append(img_path)
            label_list.append(label_path)

    print("data processing finished")
    print("data frame size: " + str(len(img_list)))
    return img_list, label_list
def _filter_labels(categorical_labels):
    """
    Keep a fixed subset of the class channels of a one-hot label array.

    :param categorical_labels: array indexed as [row, col, class] with at least 34
                               classes on the last axis.
    :return: new array of shape (13, rows, cols): the kept channels, in the order
             listed below, stacked along the FIRST axis.
    """
    # Positions in the module-level label table: unlabeled, ground, road,
    # sidewalk, building, pole, vegetation, terrain, sky, person, car,
    # motorcycle, bicycle.
    kept_channels = (0, 6, 7, 8, 11, 17, 21, 22, 23, 24, 26, 32, 33)
    return np.stack([categorical_labels[:, :, channel] for channel in kept_channels])