scripts/nodes/segmentation_inference

#!/usr/bin/env python

import os
import cv2 as cv
import yaml
import torch
import rospy, rospkg
import numpy as np
from cv_bridge import CvBridge, CvBridgeError
from sensor_msgs.msg import Image, CompressedImage, CameraInfo
from traversability_estimation.utils import convert_label, convert_color
from hrnet.utils.utils import draw_legend
from time import time
from threading import RLock
from hrnet.config import config
from hrnet import models


# pkg_path = os.path.realpath(os.path.join(os.path.dirname(__file__), '..', '..'))
pkg_path = rospkg.RosPack().get_path('traversability_estimation')
RELLIS_CONFIG = os.path.join(pkg_path, "config/rellis.yaml")
RELLIS_CONFIG_TRAVERSABILITY = os.path.join(pkg_path, "config/rellis_to_traversability.yaml")


class ImageProcessor(object):
    """
    Class for image segmentations

    Args:
        compressed [bool] - for differentiation between compressed and uncompressed image messages
        number [int] - index of the image processor class <0,num_cameras)

    Topics:
        input_{number} - input image topic
        output_{number} - output segmented image topic
    """
    def __init__(self):
        # Read parameters.
        self.num_cameras = rospy.get_param('~num_cameras', 1)
        self.image_transport = rospy.get_param('~image_transport', 'compressed')
        self.traversability_labels = rospy.get_param('~traversability_labels', True)
        label_map = rospy.get_param('~label_map', None)
        self.model_name = rospy.get_param('~model_name', 'hrnet')
        assert label_map is None or isinstance(label_map, (dict, list))
        if isinstance(label_map, dict):
            label_map = dict((int(k), int(v)) for k, v in label_map.items())
            n = max(label_map) + 1
            self.label_map = np.zeros((n,), dtype=np.uint8)
            for k, v in label_map.items():
                self.label_map[k] = v
        elif isinstance(label_map, list):
            self.label_map = np.asarray(label_map)
        else:
            self.label_map = None
        if self.label_map is not None:
            rospy.loginfo('Label map: %s', self.label_map)
        self.device = rospy.get_param('~device', 'cpu')
        self.dtype = rospy.get_param('~dtype', 'float')
        assert self.dtype in ('float', 'half')
        self.dtype = eval('torch.%s' % self.dtype)
        self.max_age = rospy.get_param('~max_age', 1.0)
        self.input_size = rospy.get_param('~input_size', None)
        self.input_scale = rospy.get_param('~input_scale', 1.0)
        if self.input_size:
            self.input_size = tuple(self.input_size)
            self.input_scale = 0.0
        else:
            self.input_size = (0, 0)
        assert len(self.input_size) == 2

        self.bridge = CvBridge()
        self.model = self.load_model()

        # Training mean, std
        self.mean = np.array([0.485, 0.456, 0.406])
        self.std = np.array([0.229, 0.224, 0.225])
        # Running test mean
        self.test_sum = np.zeros((3,))
        self.test_n = 0

        # Draw legend to segmentation
        # TODO: Generic label mapping.
        if self.traversability_labels:
            self.data_cfg = yaml.safe_load(open(RELLIS_CONFIG_TRAVERSABILITY, 'r'))
        else:
            self.data_cfg = yaml.safe_load(open(RELLIS_CONFIG, 'r'))

        # Processing queue: image with camera info for each camera.
        self.lock = RLock()
        self.images = self.num_cameras * [None]
        self.camera_infos = self.num_cameras * [None]
        # Publishers
        self.image_color_pubs = self.num_cameras * [None]
        self.compressed_color_pubs = self.num_cameras * [None]
        self.image_pubs = self.num_cameras * [None]
        self.compressed_pubs = self.num_cameras * [None]
        self.camera_info_pubs = self.num_cameras * [None]
        self.traversability_pubs = self.num_cameras * [None]
        # Subscribers
        self.image_subs = self.num_cameras * [None]
        self.compressed_subs = self.num_cameras * [None]
        self.camera_info_subs = self.num_cameras * [None]
        for i in range(self.num_cameras):
            # Setup publishers.
            pub = rospy.Publisher('output_%i/semseg_color' % i, Image, queue_size=1)
            self.image_color_pubs[i] = pub
            pub = rospy.Publisher('output_%i/semseg_color/compressed' % i, CompressedImage, queue_size=1)
            self.compressed_color_pubs[i] = pub
            pub = rospy.Publisher('output_%i/semseg' % i, Image, queue_size=1)
            self.image_pubs[i] = pub
            pub = rospy.Publisher('output_%i/semseg/compressed' % i, CompressedImage, queue_size=1)
            self.compressed_pubs[i] = pub
            pub = rospy.Publisher('output_%i/camera_info' % i, CameraInfo, queue_size=1)
            self.camera_info_pubs[i] = pub
            pub = rospy.Publisher('output_%i/traversability/compressed' % i, CompressedImage, queue_size=1)
            self.traversability_pubs[i] = pub
            # Setup subscribers.
            if self.image_transport == 'raw':
                sub = rospy.Subscriber('input_%i/image' % i, Image,
                                       lambda msg, i=i: self.callback_color(msg, i), queue_size=1)
            elif self.image_transport == 'compressed':
                sub = rospy.Subscriber('input_%i/image/compressed' % i, CompressedImage,
                                       lambda msg, i=i: self.callback_color(msg, i), queue_size=1)
            self.image_subs[i] = sub
            sub = rospy.Subscriber('input_%i/camera_info' % i, CameraInfo,
                                   lambda msg, i=i: self.get_camera_info(msg, i), queue_size=1)
            self.camera_info_subs[i] = sub
            rospy.loginfo('Publishers and subscribers for camera %i ready.', i)

    def get_camera_info(self, msg, i=None):
        """Store camera calibration for i-th camera."""
        assert isinstance(msg, CameraInfo)
        with self.lock:
            self.camera_infos[i] = msg
        self.camera_info_subs[i].unregister()
        rospy.loginfo('Camera %i (%s) unsubscribed.', i, msg.header.frame_id)

    def load_model(self):
        if self.model_name == 'hrnet':
            model = self.load_hrnet_model()
            rospy.loginfo('Loaded HRnet model')
        elif self.model_name == 'smp':
            model = self.load_smp_model()
            rospy.loginfo('Loaded SMP model')
        else:
            raise ValueError('Supported model names: "hrnet", "smp"')
        return model

    def load_smp_model(self):
        model_weights = rospy.get_param('~smp_weights', "PSPNet_resnext50_32x4d_1184x1920_lr0.0001_bs1_epoch6_Rellis3DImages_iou_0.68.pth")
        PRETRAINED_MODEL = os.path.join(pkg_path, "config/weights/", "image/%s" % model_weights)
        model = torch.load(PRETRAINED_MODEL)
        model = model.eval()
        model = model.to(device=self.device, dtype=self.dtype)
        return model

    def load_hrnet_model(self):
        # Updates PATHS in model config file
        MODEL_CONFIG = os.path.join(pkg_path, "config/hrnet_rellis/"
                                              "seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484.yaml")
        model_weights = rospy.get_param('~hrnet_weights',
                                        "seg_hrnet_ocr_w48_train_512x1024_sgd_lr1e-2_wd5e-4_bs_12_epoch484/best.pth")
        PRETRAINED_MODEL = os.path.join(pkg_path, "config/weights/", model_weights)
        with open(MODEL_CONFIG, 'r') as f:
            cfg = yaml.safe_load(f)
        cfg['MODEL']['PRETRAINED'] = PRETRAINED_MODEL
        with open(MODEL_CONFIG, 'w') as f:
            yaml.dump(cfg, f)

        # Create model
        config.defrost()
        config.merge_from_file(MODEL_CONFIG)
        config.merge_from_list(['TEST.MODEL_FILE', PRETRAINED_MODEL])
        config.freeze()
        model = eval('models.' + config.MODEL.NAME + '.get_seg_model')(config)

        pretrained_dict = torch.load(PRETRAINED_MODEL, map_location='cpu')
        if 'state_dict' in pretrained_dict:
            pretrained_dict = pretrained_dict['state_dict']
        model_dict = model.state_dict()
        pretrained_dict = {k[6:]: v for k, v in pretrained_dict.items()
                           if k[6:] in model_dict.keys()}
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)

        model = model.eval()
        model = model.to(device=self.device, dtype=self.dtype)
        return model

    def callback_color(self, img_msg, i=None):
        age = (rospy.Time.now() - img_msg.header.stamp).to_sec()
        if age > self.max_age:
            rospy.logwarn('Image message is too old for segmentation: %.3f' % age)
            return

        with self.lock:
            self.images[i] = img_msg
            rospy.logdebug('Image updated for camera %i (%s).', i, img_msg.header.frame_id)

        return

    def preprocessing(self, image):
        image = cv.resize(image, self.input_size, fx=self.input_scale, fy=self.input_scale,
                          interpolation=cv.INTER_AREA)
        # image shape should be divisible by 32
        h, w = image.shape[:2]
        image = cv.resize(image, (32 * (w // 32), 32 * (h // 32)),
                          interpolation=cv.INTER_AREA)
        image = image.astype(np.float32)
        image = image[..., ::-1]  # BGR to RGB
        image = image / 255.0

        # Training data channel-wise mean and std.
        # mean, std = self.mean, self.std

        # Test data intensity mean, image std.
        # self.test_sum += image.mean(axis=(0, 1))
        # self.test_n += 1
        # test_mean = self.test_sum / self.test_n
        # rospy.loginfo('Train mean %s, test mean %s.', self.mean, test_mean)
        # mean = test_mean.mean()
        # std = image.std()

        # Image channel-wise mean and std.
        # mean, std = image.mean(axis=(0, 1)), image.std(axis=(0, 1))

        # Image intensity mean and std.
        # mean, std = image.mean(), image.std()

        # Re-scaled training data mean and std.
        intensity_ratio = image.mean() / self.mean.mean()
        # intensity_ratio = test_mean.mean() / self.mean.mean()
        mean = intensity_ratio * self.mean
        # std = intensity_ratio * self.std
        std = image.std()

        image -= mean
        image /= std

        image = image.transpose((2, 0, 1))  # HxWxC tp CxHxW
        return image

    def process(self, image):
        t0 = time()
        orig_size = image.shape[:2]
        image = self.preprocessing(image)
        t1 = time()
        rospy.logdebug('Preprocessing took (%.3f s).', t1 - t0)
        with torch.no_grad():
            input = torch.as_tensor(image).unsqueeze(0)
            input = input.to(device=self.device, dtype=self.dtype)
            pred = self.model(input)
        t2 = time()
        rospy.logdebug('Model inference of image shape (%i, %i) took (%.3f s).',
                       input.shape[2], input.shape[3], t2 - t1)
        pred, trav = self.postprocessing(pred, orig_size)
        rospy.logdebug('Postprocessing took (%.3f s).', time() - t2)
        return pred, trav

    def postprocessing(self, pred_raw, size):
        if self.model_name == 'hrnet':
            pred_raw = pred_raw[config.TEST.OUTPUT_INDEX]
            pred_raw = torch.softmax(pred_raw, dim=1)

        pred_raw = pred_raw.squeeze(0).cpu().numpy()
        pred = np.argmax(pred_raw, axis=0)

        # create traversability map here from predicted classes
        traverable_classes = [1, 8, 14, 16, 17, 18]  # grass, asphalt, concrete, puddle, mud, rubble
        grass_prob = pred_raw[traverable_classes]
        trav = np.max(grass_prob, axis=0).astype('float')
        # cv.imshow('Traversability', trav)
        # cv.waitKey(1)

        pred = convert_label(pred, inverse=True)
        pred = cv.resize(pred.astype('float32'), (size[1], size[0]), interpolation=cv.INTER_LINEAR).astype('int8')
        if self.label_map is not None:
            pred = self.label_map[pred]
        return pred, trav

    def spin(self):
        """Processing loop. Keep processing images in queue until shutdown."""
        i = -1
        while not rospy.is_shutdown():
            t0 = rospy.Time.now()
            i = (i + 1) % len(self.images)
            with self.lock:
                image, camera_info = self.images[i], self.camera_infos[i]
            if not image or not camera_info:
                continue

            try:
                if self.image_transport:
                    arr = self.bridge.compressed_imgmsg_to_cv2(image, "bgr8")
                else:
                    arr = self.bridge.imgmsg_to_cv2(image, "bgr8")
            except CvBridgeError as e:
                rospy.logerr(e)
                raise

            pred, trav = self.process(arr)
            rospy.loginfo('Image %i (%s) processed (%.3f s).',
                          i, image.header.frame_id, (rospy.Time.now() - t0).to_sec())

            t1 = rospy.Time.now()
            if self.compressed_pubs[i].get_num_connections():
                msg = CompressedImage()
                msg.header = image.header
                msg.format = "jpeg"
                msg.data = np.array(cv.imencode('.jpg', pred)[1]).tobytes()
                self.compressed_pubs[i].publish(msg)

            if self.compressed_pubs[i].get_num_connections():
                msg = CompressedImage()
                msg.header = image.header
                msg.format = "jpeg"
                msg.data = np.asarray(cv.imencode('.jpg', pred)[1]).tobytes()
                self.compressed_pubs[i].publish(msg)

            if self.compressed_color_pubs[i].get_num_connections():
                pred = convert_color(pred, self.data_cfg["color_map"])
                pred = pred[..., ::-1]  # RGB to BGR
                msg = CompressedImage()
                msg.header = image.header
                msg.format = "jpeg"
                msg.data = np.asarray(cv.imencode('.jpg', pred)[1]).tobytes()
                self.compressed_color_pubs[i].publish(msg)

            if self.traversability_pubs[i].get_num_connections():
                msg = self.bridge.cv2_to_compressed_imgmsg(np.uint8(255 * trav))
                msg.header = image.header
                self.traversability_pubs[i].publish(msg)

            self.camera_info_pubs[i].publish(camera_info)
            rospy.loginfo('Image %i (%s) publishing (%.3f s).',
                          i, image.header.frame_id, (rospy.Time.now() - t1).to_sec())


def main():
    rospy.init_node('segmentation_inference', log_level=rospy.DEBUG)

    # Draw legend
    legend = rospy.get_param("~legend", False)
    label_config = rospy.get_param('~label_config', None)
    traversability_labels = rospy.get_param('~traversability_labels', True)
    if legend:
        if not label_config:
            label_config = RELLIS_CONFIG_TRAVERSABILITY if traversability_labels else RELLIS_CONFIG
        data_cfg = yaml.safe_load(open(label_config, 'r'))
        draw_legend(data_cfg)

    node = ImageProcessor()
    node.spin()


if __name__ == '__main__':
    main()