import numpy as np
from keras import backend as K
from timeit import default_timer as timer
import os
from keras.utils import multi_gpu_model
import tensorflow as tf
import math
import sys

# Make the bundled keras-yolo3 fork importable from this script's directory.
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/kerasyolo3")
from yolo3.model import yolo_head, yolo_correct_boxes, yolo_boxes_and_scores, yolo_eval
# coco2voc = {
# 'aeroplane':0,
# 'bicycle':1,
# 'bird':2,
# 'boat':3,
# 'bottle':4,
# 'bus':5,
# 'car':6,
# 'cat':7,
# 'chair':8,
# 'cow':9,
# 'diningtable':10,
# 'dog':11,
# 'horse':12,
# 'motorbike':13,
# 'person':14,
# 'pottedplant':15,
# 'sheep':16,
# 'sofa':17,
# 'train':18,
# 'tvmonitor':19
# }

class YOLO(object):
    """Runs YOLOv3 post-processing (box decoding, score filtering and NMS) on
    raw feature maps via the bundled keras-yolo3 yolo_eval graph."""

    _defaults = {
        "anchors_path": '/workspace/scripts/utils/yolo/kerasyolo3/model_data/anchors.txt',
        "classes_path": '/workspace/scripts/utils/yolo/kerasyolo3/model_data/classes.txt',
        "score": 0.01,
        "iou": 0.45,
        "gpu_num": 1,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"
    def __init__(self, image_sz, output_num, score, keep_aspect_ratio, **kwargs):
        self.output_num = output_num
        print("image_sz: {}".format(image_sz))
        # Grid sizes of the detection heads: image_sz/32, image_sz/16, image_sz/8.
        if self.output_num >= 1:
            self.x1 = image_sz // 32
        if self.output_num >= 2:
            self.x2 = self.x1 * 2
        if self.output_num >= 3:
            self.x3 = self.x2 * 2
        self.__dict__.update(self._defaults)  # set up default values
        self.__dict__.update(kwargs)          # and update with user overrides
        self.class_names = self._get_class()
        # Each head predicts 3 anchors x (4 box coords + 1 objectness + num_classes).
        self.output_channel_num = (len(self.class_names) + 5) * 3
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        self.score = score
        self.keep_aspect_ratio = keep_aspect_ratio
        self.boxes, self.scores, self.classes = self.generate()
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape(-1, 2)
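
    # The anchors file is assumed to follow the keras-yolo3 convention: one line
    # of comma-separated "width,height" values, e.g. (illustrative numbers only)
    #   10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
    # which _get_anchors() above reshapes into an (N, 2) array of anchor boxes.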

    def generate(self):
        self.input_image_shape = K.placeholder(shape=(2,))
        feature_outputs = []
        # One placeholder per detection head, in NHWC layout.
        if self.output_num >= 1:
            self.feature_output0 = K.placeholder(shape=(1, self.x1, self.x1, self.output_channel_num))  # 13
            feature_outputs.append(self.feature_output0)
        if self.output_num >= 2:
            self.feature_output1 = K.placeholder(shape=(1, self.x2, self.x2, self.output_channel_num))  # 26
            feature_outputs.append(self.feature_output1)
        if self.output_num >= 3:
            self.feature_output2 = K.placeholder(shape=(1, self.x3, self.x3, self.output_channel_num))  # 52
            feature_outputs.append(self.feature_output2)
        boxes, scores, classes = yolo_eval(
            feature_outputs,
            self.anchors,
            len(self.class_names),
            self.input_image_shape,
            score_threshold=self.score,
            iou_threshold=self.iou,
            keep_aspect_ratio=self.keep_aspect_ratio)
        return boxes, scores, classes
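
    # For example (illustrative numbers only): with image_sz=416 and an 80-class
    # model, output_channel_num = (80 + 5) * 3 = 255, so the three heads have
    # shapes (1, 13, 13, 255), (1, 26, 26, 255) and (1, 52, 52, 255); flattened,
    # their lengths are 13*13*255 = 43095, 26*26*255 = 172380 and 52*52*255 = 689520.
    # detect_image() below relies on those lengths to tell the heads apart.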

    def detect_image(self, output_data_lst, voc_map, orig_width, orig_height):
        """Reshape the flattened CHW feature maps in output_data_lst, run the
        yolo_eval graph and return (class, score, left, top, right, bottom)
        tuples of strings, with box coordinates clipped to the original image."""
        for output_data in output_data_lst:
            if self.output_num >= 1 and len(output_data) == self.x1 * self.x1 * self.output_channel_num:    # 13
                image_data0 = np.reshape(output_data, (1, self.output_channel_num, self.x1, self.x1))
                image_data0 = image_data0.transpose(0, 2, 3, 1)
            elif self.output_num >= 2 and len(output_data) == self.x2 * self.x2 * self.output_channel_num:  # 26
                image_data1 = np.reshape(output_data, (1, self.output_channel_num, self.x2, self.x2))
                image_data1 = image_data1.transpose(0, 2, 3, 1)
            elif self.output_num >= 3 and len(output_data) == self.x3 * self.x3 * self.output_channel_num:  # 52
                image_data2 = np.reshape(output_data, (1, self.output_channel_num, self.x3, self.x3))
                image_data2 = image_data2.transpose(0, 2, 3, 1)
            else:
                raise ValueError(
                    "Output of length {} does not match any detection head "
                    "(output_channel_num={})".format(len(output_data), self.output_channel_num))
        # Feed whichever heads this model actually has.
        feed_dict_ = {
            self.input_image_shape: [orig_height, orig_width],
            K.learning_phase(): 0,
        }
        if self.output_num >= 1:
            feed_dict_[self.feature_output0] = image_data0
        if self.output_num >= 2:
            feed_dict_[self.feature_output1] = image_data1
        if self.output_num >= 3:
            feed_dict_[self.feature_output2] = image_data2
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict=feed_dict_)

        res = []
        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            # if voc_map:
            #     if predicted_class not in coco2voc:
            #         continue
            box = out_boxes[i]
            score = out_scores[i]
            top, left, bottom, right = box
            # Round to integer pixel coordinates and clip to the original image.
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(orig_height, np.floor(bottom + 0.5).astype('int32'))
            right = min(orig_width, np.floor(right + 0.5).astype('int32'))
            res.append((predicted_class, str(score), str(left), str(top), str(right), str(bottom)))
        return res

    def close_session(self):
        self.sess.close()
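

# A minimal usage sketch, not part of the original script: it assumes a 416x416,
# three-head model and that `raw_outputs` already holds flattened CHW feature
# maps produced by some external inference step. The sizes and dummy zero data
# below are placeholders for illustration only.
if __name__ == "__main__":
    yolo = YOLO(image_sz=416, output_num=3, score=0.3, keep_aspect_ratio=False)
    channels = yolo.output_channel_num
    # Dummy feature maps with the flattened lengths detect_image() expects.
    raw_outputs = [
        np.zeros(13 * 13 * channels, dtype=np.float32),
        np.zeros(26 * 26 * channels, dtype=np.float32),
        np.zeros(52 * 52 * channels, dtype=np.float32),
    ]
    detections = yolo.detect_image(raw_outputs, voc_map=False,
                                   orig_width=640, orig_height=480)
    for det in detections:
        print(det)
    yolo.close_session()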