import math
import os
import sys
from timeit import default_timer as timer

import numpy as np
from keras import backend as K
from keras.utils import multi_gpu_model
import tensorflow as tf

# The bundled keras-yolo3 checkout lives next to this file; it must be on
# sys.path before the ``yolo3`` package can be imported.
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/kerasyolo3")
from yolo3.model import yolo_head, yolo_correct_boxes, yolo_boxes_and_scores, yolo_eval

# Class-name -> VOC index table, kept for reference.  It is not used by the
# code below (the ``voc_map`` argument of ``detect_image`` is ignored).
# coco2voc = {
#     'aeroplane':0,
#     'bicycle':1,
#     'bird':2,
#     'boat':3,
#     'bottle':4,
#     'bus':5,
#     'car':6,
#     'cat':7,
#     'chair':8,
#     'cow':9,
#     'diningtable':10,
#     'dog':11,
#     'horse':12,
#     'motorbike':13,
#     'person':14,
#     'pottedplant':15,
#     'sheep':16,
#     'sofa':17,
#     'train':18,
#     'tvmonitor':19
# }


class YOLO(object):
    """Post-process raw YOLO feature maps into labelled bounding boxes.

    The network itself is run elsewhere; this class only builds the
    ``yolo_eval`` graph on top of placeholders (one per output scale) and
    feeds externally produced feature maps into it.
    """

    _defaults = {
        "anchors_path": '/workspace/scripts/utils/yolo/kerasyolo3/model_data/anchors.txt',
        "classes_path": '/workspace/scripts/utils/yolo/kerasyolo3/model_data/classes.txt',
        "score": 0.01,
        "iou": 0.45,
        "gpu_num": 1,
    }

    # Attribute names holding the grid edge length of each scale, coarsest first.
    _GRID_ATTRS = ("x1", "x2", "x3")

    @classmethod
    def get_defaults(cls, n):
        """Return the default value registered under ``n``.

        For an unknown name a descriptive string is returned instead of
        raising (existing behaviour, kept for callers that rely on it).
        """
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    def __init__(self, image_sz, output_num, score, keep_aspect_ratio, **kwargs):
        """Load class names and anchors and build the evaluation graph.

        Args:
            image_sz: Edge length of the (square) network input in pixels.
            output_num: Number of output scales the network produces (1-3).
            score: Score threshold handed to ``yolo_eval``; it replaces the
                ``"score"`` default and any ``score`` passed via ``kwargs``.
            keep_aspect_ratio: Forwarded unchanged to ``yolo_eval``.
            **kwargs: Overrides for the entries of ``_defaults``
                (e.g. ``anchors_path``, ``classes_path``, ``iou``).
        """
        self.output_num = output_num
        print("image_sz: {}".format(image_sz))

        # Grid edge length per scale: the coarsest grid is image_sz // 32
        # (e.g. 13 for a 416-pixel input) and each further scale doubles it.
        if self.output_num >= 1:
            self.x1 = image_sz // 32
        if self.output_num >= 2:
            self.x2 = self.x1 * 2
        if self.output_num >= 3:
            self.x3 = self.x2 * 2

        self.__dict__.update(self._defaults)  # set up default values
        self.__dict__.update(kwargs)  # and update with user overrides

        self.class_names = self._get_class()
        # Channels per grid cell: 3 boxes, each with (number of classes + 5) values.
        self.output_channel_num = (len(self.class_names) + 5) * 3
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        self.score = score
        self.keep_aspect_ratio = keep_aspect_ratio
        self.boxes, self.scores, self.classes = self.generate()

    def _get_class(self):
        """Read the class-name file: one name per line, whitespace stripped."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            return [line.strip() for line in f]

    def _get_anchors(self):
        """Read the anchors file and return them as an ``(N, 2)`` array.

        Only the first line of the file is read; it must hold a
        comma-separated list of numbers.
        """
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            first_line = f.readline()
        values = [float(x) for x in first_line.split(',')]
        return np.array(values).reshape(-1, 2)

    def _grid_sizes(self):
        """Return the grid edge length of every configured scale, coarsest first."""
        active = self._GRID_ATTRS[:max(self.output_num, 0)]
        return [getattr(self, attr) for attr in active]

    def generate(self):
        """Create the input placeholders and the ``yolo_eval`` output tensors.

        One placeholder of shape ``(1, grid, grid, output_channel_num)`` is
        created per configured scale and stored as ``feature_output0``,
        ``feature_output1``, ``feature_output2``.

        Returns:
            The ``(boxes, scores, classes)`` tensors produced by ``yolo_eval``.
        """
        self.input_image_shape = K.placeholder(shape=(2, ))

        feature_outputs = []
        for idx, grid in enumerate(self._grid_sizes()):
            placeholder = K.placeholder(
                shape=(1, grid, grid, self.output_channel_num))
            setattr(self, "feature_output{}".format(idx), placeholder)
            feature_outputs.append(placeholder)

        boxes, scores, classes = yolo_eval(
            feature_outputs,
            self.anchors,
            len(self.class_names),
            self.input_image_shape,
            score_threshold=self.score,
            iou_threshold=self.iou,
            keep_aspect_ratio=self.keep_aspect_ratio)
        return boxes, scores, classes

    def detect_image(self, output_data_lst, voc_map, orig_width, orig_height):
        """Turn raw network outputs into a list of detections.

        Args:
            output_data_lst: Flat buffers, one per output scale, in any
                order.  Each must hold ``output_channel_num * grid * grid``
                values laid out channel-first; the grid size is recognised
                from the buffer length.  If two buffers have the same size
                the later one wins.
            voc_map: Currently unused; kept for interface compatibility.
            orig_width: Width of the original image, used to clamp boxes.
            orig_height: Height of the original image, used to clamp boxes.

        Returns:
            A list of ``(class_name, score, left, top, right, bottom)``
            tuples in which everything but the class name is converted with
            ``str``.  Detections are listed in reverse order of the
            ``yolo_eval`` output.

        Raises:
            ValueError: If a buffer's length matches none of the configured
                scales, or if a configured scale received no buffer.
        """
        grids = self._grid_sizes()
        channels = self.output_channel_num
        scale_inputs = [None] * len(grids)

        for output_data in output_data_lst:
            n_values = len(output_data)
            for idx, grid in enumerate(grids):
                # Exact integer test; avoids comparing a float sqrt for equality.
                if n_values == channels * grid * grid:
                    planar = np.reshape(output_data, (1, channels, grid, grid))
                    # Channel-first -> channel-last, as the placeholders expect.
                    scale_inputs[idx] = planar.transpose(0, 2, 3, 1)
                    break
            else:
                raise ValueError(
                    "output of length {} matches none of the expected grid "
                    "sizes {} with {} channels; check that the classes file "
                    "matches the model".format(n_values, grids, channels))

        missing = [grids[idx] for idx, data in enumerate(scale_inputs)
                   if data is None]
        if missing:
            raise ValueError(
                "no output data supplied for grid size(s) {}".format(missing))

        feed_dict_ = {}
        for idx, data in enumerate(scale_inputs):
            feed_dict_[getattr(self, "feature_output{}".format(idx))] = data
        feed_dict_[self.input_image_shape] = [orig_height, orig_width]
        feed_dict_[K.learning_phase()] = 0

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict=feed_dict_
        )

        res = []
        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            score = out_scores[i]

            # Round to the nearest pixel and clamp to the image bounds.
            top, left, bottom, right = out_boxes[i]
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(orig_height, np.floor(bottom + 0.5).astype('int32'))
            right = min(orig_width, np.floor(right + 0.5).astype('int32'))

            res.append((predicted_class, str(score), str(left), str(top),
                        str(right), str(bottom)))
        return res

    def close_session(self):
        """Close the TensorFlow session obtained in ``__init__``."""
        self.sess.close()