import math
import os
import sys
from timeit import default_timer as timer

import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.utils import multi_gpu_model

# Make the bundled kerasyolo3 package importable regardless of the working directory.
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/kerasyolo3")

from yolo3.model import yolo_head, yolo_correct_boxes, yolo_boxes_and_scores, yolo_eval

# VOC class-name/index mapping, kept for the (currently disabled) voc_map filtering
# in detect_image() below.
# coco2voc = {
#     'aeroplane': 0,
#     'bicycle': 1,
#     'bird': 2,
#     'boat': 3,
#     'bottle': 4,
#     'bus': 5,
#     'car': 6,
#     'cat': 7,
#     'chair': 8,
#     'cow': 9,
#     'diningtable': 10,
#     'dog': 11,
#     'horse': 12,
#     'motorbike': 13,
#     'person': 14,
#     'pottedplant': 15,
#     'sheep': 16,
#     'sofa': 17,
#     'train': 18,
#     'tvmonitor': 19
# }


class YOLO(object):
    _defaults = {
        "anchors_path": '/workspace/scripts/utils/yolo/kerasyolo3/model_data/anchors.txt',
        "classes_path": '/workspace/scripts/utils/yolo/kerasyolo3/model_data/classes.txt',
        "score": 0.01,
        "iou": 0.45,
        "gpu_num": 1,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    def __init__(self, image_sz, output_num, score, keep_aspect_ratio, **kwargs):
        self.output_num = output_num
        print("image_sz: {}".format(image_sz))
        # Grid sizes of the requested YOLO heads: the coarsest head is image_sz // 32
        # and every additional head doubles the resolution (e.g. 13, 26, 52 for a
        # 416x416 input).
        if self.output_num >= 1:
            self.x1 = image_sz // 32
        if self.output_num >= 2:
            self.x2 = self.x1 * 2
        if self.output_num >= 3:
            self.x3 = self.x2 * 2
        self.__dict__.update(self._defaults)  # set up default values
        self.__dict__.update(kwargs)  # and update with user overrides
        self.class_names = self._get_class()
        # Each grid cell predicts 3 anchors, each with 4 box coordinates, 1 objectness
        # score and one score per class.
        self.output_channel_num = (len(self.class_names) + 5) * 3
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        self.score = score
        self.keep_aspect_ratio = keep_aspect_ratio
        self.boxes, self.scores, self.classes = self.generate()

    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape(-1, 2)

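    # Note on the expected model_data files (contents here are illustrative, not part
    # of this script): classes.txt holds one class name per line (e.g. "person",
    # "bicycle", "car"), and anchors.txt holds a single line of comma-separated
    # width,height values (e.g. "10,13, 16,30, 33,23, ..."), which _get_anchors()
    # reshapes into an (N, 2) array.
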
    def generate(self):
        # Placeholders for the original image size and for each raw network head; the
        # actual tensors are supplied in detect_image() and decoded by yolo_eval().
        self.input_image_shape = K.placeholder(shape=(2,))
        feature_outputs = []
        if self.output_num >= 1:
            self.feature_output0 = K.placeholder(shape=(1, self.x1, self.x1, self.output_channel_num))  # e.g. 13x13
            feature_outputs.append(self.feature_output0)
        if self.output_num >= 2:
            self.feature_output1 = K.placeholder(shape=(1, self.x2, self.x2, self.output_channel_num))  # e.g. 26x26
            feature_outputs.append(self.feature_output1)
        if self.output_num >= 3:
            self.feature_output2 = K.placeholder(shape=(1, self.x3, self.x3, self.output_channel_num))  # e.g. 52x52
            feature_outputs.append(self.feature_output2)

        boxes, scores, classes = yolo_eval(
            feature_outputs,
            self.anchors,
            len(self.class_names),
            self.input_image_shape,
            score_threshold=self.score,
            iou_threshold=self.iou,
            keep_aspect_ratio=self.keep_aspect_ratio)
        return boxes, scores, classes

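    # Worked example (assuming a 416x416 input and 80 classes; the numbers are for
    # illustration only): x1 = 416 // 32 = 13, x2 = 26, x3 = 52 and
    # output_channel_num = (80 + 5) * 3 = 255, so the heads passed to yolo_eval()
    # have shapes (1, 13, 13, 255), (1, 26, 26, 255) and (1, 52, 52, 255), i.e.
    # 43095, 172380 and 689520 values each once flattened.
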
    def detect_image(self, output_data_lst, voc_map, orig_width, orig_height):
        # Each entry of output_data_lst is a flattened network head. Match it to the
        # expected grid size, reshape it to NCHW and transpose it to the NHWC layout
        # that the placeholders in generate() expect.
        for output_data in output_data_lst:
            side = math.sqrt(len(output_data) / self.output_channel_num)
            if self.output_num >= 1 and side == self.x1:
                image_data0 = np.reshape(output_data, (1, self.output_channel_num, self.x1, self.x1))
                image_data0 = image_data0.transpose(0, 2, 3, 1)
            elif self.output_num >= 2 and side == self.x2:
                image_data1 = np.reshape(output_data, (1, self.output_channel_num, self.x2, self.x2))
                image_data1 = image_data1.transpose(0, 2, 3, 1)
            elif self.output_num >= 3 and side == self.x3:
                image_data2 = np.reshape(output_data, (1, self.output_channel_num, self.x3, self.x3))
                image_data2 = image_data2.transpose(0, 2, 3, 1)
            else:
                raise ValueError(
                    "Output of length {} does not match any expected feature map size".format(len(output_data)))

        # Feed the reshaped heads and the original image size into the evaluation graph.
        feed_dict_ = {
            self.feature_output0: image_data0,
            self.input_image_shape: [orig_height, orig_width],
            K.learning_phase(): 0,
        }
        if self.output_num >= 2:
            feed_dict_[self.feature_output1] = image_data1
        if self.output_num >= 3:
            feed_dict_[self.feature_output2] = image_data2

        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict=feed_dict_)

        res = []
        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            # if voc_map:
            #     if predicted_class not in coco2voc:
            #         continue

            box = out_boxes[i]
            score = out_scores[i]
            # Round the box corners to integers and clip them to the original image.
            top, left, bottom, right = box
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(orig_height, np.floor(bottom + 0.5).astype('int32'))
            right = min(orig_width, np.floor(right + 0.5).astype('int32'))
            res.append((predicted_class, str(score), str(left), str(top), str(right), str(bottom)))

        return res

    def close_session(self):
        self.sess.close()
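

# Minimal usage sketch, not part of the original script: it assumes the default
# anchors/classes files exist at the paths in YOLO._defaults and that the flattened
# head blobs normally come from an external inference step (the all-zero arrays below
# are stand-ins for illustration only).
if __name__ == "__main__":
    yolo = YOLO(image_sz=416, output_num=3, score=0.3, keep_aspect_ratio=True)
    dummy_outputs = [
        np.zeros(13 * 13 * yolo.output_channel_num, dtype=np.float32),
        np.zeros(26 * 26 * yolo.output_channel_num, dtype=np.float32),
        np.zeros(52 * 52 * yolo.output_channel_num, dtype=np.float32),
    ]
    detections = yolo.detect_image(dummy_outputs, voc_map=False, orig_width=1920, orig_height=1080)
    print("{} detections".format(len(detections)))
    yolo.close_session()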