import numpy as np
from keras import backend as K
from timeit import default_timer as timer
import os
from keras.utils import multi_gpu_model
import tensorflow as tf
import math
import sys

# Make the bundled keras-yolo3 fork importable from this script's directory.
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/kerasyolo3")
from yolo3.model import yolo_head, yolo_correct_boxes, yolo_boxes_and_scores, yolo_eval
# coco2voc = {
# 'aeroplane':0,
# 'bicycle':1,
# 'bird':2,
# 'boat':3,
# 'bottle':4,
# 'bus':5,
# 'car':6,
# 'cat':7,
# 'chair':8,
# 'cow':9,
# 'diningtable':10,
# 'dog':11,
# 'horse':12,
# 'motorbike':13,
# 'person':14,
# 'pottedplant':15,
# 'sheep':16,
# 'sofa':17,
# 'train':18,
# 'tvmonitor':19
# }

class YOLO(object):
    """Runs YOLOv3 post-processing (box decoding, score filtering and NMS) on
    raw feature maps via the bundled keras-yolo3 yolo_eval graph."""

    _defaults = {
        "anchors_path": '/workspace/scripts/utils/yolo/kerasyolo3/model_data/anchors.txt',
        "classes_path": '/workspace/scripts/utils/yolo/kerasyolo3/model_data/classes.txt',
        "score": 0.01,
        "iou": 0.45,
        "gpu_num": 1,
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"
    def __init__(self, image_sz, output_num, score, keep_aspect_ratio, **kwargs):
        self.output_num = output_num
        print("image_sz: {}".format(image_sz))
        # Grid sizes of the detection heads: image_sz/32, image_sz/16, image_sz/8.
        if self.output_num >= 1:
            self.x1 = image_sz // 32
        if self.output_num >= 2:
            self.x2 = self.x1 * 2
        if self.output_num >= 3:
            self.x3 = self.x2 * 2
        self.__dict__.update(self._defaults)  # set up default values
        self.__dict__.update(kwargs)          # and update with user overrides
        self.class_names = self._get_class()
        # Each head predicts 3 anchors x (4 box coords + 1 objectness + num_classes).
        self.output_channel_num = (len(self.class_names) + 5) * 3
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        self.score = score
        self.keep_aspect_ratio = keep_aspect_ratio
        self.boxes, self.scores, self.classes = self.generate()
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
        anchors = [float(x) for x in anchors.split(',')]
        return np.array(anchors).reshape(-1, 2)
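
    # The anchors file is assumed to follow the keras-yolo3 convention: one line
    # of comma-separated "width,height" values, e.g. (illustrative numbers only)
    #   10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
    # which _get_anchors() above reshapes into an (N, 2) array of anchor boxes.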

    def generate(self):
        self.input_image_shape = K.placeholder(shape=(2,))
        feature_outputs = []
        # One placeholder per detection head, in NHWC layout.
        if self.output_num >= 1:
            self.feature_output0 = K.placeholder(shape=(1, self.x1, self.x1, self.output_channel_num))  # 13
            feature_outputs.append(self.feature_output0)
        if self.output_num >= 2:
            self.feature_output1 = K.placeholder(shape=(1, self.x2, self.x2, self.output_channel_num))  # 26
            feature_outputs.append(self.feature_output1)
        if self.output_num >= 3:
            self.feature_output2 = K.placeholder(shape=(1, self.x3, self.x3, self.output_channel_num))  # 52
            feature_outputs.append(self.feature_output2)
        boxes, scores, classes = yolo_eval(
            feature_outputs,
            self.anchors,
            len(self.class_names),
            self.input_image_shape,
            score_threshold=self.score,
            iou_threshold=self.iou,
            keep_aspect_ratio=self.keep_aspect_ratio)
        return boxes, scores, classes
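
    # For example (illustrative numbers only): with image_sz=416 and an 80-class
    # model, output_channel_num = (80 + 5) * 3 = 255, so the three heads have
    # shapes (1, 13, 13, 255), (1, 26, 26, 255) and (1, 52, 52, 255); flattened,
    # their lengths are 13*13*255 = 43095, 26*26*255 = 172380 and 52*52*255 = 689520.
    # detect_image() below relies on those lengths to tell the heads apart.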

    def detect_image(self, output_data_lst, voc_map, orig_width, orig_height):
        """Reshape the flattened CHW feature maps in output_data_lst, run the
        yolo_eval graph and return (class, score, left, top, right, bottom)
        tuples of strings, with box coordinates clipped to the original image."""
        for output_data in output_data_lst:
            if self.output_num >= 1 and len(output_data) == self.x1 * self.x1 * self.output_channel_num:    # 13
                image_data0 = np.reshape(output_data, (1, self.output_channel_num, self.x1, self.x1))
                image_data0 = image_data0.transpose(0, 2, 3, 1)
            elif self.output_num >= 2 and len(output_data) == self.x2 * self.x2 * self.output_channel_num:  # 26
                image_data1 = np.reshape(output_data, (1, self.output_channel_num, self.x2, self.x2))
                image_data1 = image_data1.transpose(0, 2, 3, 1)
            elif self.output_num >= 3 and len(output_data) == self.x3 * self.x3 * self.output_channel_num:  # 52
                image_data2 = np.reshape(output_data, (1, self.output_channel_num, self.x3, self.x3))
                image_data2 = image_data2.transpose(0, 2, 3, 1)
            else:
                raise ValueError(
                    "Output of length {} does not match any detection head "
                    "(output_channel_num={})".format(len(output_data), self.output_channel_num))
        # Feed whichever heads this model actually has.
        feed_dict_ = {
            self.input_image_shape: [orig_height, orig_width],
            K.learning_phase(): 0,
        }
        if self.output_num >= 1:
            feed_dict_[self.feature_output0] = image_data0
        if self.output_num >= 2:
            feed_dict_[self.feature_output1] = image_data1
        if self.output_num >= 3:
            feed_dict_[self.feature_output2] = image_data2
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict=feed_dict_)

        res = []
        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            # if voc_map:
            #     if predicted_class not in coco2voc:
            #         continue
            box = out_boxes[i]
            score = out_scores[i]
            top, left, bottom, right = box
            # Round to integer pixel coordinates and clip to the original image.
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(orig_height, np.floor(bottom + 0.5).astype('int32'))
            right = min(orig_width, np.floor(right + 0.5).astype('int32'))
            res.append((predicted_class, str(score), str(left), str(top), str(right), str(bottom)))
        return res

    def close_session(self):
        self.sess.close()
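

# A minimal usage sketch, not part of the original script: it assumes a 416x416,
# three-head model and that `raw_outputs` already holds flattened CHW feature
# maps produced by some external inference step. The sizes and dummy zero data
# below are placeholders for illustration only.
if __name__ == "__main__":
    yolo = YOLO(image_sz=416, output_num=3, score=0.3, keep_aspect_ratio=False)
    channels = yolo.output_channel_num
    # Dummy feature maps with the flattened lengths detect_image() expects.
    raw_outputs = [
        np.zeros(13 * 13 * channels, dtype=np.float32),
        np.zeros(26 * 26 * channels, dtype=np.float32),
        np.zeros(52 * 52 * channels, dtype=np.float32),
    ]
    detections = yolo.detect_image(raw_outputs, voc_map=False,
                                   orig_width=640, orig_height=480)
    for det in detections:
        print(det)
    yolo.close_session()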