# 2026-01-28 06:16:04 +00:00
#
# 378 lines
# 9.5 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import numpy as np
from PIL import Image
import argparse
import os
# import matplotlib.pyplot as plt
"""
ATTENTION:
Currently supported preprocessing includes:
2 resize styles: plain resize, and aspect-preserving resize with black padding on the right and bottom.
7 normalization modes: none, yolo, kneron, tensorflow, pytorch, caffe, customized
"""
def preprocess(image, clrspace, pos, crop_dim, resize_dim, keep_aspect_ratio, mode, b_crop, outFile, bSaveCrop, bcvtOnly):
    """Convert the colour space of an image, optionally crop/resize it, then normalize it.

    image: PIL Image object
    clrspace: "BGR", or a mode string handed to ``Image.convert`` (e.g. "RGB", "L")
    pos: forwarded to do_crop as the crop origin
    crop_dim: forwarded to do_crop as its ``size`` argument
    resize_dim: (width, height) forwarded to resize
    keep_aspect_ratio: forwarded to resize
    mode: normalization mode forwarded to normalization
    b_crop: when truthy (and bcvtOnly == 0) the image is cropped before resizing
    outFile, bSaveCrop: forwarded unchanged to do_crop / resize
    bcvtOnly: 0 runs crop + resize; any other value skips both and only
        converts the colour space before normalization

    Returns whatever normalization() returns for the resulting pixel array.
    """
    if clrspace != "BGR":
        converted = image.convert(clrspace)
    else:
        # "BGR" is produced by converting to RGB and reversing the channel axis.
        rgb_pixels = np.array(image.convert("RGB"))
        converted = Image.fromarray(rgb_pixels[..., ::-1])

    if bcvtOnly == 0:
        source = do_crop(converted, pos, crop_dim, outFile) if b_crop else converted
        pixels = resize(source, resize_dim, keep_aspect_ratio, outFile, bSaveCrop, clrspace)
    else:
        pixels = np.array(converted)

    return normalization(pixels, mode)
def resize(x, size, keep_aspect_ratio, oFile, bSave, clrspace):
    """Resize a PIL image to ``size`` and return the pixels as a NumPy array.

    x: PIL Image object (left unmodified)
    size: (width, height) of the result
    keep_aspect_ratio: falsy -> stretch to ``size`` with bilinear filtering;
        truthy -> shrink to fit inside ``size`` keeping the aspect ratio, anchor
        the result at the top-left corner and pad the right/bottom with black
    oFile, bSave: unused; kept so existing callers keep working
    clrspace: when "L" or "l", the padded result is reduced to a single 2-D channel

    Returns a NumPy array built from the resized image.
    """
    if not keep_aspect_ratio:
        return np.array(x.resize(size, Image.BILINEAR))

    # thumbnail() works in place, so operate on a copy to leave the caller's image alone.
    fitted = x.copy()
    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
    fitted.thumbnail(size, Image.LANCZOS)
    # Cropping to the full target box yields an image of exactly `size`.
    # (The original re-scaled after this crop, but the image already has the
    # target size at that point, so that step never changed anything.)
    fitted = fitted.crop((0, 0, size[0], size[1]))

    canvas = Image.new('RGB', size, (0, 0, 0))
    # canvas.paste(fitted, ((w-nw)//2, (h-nh)//2))  # keras setting (centered)
    canvas.paste(fitted, (0, 0))  # npu setting (top-left anchored)
    final = np.array(canvas)
    if clrspace == "L" or clrspace == "l":
        # The canvas is RGB; a grayscale source fills all channels, keep one.
        final = final[:, :, 0]
    return final
def do_crop(x, pos, size, oFile):
    """Crop ``x`` to the box (pos[0], pos[1], size[0], size[1]).

    x: object providing ``crop(box)`` (a PIL Image in this file)
    pos: (left, upper) corner of the box
    size: passed as the box's last two values, i.e. it is used as the
        (right, lower) corner rather than as a width/height
    oFile: unused

    Returns the result of ``x.crop``.
    """
    left, upper = pos[0], pos[1]
    right, lower = size[0], size[1]
    return x.crop((left, upper, right, lower))
def normalization(x, mode, **kwargs):
    """Normalize a Numpy array of image pixel values.

    # Arguments
        x: Input array. It is copied to float first, so the caller's array is
            never modified. The "pytorch" and "caffe" modes index the last
            axis at positions 0, 1 and 2.
        mode: One of
            - "none": only the float conversion.
            - "yolo": x / 255.
            - "kneron": x / 256 - 0.5.
            - "tensorflow" (alias "tf"): x / 127.5 - 1.
            - "pytorch" (alias "torch"): x / 255, then per channel
              subtract [0.485, 0.456, 0.406] and divide by [0.229, 0.224, 0.225].
            - "caffe": per channel subtract [103.939, 116.779, 123.68], no
              scaling. The means are in BGR order and this function does NOT
              reorder channels, so the input must already be BGR.
            - "customized": (x - 127.5) * 0.0078125; also prints "customized".
        **kwargs: accepted for backward compatibility and ignored.
    # Returns
        Preprocessed Numpy array of floats.
    # Raises
        ValueError: if `mode` is not one of the names above (previously the
            function silently returned None in that case).
    """
    # The command-line help advertises the short names "tf" and "torch".
    if mode == 'tf':
        mode = 'tensorflow'
    elif mode == 'torch':
        mode = 'pytorch'

    x = x.astype(float)  # astype copies, so the in-place maths below is safe

    if mode == 'none':
        return x

    if mode == 'yolo':  # original note: "0 -1 8-7"
        x /= 255.
        return x

    if mode == 'kneron':  # original note: "-0.5 0.5"
        x /= 256.
        x -= 0.5
        return x

    if mode == 'tensorflow':  # original note: "-1 1 8-7"
        x /= 127.5
        x -= 1.
        return x

    if mode == 'pytorch':  # original note: "-2.5 2.5 8-6"
        x /= 255.
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        for channel in range(3):
            x[..., channel] -= mean[channel]
        for channel in range(3):
            x[..., channel] /= std[channel]
        return x

    if mode == 'caffe':  # original note: "-123 - 123 8-0"
        # mean is for BGR format
        mean = [103.939, 116.779, 123.68]
        for channel in range(3):
            x[..., channel] -= mean[channel]
        return x

    # this is the customized part
    if mode == 'customized':
        print("customized")
        return (x - 127.5) * 0.0078125

    raise ValueError(
        "unknown normalization mode {!r}; expected one of: none, yolo, kneron, "
        "tensorflow (tf), pytorch (torch), caffe, customized".format(mode)
    )
def img2txt_bin(input_file : str,
                input_is_dir : bool,
                output_folder : str,
                out_h : int,
                out_w : int,
                clrspace : str,
                mode : str,
                task : str,
                kdp_version : int,
                radix : int,
                bitwidth : int = 0,
                x_pos : int = 0,
                y_pos : int = 0,
                crop_h : int = 0,
                crop_w : int = 0,
                bCvtOnly : int = 0,
                enable_crop : bool = False,
                keep_aspect_ratio : bool = False
                ):
    """Preprocess one image (or every entry of a folder) and dump it as .txt or .bin.

    input_file: path of an image, or of a folder when input_is_dir is true
    input_is_dir: treat input_file as a folder and process every entry in it
    output_folder: destination folder, created when missing
    out_h, out_w: target height / width handed to preprocess as (out_w, out_h)
    clrspace, mode, keep_aspect_ratio, bCvtOnly: forwarded to preprocess
    task: "img2txt" writes one value per line with 8 decimals;
        "img2bin" writes int8 data laid out as rows x columns x 4 channels
        (4th channel zero). Any other value writes nothing.
    kdp_version: 520 pads the .bin row width up to a multiple of 16 columns
    radix: img2bin only; pixel values are multiplied by 2**radix
    bitwidth: img2bin only; values are clipped to the signed range of this
        many bits before being stored as int8
    x_pos, y_pos, crop_w, crop_h: crop rectangle, used when enable_crop is true
    enable_crop: crop before resizing

    Raises FileNotFoundError when an input path does not exist and ValueError
    when task is "img2bin" and bitwidth is not a positive integer.
    """
    # A non-positive bitwidth (including the default 0) collapses the clip range
    # and would silently write an all-zero .bin file.
    if task == "img2bin" and (bitwidth is None or bitwidth < 1):
        raise ValueError("img2bin requires bitwidth >= 1, got {!r}".format(bitwidth))

    # Prepare inputs (sorted so the processing order is deterministic)
    if input_is_dir:
        input_list = [os.path.join(input_file, file_name) for file_name in sorted(os.listdir(input_file))]
    else:
        input_list = [input_file]

    # Prepare output folder (also creates missing parent folders)
    os.makedirs(output_folder, exist_ok=True)

    # Values that do not depend on the individual input file
    resize_size = out_w, out_h
    # do_crop expects the right/lower corner of the crop box, not width/height.
    crop_size = x_pos + crop_w, y_pos + crop_h
    # The original assigned False in both branches, so cropping could never be enabled.
    b_crop = bool(enable_crop)
    pos = (x_pos, y_pos) if b_crop else (0, 0)
    need_save_crop = task == "img2bin"

    # For each input file
    for in_file in input_list:
        if not os.path.exists(in_file):
            raise FileNotFoundError("input image not found: {}".format(in_file))

        # The context manager closes the underlying file once the pixels are extracted.
        with Image.open(in_file) as image:
            img_data = preprocess(image, clrspace, pos, crop_size, resize_size, keep_aspect_ratio, mode, b_crop, "temp.txt", need_save_crop, bCvtOnly)

        # Output name: input base name up to its first "."
        stem = os.path.basename(in_file).split(".")[0]

        if task == "img2txt":
            out_file = os.path.join(output_folder, stem + ".txt")
            np.savetxt(out_file, img_data.reshape((-1, 1)), fmt="%.8f")
        elif task == "img2bin":
            col, row = resize_size
            if kdp_version == 520:
                col_output = int(np.ceil(col*1.0/16)*16)
            else:
                col_output = col
            # Fixed-point conversion: scale by 2**radix, saturate, round.
            img_data = np.clip(img_data * (2**radix), a_min=-1 * 2**(bitwidth-1), a_max=2**(bitwidth-1) - 1)
            img_data = np.around(img_data).astype("int")
            out_file = os.path.join(output_folder, stem + ".bin")
            img_data_output = np.zeros((row, col_output, 4))
            if len(clrspace) == 3:
                # Three-letter colour space (RGB / BGR): copy the three channels.
                img_data_output[:, :col, :3] = img_data
            else:
                # Single-channel data is replicated into the first three channels.
                for channel in range(3):
                    img_data_output[:, :col, channel] = img_data
            img_data_output.astype("int8").tofile(out_file)
def main_(args):
    """Run img2txt_bin with the options held by a parsed argument namespace.

    args: namespace as produced by this file's argument parser. The output size
        is read from ``args.s_h`` / ``args.s_w`` when those attributes exist,
        otherwise from ``args.height`` / ``args.width``, which is where
        argparse stores the '-s_h/--height' and '-s_w/--width' options.
        ``input_folder`` is used when given, otherwise ``input_file``.
        Crop, convert-only and aspect-ratio options are forwarded only when
        they were supplied.

    Raises ValueError when neither an input folder nor an input file is given.
    """
    def _given(name):
        # Value of an optional attribute; None when absent or not supplied.
        return getattr(args, name, None)

    # The original read args.s_h / args.s_w unconditionally, which raises
    # AttributeError with this file's parser (dest is "height" / "width").
    out_h = args.s_h if hasattr(args, "s_h") else args.height
    out_w = args.s_w if hasattr(args, "s_w") else args.width

    input_folder = _given("input_folder")
    if input_folder is not None:
        input_path, input_is_dir = input_folder, True
    else:
        input_path, input_is_dir = _given("input_file"), False
    if input_path is None:
        raise ValueError("either --input_folder or --input_file must be given")

    # Options that img2txt_bin has defaults for: pass them only when supplied.
    optional = {}
    for param, attr in (("x_pos", "x_pos"), ("y_pos", "y_pos"),
                        ("crop_h", "crop_h"), ("crop_w", "crop_w"),
                        ("bCvtOnly", "convertOnly")):
        value = _given(attr)
        if value is not None:
            optional[param] = value
    if _given("enCrop") is not None:
        optional["enable_crop"] = bool(args.enCrop)
    if _given("keep_aspect_ratio") is not None:
        # The option is parsed as text ("True"/"False"), so interpret it explicitly.
        flag_text = str(args.keep_aspect_ratio).strip().lower()
        optional["keep_aspect_ratio"] = flag_text in ("1", "true", "yes")

    img2txt_bin(input_path, input_is_dir, args.output_folder,
                out_h, out_w, args.color_space,
                args.mode, args.task,
                args.kdp_version,
                args.radix, args.bitwidth,
                **optional)
    return
if __name__ == "__main__":
argparser = argparse.ArgumentParser(
description="convert img to RGB, resize or crop, etc"
)
argparser.add_argument(
'-t',
'--task',
help=" 'img2bin', 'img2txt'"
)
argparser.add_argument(
'-i',
'--input_file',
help="input file name"
)
argparser.add_argument(
'-if',
'--input_folder',
help="input folder path"
)
argparser.add_argument(
'-o',
'--output_file',
help="output file name"
)
argparser.add_argument(
'-of',
'--output_folder',
help="output folder path"
)
argparser.add_argument(
'-c',
'--color_space',
help="L, RGB or BGR"
)
argparser.add_argument(
'-s_w',
'--width',
type=int,
help="output width for npu input",
)
argparser.add_argument(
'-s_h',
'--height',
type=int,
help="output height for npu input",
)
argparser.add_argument(
'-x',
'--x_pos',
type=int,
help="left up coordinate x",
)
argparser.add_argument(
'-y',
'--y_pos',
type=int,
help="left up coordinate y",
)
argparser.add_argument(
'-c_w',
'--crop_w',
type=int,
help="crop width",
)
argparser.add_argument(
'-c_h',
'--crop_h',
type=int,
help="crop height",
)
argparser.add_argument(
'-a',
'--keep_aspect_ratio',
help="True/False to indicate maintain aspect_ratio or not"
)
argparser.add_argument(
'-m',
'--mode',
help="normalizaton mode: yolo, kneron, caffe, tf , torch."
)
argparser.add_argument(
'-bw',
'--bitwidth',
type=int,
help="Int for bitwidth"
)
argparser.add_argument(
'-r',
'--radix',
type=int,
help="Int for radix"
)
argparser.add_argument(
'-crop',
'--enCrop',
type=int,
help="indicate if this task will act Crop"
)
argparser.add_argument(
'-cvtOnly',
'--convertOnly',
type=int,
help="indicate if just do format convert"
)
argparser.add_argument(
'--kdp_version',
type=int,
default=520,
help="kdp version"
)
args = argparser.parse_args()
main_(args)