kneron_model_converter/scripts/utils/img_preprocess.py

import numpy as np
from PIL import Image
import argparse
import os

# import matplotlib.pyplot as plt

"""
ATTENTION:
Currently supported preprocessing includes:
2 resize styles: plain resize, and aspect-ratio-preserving resize with padding on the right and bottom.
7 normalization modes: none, yolo, kneron, tensorflow, pytorch, caffe, customized
"""


def preprocess(image, clrspace, pos, crop_dim, resize_dim, keep_aspect_ratio, mode, b_crop, outFile, bSaveCrop, bcvtOnly):
    """Convert, optionally crop/resize, and normalize one image.

    image: PIL Image object
    clrspace: target color space; "BGR" is produced by converting to RGB and
        reversing the channel axis, any other value is handed to
        ``Image.convert`` unchanged (e.g. "RGB", "L")
    pos: (x, y) forwarded to ``do_crop`` when cropping is enabled
    crop_dim: second coordinate pair forwarded to ``do_crop``
    resize_dim: (width, height) forwarded to ``resize``
    keep_aspect_ratio: True or False, forwarded to ``resize``
    mode: normalization mode name forwarded to ``normalization``
    b_crop: when truthy (and bcvtOnly == 0) the image is cropped before resizing
    outFile, bSaveCrop: forwarded to the crop/resize helpers
    bcvtOnly: 0 runs the crop/resize stage; any other value skips it and only
        converts the color space before normalizing

    Returns whatever ``normalization`` returns for the processed pixel array.
    """
    # Color-space conversion: PIL has no "BGR" mode, so build it by flipping
    # the last axis of the RGB pixel array.
    if clrspace != "BGR":
        converted = image.convert(clrspace)
    else:
        rgb_pixels = np.array(image.convert("RGB"))
        converted = Image.fromarray(rgb_pixels[..., ::-1])

    if bcvtOnly != 0:
        # Format conversion only: keep the original geometry.
        pixels = np.array(converted)
    else:
        if b_crop:
            converted = do_crop(converted, pos, crop_dim, outFile)
        pixels = resize(converted, resize_dim, keep_aspect_ratio, outFile, bSaveCrop, clrspace)

    return normalization(pixels, mode)

def resize(x, size, keep_aspect_ratio, oFile, bSave, clrspace):
    """Resize a PIL image to ``size`` and return the pixels as a NumPy array.

    x: PIL Image object
    size: (width, height) of the output
    keep_aspect_ratio: if falsy, the image is stretched to ``size`` with
        bilinear filtering. If truthy, the image is shrunk to fit inside
        ``size`` with its aspect ratio preserved, anchored at the top-left
        corner, and the remaining area on the right/bottom is black.
    oFile, bSave: unused; kept so existing callers keep working
    clrspace: when "L" or "l", the aspect-preserving path returns a single
        channel (H, W) array instead of the (H, W, 3) canvas

    NOTE: in the aspect-preserving path ``Image.thumbnail`` modifies ``x`` in
    place, so the caller's image object is shrunk as a side effect. Per the
    Pillow documentation ``thumbnail`` never enlarges, so an image already
    smaller than ``size`` is only padded, not upscaled.
    """
    if not keep_aspect_ratio:
        return np.array(x.resize(size, Image.BILINEAR))

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter
    # under its current name and is also available in older Pillow releases.
    x.thumbnail(size, Image.LANCZOS)

    # Crop to the exact target box. The thumbnail is never larger than `size`,
    # so this only extends the image on the right/bottom. The result already
    # has exactly `size` pixels, hence no further rescaling is required.
    x = x.crop((0, 0, size[0], size[1]))

    canvas = Image.new('RGB', size, (0, 0, 0))
    canvas.paste(x, (0, 0))  # npu setting: top-left anchored (keras would center)

    final = np.array(canvas)
    if clrspace == "L" or clrspace == "l":
        # The canvas is always RGB; grayscale callers want a single plane.
        final = final[:, :, 0]

    return final

def do_crop(x, pos, size, oFile):
    """Return ``x.crop`` of the box built from ``pos`` and ``size``.

    x: object with a PIL-style ``crop(box)`` method
    pos: sequence whose first two items are the left and upper coordinates
    size: sequence whose first two items are used as the right and lower
        coordinates of the box (not as a width/height)
    oFile: unused
    """
    left, upper = pos[0], pos[1]
    right, lower = size[0], size[1]
    box = (left, upper, right, lower)
    return x.crop(box)

def normalization(x, mode, **kwargs):
    """Normalize a Numpy array encoding an image or a batch of images.

    # Arguments
        x: Input array. It is copied to float first, so the caller's array
            is never modified. The per-channel modes index the last axis.
        mode: One of
            - none: only the float conversion is applied.
            - yolo: x / 255 (0..1 for 8-bit input).
            - kneron: x / 256 - 0.5 (about -0.5..0.5 for 8-bit input).
            - tensorflow (alias tf): x / 127.5 - 1 (-1..1 for 8-bit input).
            - pytorch (alias torch): x / 255, then the first three channels
                of the last axis are normalized with the ImageNet mean/std
                (mean and std listed in RGB order).
            - caffe: subtracts the ImageNet mean from the first three
                channels of the last axis without scaling. The mean is in
                BGR order and NO channel swap is performed here, so the
                input is expected to already be BGR.
            - customized: (x - 127.5) * 0.0078125; also prints "customized".
        **kwargs: accepted for compatibility and ignored.
    # Returns
        Preprocessed Numpy array of float dtype.
    # Raises
        ValueError: if `mode` is not one of the names above.
    """

    x = x.astype(float)

    if mode == 'none':
        return x

    if mode == 'yolo':
        x /= 255.
        return x

    if mode == 'kneron':
        x /= 256.
        x -= 0.5
        return x

    # "tf" / "torch" are the short names used in the documentation and CLI
    # help; accept them alongside the long names.
    if mode in ('tensorflow', 'tf'):
        x /= 127.5
        x -= 1.
        return x

    if mode in ('pytorch', 'torch'):
        x /= 255.
        mean = (0.485, 0.456, 0.406)
        std = (0.229, 0.224, 0.225)
        for channel, (channel_mean, channel_std) in enumerate(zip(mean, std)):
            x[..., channel] -= channel_mean
            x[..., channel] /= channel_std
        return x

    if mode == 'caffe':
        # mean is for BGR format
        mean = (103.939, 116.779, 123.68)
        for channel, channel_mean in enumerate(mean):
            x[..., channel] -= channel_mean
        return x

    # this is the customized part
    if mode == 'customized':
        print("customized")
        return (x - 127.5) * 0.0078125

    # Previously an unknown mode fell through and returned None, which only
    # surfaced later as an unrelated AttributeError in the caller.
    raise ValueError(
        "unknown normalization mode %r; expected one of: none, yolo, kneron, "
        "tensorflow (tf), pytorch (torch), caffe, customized" % (mode,)
    )


def img2txt_bin(input_file : str,
                input_is_dir : bool,
                output_folder : str,
                out_h : int,
                out_w : int,
                clrspace : str,
                mode : str,
                task : str,
                kdp_version : int,
                radix : int,
                bitwidth : int = 0,
                x_pos : int = 0,
                y_pos : int = 0,
                crop_h : int = 0,
                crop_w : int = 0,
                bCvtOnly : int = 0,
                enable_crop : bool = False,
                keep_aspect_ratio : bool = False
                ):
    """Preprocess image file(s) and dump each one as a .txt or .bin file.

    input_file: path of one image, or of a directory when input_is_dir is true
    input_is_dir: if true, every entry returned by os.listdir(input_file) is
        processed
    output_folder: directory for the outputs; created (with parents) if missing
    out_h, out_w: output height and width handed to the resize step
    clrspace: color space passed to preprocess(); a 3-character value
        (e.g. "RGB", "BGR") is treated as 3-channel when packing the .bin
        file, anything else as single-channel
    mode: normalization mode passed to preprocess()
    task: "img2txt" writes one "%.8f" value per line; "img2bin" writes an
        int8 (row, col, 4) buffer. Any other value produces no output file.
    kdp_version: for 520 the .bin row width is padded up to a multiple of 16
    radix: img2bin only; pixel values are multiplied by 2**radix
    bitwidth: img2bin only; values are clipped to the signed `bitwidth`-bit
        range before rounding. Must be >= 1.
    x_pos, y_pos, crop_w, crop_h: crop rectangle (top-left corner and size)
    bCvtOnly: non-zero skips crop/resize and only converts the color space
    enable_crop: crop to the rectangle above before resizing
    keep_aspect_ratio: passed to the resize step

    Output files are named after the input file name up to its first ".".

    Raises:
        FileNotFoundError: an input path does not exist.
        ValueError: img2bin requested with bitwidth < 1, or cropping enabled
            with a non-positive crop size.
    """
    # Validate up front so nothing is written before a bad argument is found.
    if task == "img2bin" and (bitwidth is None or bitwidth < 1):
        # With bitwidth <= 0 the clip bounds below collapse and every pixel
        # would silently be written as 0.
        raise ValueError("img2bin requires bitwidth >= 1, got %r" % (bitwidth,))
    if enable_crop and (crop_w <= 0 or crop_h <= 0):
        raise ValueError("enable_crop requires positive crop_w and crop_h, "
                         "got crop_w=%r crop_h=%r" % (crop_w, crop_h))

    # Prepare inputs
    if input_is_dir:
        input_list = [os.path.join(input_file, file_name) for file_name in os.listdir(input_file)]
    else:
        input_list = [input_file]

    # Prepare output folder (makedirs also handles nested, not-yet-existing paths)
    os.makedirs(output_folder, exist_ok=True)

    # These settings do not depend on the individual file.
    resize_size = out_w, out_h
    # do_crop expects the right/lower corner, not a width/height.
    crop_size = x_pos + crop_w, y_pos + crop_h
    if enable_crop:
        pos = x_pos, y_pos
    else:
        pos = 0, 0
    # Previously this flag was False in both branches, so enable_crop was ignored.
    bCrop = bool(enable_crop)
    bNeedSaveCrop = task == "img2bin"

    # For each input file
    for in_file in input_list:
        if not os.path.exists(in_file):
            # An assert would be stripped under `python -O`.
            raise FileNotFoundError(in_file)

        # The context manager releases the file handle once the pixels are extracted.
        with Image.open(in_file) as image:
            img_data = preprocess(image, clrspace, pos, crop_size, resize_size, keep_aspect_ratio, mode, bCrop, "temp.txt", bNeedSaveCrop, bCvtOnly)

        # basename() copes with the platform's path separator; the stem is
        # everything before the first "." to keep the existing output names.
        stem = os.path.basename(in_file).split(".")[0]

        if task == "img2txt":
            out_file = os.path.join(output_folder, stem + ".txt")
            np.savetxt(out_file, img_data.reshape((-1, 1)), fmt="%.8f")

        elif task == "img2bin":
            col, row = resize_size
            if kdp_version == 520:
                # Row width is rounded up to the next multiple of 16.
                col_output = int(np.ceil(col*1.0/16)*16)
            else:
                col_output = col

            # Fixed-point conversion: scale by 2**radix, saturate to the
            # signed `bitwidth`-bit range, then round to integers.
            img_data = np.clip(img_data * (2**radix), a_min=-1 * 2**(bitwidth-1), a_max=2**(bitwidth-1) - 1)
            img_data = np.around(img_data).astype("int")
            out_file = os.path.join(output_folder, stem + ".bin")

            # Four channels per pixel; the fourth (and any padded columns) stay 0.
            img_data_output = np.zeros((row, col_output, 4))
            if len(clrspace) == 3:
                img_data_output[:, :col, :3] = img_data
            else:
                # Single-channel input is replicated into the first three channels.
                for channel in range(3):
                    img_data_output[:, :col, channel] = img_data

            img_data_output.astype("int8").tofile(out_file)


def main_(args):
    """Run img2txt_bin() with the options held by an argparse-style namespace.

    args: object exposing the parsed command-line options as attributes.
        Required attributes: output_folder, color_space, mode, task,
        kdp_version, radix, bitwidth, and the output size as height/width.
        Optional attributes (missing or None means "not given"):
        input_folder, input_file, x_pos, y_pos, crop_w, crop_h, enCrop,
        convertOnly, keep_aspect_ratio.

    Raises:
        ValueError: neither input_folder nor input_file is set.
    """
    def _option(*names):
        # First attribute among `names` that exists and is not None.
        for name in names:
            value = getattr(args, name, None)
            if value is not None:
                return value
        return None

    # A folder takes precedence; otherwise fall back to the single input file.
    input_folder = _option("input_folder")
    if input_folder is not None:
        input_path, input_is_dir = input_folder, True
    else:
        input_path, input_is_dir = _option("input_file"), False
    if input_path is None:
        raise ValueError("either --input_folder or --input_file must be given")

    # The flag arrives as text from the command line, where any non-empty
    # string (including "False") would otherwise be truthy.
    keep_ratio = _option("keep_aspect_ratio")
    if isinstance(keep_ratio, str):
        keep_ratio = keep_ratio.strip().lower() in ("true", "1", "yes", "y")

    # argparse names the attribute after the long option, so "-s_h/--height"
    # and "-s_w/--width" are stored as args.height / args.width. The s_h/s_w
    # names are still honored for namespaces built by hand.
    out_h = _option("height", "s_h")
    out_w = _option("width", "s_w")

    img2txt_bin(input_path, input_is_dir, args.output_folder,
                out_h, out_w, args.color_space,
                args.mode, args.task,
                args.kdp_version,
                args.radix, args.bitwidth,
                x_pos=_option("x_pos") or 0,
                y_pos=_option("y_pos") or 0,
                crop_h=_option("crop_h") or 0,
                crop_w=_option("crop_w") or 0,
                bCvtOnly=_option("convertOnly") or 0,
                enable_crop=bool(_option("enCrop")),
                keep_aspect_ratio=bool(keep_ratio))
    return


# Command-line entry point: declare the options, parse them, and hand the
# resulting namespace to main_().
if __name__ == "__main__":
    argparser = argparse.ArgumentParser(
        description="convert img to RGB, resize or crop, etc"
        )

    # --- task and input/output locations ---
    argparser.add_argument(
        '-t',
        '--task',
        help=" 'img2bin', 'img2txt'"
        )

    argparser.add_argument(
        '-i',
        '--input_file',
        help="input file name"
        )

    argparser.add_argument(
        '-if',
        '--input_folder',
        help="input folder path"
        )

    argparser.add_argument(
        '-o',
        '--output_file',
        help="output file name"
        )

    argparser.add_argument(
        '-of',
        '--output_folder',
        help="output folder path"
        )

    # --- color space and output geometry ---
    argparser.add_argument(
        '-c',
        '--color_space',
        help="L, RGB or BGR"
        )

    # Stored as args.width / args.height (argparse uses the long option name).
    argparser.add_argument(
        '-s_w',
        '--width',
        type=int,
        help="output width for npu input",
        )
    argparser.add_argument(
        '-s_h',
        '--height',
        type=int,
        help="output height for npu input",
        )

    # --- crop rectangle ---
    argparser.add_argument(
        '-x',
        '--x_pos',
        type=int,
        help="left up coordinate x",
        )

    argparser.add_argument(
        '-y',
        '--y_pos',
        type=int,
        help="left up coordinate y",
        )

    argparser.add_argument(
        '-c_w',
        '--crop_w',
        type=int,
        help="crop width",
        )

    argparser.add_argument(
        '-c_h',
        '--crop_h',
        type=int,
        help="crop height",
        )

    argparser.add_argument(
        '-a',
        '--keep_aspect_ratio',
        help="True/False to indicate maintain aspect_ratio or not"
        )

    # --- normalization and quantization ---
    # The help text lists the mode names that normalization() compares against.
    argparser.add_argument(
        '-m',
        '--mode',
        help="normalization mode: none, yolo, kneron, tensorflow, pytorch, caffe or customized."
        )

    argparser.add_argument(
        '-bw',
        '--bitwidth',
        type=int,
        help="Int for bitwidth"
        )

    argparser.add_argument(
        '-r',
        '--radix',
        type=int,
        help="Int for radix"
        )

    # --- processing switches ---
    argparser.add_argument(
        '-crop',
        '--enCrop',
        type=int,
        help="indicate if this task will act Crop"
        )

    argparser.add_argument(
        '-cvtOnly',
        '--convertOnly',
        type=int,
        help="indicate if just do format convert"
        )

    argparser.add_argument(
        '--kdp_version',
        type=int,
        default=520,
        help="kdp version"
        )

    args = argparser.parse_args()

    main_(args)