196 lines
6.4 KiB
Python
196 lines
6.4 KiB
Python
import json
|
|
import csv
|
|
import os
|
|
import argparse
|
|
import sys
|
|
import numpy as np
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
|
|
def json2csv(data_dir, json_name, csv_name):
    """Append the boxes described by one JSON annotation file to a CSV file.

    The JSON document must provide an ``imagePath`` entry and a ``shapes``
    list. For every shape, the first two entries of its ``points`` list are
    written as one row: ``imagePath, x0, y0, x1, y1, label``.

    Args:
        data_dir: Directory that contains the annotation file.
        json_name: File name of the annotation inside ``data_dir``.
        csv_name: Path of the CSV file the rows are appended to.
    """
    json_path = os.path.join(data_dir, json_name)
    # The context manager closes the handle even if the JSON is malformed.
    with open(json_path) as f_json:
        data = json.load(f_json)

    image_name = data['imagePath']
    # newline='' is required by the csv module; without it the writer's own
    # '\r\n' terminators get translated again on some platforms.
    with open(csv_name, 'a', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        for info in data['shapes']:
            first, second = info['points'][0], info['points'][1]
            csvwriter.writerow([
                image_name,
                str(first[0]), str(first[1]),
                str(second[0]), str(second[1]),
                info['label'],
            ])
|
|
|
|
def create_csv(data_dir, save_dir):
    """Create ``img_info.csv`` from every ``.json`` annotation in a directory.

    The output file is (re)created in ``save_dir`` with a header row, then
    each annotation file found in ``data_dir`` is appended via ``json2csv``.

    Args:
        data_dir: Directory scanned (non-recursively) for ``*.json`` files.
        save_dir: Directory that receives ``img_info.csv``.
    """
    fields = ['img_id', 'xmin', 'ymin', 'xmax', 'ymax', 'class_id']
    save_path = os.path.join(save_dir, 'img_info.csv')
    # newline='' is required by the csv module for files it writes to.
    with open(save_path, 'w', newline='') as csvfile:
        csv.writer(csvfile).writerow(fields)

    # os.listdir() order is arbitrary; sorting keeps the output reproducible.
    for entry in sorted(os.listdir(data_dir)):
        # Compare the real extension so that an extension-less file that is
        # merely *named* "json" is not mistaken for an annotation.
        if os.path.splitext(entry)[1] == '.json':
            json2csv(data_dir, entry, save_path)
|
|
|
|
def split_data(data_cvs_path, save_dir, train_ratio=0.8):
    """Randomly split an annotation CSV into train and validation CSVs.

    The split is made per image id (``img_id`` column), so every row of a
    given image ends up in the same output file. ``train_info.csv`` and
    ``val_info.csv`` are written to ``save_dir``.

    Args:
        data_cvs_path: Path of the CSV to split; must have an ``img_id`` column.
        save_dir: Directory that receives the two output files.
        train_ratio: Fraction of the distinct image ids assigned to the
            training set (rounded down). Defaults to 0.8.
    """
    input_file = pd.read_csv(data_cvs_path)
    img_ids = input_file['img_id']

    # unique() keeps first-appearance order, so (unlike iterating a set of
    # strings) the id order is stable and np.random.seed() makes the split
    # reproducible.
    ids = np.asarray(img_ids.unique())
    n = len(ids)
    train_idx = np.random.choice(n, int(n * train_ratio), replace=False)

    # Vectorised membership test; everything not in train is validation.
    in_train = img_ids.isin(ids[train_idx])

    input_file[in_train].to_csv(
        os.path.join(save_dir, 'train_info.csv'), sep=',', index=False)
    input_file[~in_train].to_csv(
        os.path.join(save_dir, 'val_info.csv'), sep=',', index=False)
|
|
|
|
|
|
def create_class_mapping(data_cvs_path, save_dir):
    """Write ``class_id.csv`` mapping each class name to an integer index.

    The distinct values of the ``class_id`` column are sorted and numbered
    from 0; one ``name,index`` row is written per class, without a header.

    Args:
        data_cvs_path: Path of a CSV file with a ``class_id`` column.
        save_dir: Directory that receives ``class_id.csv``.
    """
    input_file = pd.read_csv(data_cvs_path)
    class_names = sorted(set(input_file['class_id']))

    save_path = os.path.join(save_dir, 'class_id.csv')

    # newline='' is required by the csv module for files it writes to.
    with open(save_path, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=",")
        writer.writerows(
            [name, str(index)] for index, name in enumerate(class_names))
|
|
|
|
def prepare(anno_cvs_path, save_dir, train_ratio=0.8):
    """Convert a header-less annotation CSV into train/val/class-id CSVs.

    The input is read without a header and addressed by position: column 0 is
    used as the class label, columns 1 and 2 as ``xmin``/``ymin``, columns 3
    and 4 are added to them to obtain ``xmax``/``ymax``, and column 5 is used
    as the image id. Rows are split per image id into ``train_info.csv`` and
    ``val_info.csv``; ``class_id.csv`` maps each sorted class label to an
    index starting at 0. All three files are written to ``save_dir``.

    Args:
        anno_cvs_path: Path of the header-less annotation CSV.
        save_dir: Directory that receives the three output files.
        train_ratio: Fraction of the distinct image ids assigned to the
            training set (rounded down). Defaults to 0.8.
    """
    input_file = pd.read_csv(anno_cvs_path, header=None)

    df = pd.DataFrame({
        'img_id': input_file[5],
        'xmin': input_file[1],
        'ymin': input_file[2],
        'xmax': input_file[1] + input_file[3],
        'ymax': input_file[2] + input_file[4],
        'class_id': input_file[0],
    })

    # unique() keeps first-appearance order, so (unlike iterating a set of
    # strings) the id order is stable and np.random.seed() makes the split
    # reproducible.
    ids = np.asarray(df['img_id'].unique())
    n = len(ids)
    train_idx = np.random.choice(n, int(n * train_ratio), replace=False)

    # Vectorised membership test; everything not in train is validation.
    in_train = df['img_id'].isin(ids[train_idx])

    train_path = os.path.join(save_dir, 'train_info.csv')
    print('saving train info into ', train_path)
    df[in_train].to_csv(train_path, sep=',', index=False)

    val_path = os.path.join(save_dir, 'val_info.csv')
    print('saving val info into ', val_path)
    df[~in_train].to_csv(val_path, sep=',', index=False)

    class_names = sorted(set(df['class_id']))
    save_path = os.path.join(save_dir, 'class_id.csv')
    print('saving class id mapping into ', save_path)
    # newline='' is required by the csv module for files it writes to.
    with open(save_path, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=",")
        writer.writerows(
            [name, str(index)] for index, name in enumerate(class_names))
|
|
|
|
def prepare_txt(train_dir, id_mapping, trainset = True):
    """Convert per-image text label files into a single annotation CSV.

    For an image directory ``<root>/<images>/<split>`` the label files are
    read from ``<root>/labels/<split>`` and the CSV is written next to the
    split directory, i.e. into ``<root>/<images>``. Each label line holds
    ``class cx cy w h`` with the box centre and size expressed as fractions
    of the image size; they are scaled by the image's width and height and
    written as ``img_id, xmin, ymin, xmax, ymax, class_id``.

    Args:
        train_dir: Directory holding the ``.jpg`` images of one split; a
            trailing path separator is tolerated.
        id_mapping: Lookup indexed by the integer class number found in the
            label files, giving the value written to the ``class_id`` column.
        trainset: When true the output is ``train_info.csv``, otherwise
            ``val_info.csv``.

    Returns:
        Path of the CSV file that was written.
    """
    save_dir, trainval = os.path.split(train_dir)
    if not trainval:
        # train_dir ended with a separator: split once more to get the name.
        save_dir, trainval = os.path.split(save_dir)

    par_dir = os.path.split(save_dir)[0]
    txt_path = os.path.join(par_dir, 'labels', trainval)

    fields = ['img_id', 'xmin', 'ymin', 'xmax', 'ymax', 'class_id']
    csv_name = 'train_info.csv' if trainset else 'val_info.csv'
    save_path = os.path.join(save_dir, csv_name)

    # Open the output once (newline='' as the csv module requires) instead of
    # re-opening it in append mode for every label file.
    with open(save_path, 'w', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(fields)

        # os.listdir() order is arbitrary; sorting keeps output reproducible.
        for txt_file in sorted(os.listdir(txt_path)):
            if txt_file.startswith('.'):
                # Skip hidden files.
                continue

            # splitext keeps dots that are part of the stem ("a.b.txt" ->
            # "a.b.jpg").
            img_id = os.path.splitext(txt_file)[0] + '.jpg'
            image = plt.imread(os.path.join(train_dir, img_id))
            # Works for both (h, w) and (h, w, channels) arrays without
            # resorting to a bare except.
            h, w = image.shape[:2]

            with open(os.path.join(txt_path, txt_file), 'r') as fp:
                for line in fp:
                    parts = line.split()
                    if not parts:
                        # Tolerate blank lines (e.g. a trailing newline).
                        continue
                    class_idx, cx, cy, cw, ch = parts
                    class_id = id_mapping[int(class_idx)]
                    cx, cy = float(cx) * w, float(cy) * h
                    cw, ch = float(cw) * w, float(ch) * h
                    csvwriter.writerow([
                        img_id,
                        str(cx - cw / 2), str(cy - ch / 2),
                        str(cx + cw / 2), str(cy + ch / 2),
                        class_id,
                    ])
    return save_path
|
|
|
|
def parse_args(args):
    """
    Build the command-line parser and run it over ``args``.

    Args:
        args: List of command-line tokens (without the program name).

    Returns:
        The parsed namespace; its only attribute is ``annopath``.
    """
    cli = argparse.ArgumentParser(description='preprocessing csv data')
    cli.add_argument(
        'annopath',
        help='path to csv file downloaded from makesense.ai',
    )
    parsed = cli.parse_args(args)
    return parsed
|
|
|
|
def main(args = None):
    """Script entry point: parse the arguments and run ``prepare``.

    Args:
        args: Command-line tokens; ``sys.argv[1:]`` is used when ``None``.

    The output directory is the directory that contains ``annopath`` (a
    trailing path separator on ``annopath`` is tolerated).
    """
    argv = sys.argv[1:] if args is None else args
    options = parse_args(argv)

    head, tail = os.path.split(options.annopath)
    if not tail:
        # annopath ended with a separator: drop one more path component.
        head = os.path.split(head)[0]

    # Disabled alternative pipeline, kept for reference:
    #   create_csv(args.anno_path, save_dir)
    #   data_cvs_path = os.path.join(save_dir, 'img_info.csv')
    #   split_data(data_cvs_path, save_dir)
    #   create_class_mapping(data_cvs_path, save_dir)

    prepare(options.annopath, head)
|
|
|
|
# Run the conversion only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|