166 lines
6.0 KiB
Python

import os
import argparse
import json
import requests
def download_image(image_url, image_dir, timeout=30):
    '''Downloads image from image_url to image_dir if the image doesn't exist.

    The local file name is the last '/'-separated segment of image_url.

    Args:
        image_url: URL of the image to fetch.
        image_dir: Directory in which the image is stored.
        timeout: Seconds to wait for the server before giving up on the request.

    Returns:
        The path of the local image file, or False when the image could not
        be obtained (callers skip the record in that case).
    '''
    file_name = image_url.split('/')[-1]
    if not file_name:
        # A URL ending in '/' has no file name; joining '' onto image_dir would
        # yield the directory itself, which always "exists".
        print('[WARNING]Unable to download image, skipping...')
        return False
    file_path = os.path.join(image_dir, file_name)
    if os.path.exists(file_path):
        verbose("[INFO]%s exists, skipping download" % file_name, v_flag)
        return file_path
    try:
        # Without a timeout a stalled server would block the conversion forever.
        response = requests.get(image_url, timeout=timeout)
    except requests.RequestException:
        # One unreachable image must not abort the whole run.
        print('[WARNING]Unable to download image, skipping...')
        return False
    if response.status_code != 200:
        print('[WARNING]Unable to download image, skipping...')
        return False
    with open(file_path, 'wb') as file:
        file.write(response.content)
    verbose("[INFO]Downloaded %s" % file_name, v_flag)
    return file_path
def generate_annotation(label, data):
    '''Generate annotation from the json file.

    Builds the "x_center y_center width height" part of one label line, with
    every value expressed as a fraction of the image size.

    Args:
        label: Unused here; the caller prepends the class index itself.
        data: One annotation item with the keys 'imageWidth', 'imageHeight'
            and 'points'. 'points' is either four [x, y] pairs (the corners of
            the box) or two {'x': ..., 'y': ...} dicts (a diagonal). The
            coordinates are multiplied by the image size, i.e. they are
            treated as fractions of it.

    Returns:
        A newline-terminated string of four space-separated values, each
        formatted with six decimals.
    '''
    image_width = data['imageWidth']
    image_height = data['imageHeight']
    points = data['points']
    # if four coordinates of the bounding box are given
    if len(points) == 4:
        xs = [point[0] for point in points]
        ys = [point[1] for point in points]
        xmin = image_width * min(xs)
        ymin = image_height * min(ys)
        xmax = image_width * max(xs)
        ymax = image_height * max(ys)
    # if diagonal coordinates are given
    else:
        x_first = int(points[0]['x'] * image_width)
        y_first = int(points[0]['y'] * image_height)
        x_second = int(points[1]['x'] * image_width)
        y_second = int(points[1]['y'] * image_height)
        # The two corners may arrive in either order; sort them so that the
        # width and height below can never come out negative.
        xmin, xmax = min(x_first, x_second), max(x_first, x_second)
        ymin, ymax = min(y_first, y_second), max(y_first, y_second)
    # calculating coordinate ratios as required for training yolo
    x_center = ((xmax + xmin) / 2.0) / image_width
    y_center = ((ymax + ymin) / 2.0) / image_height
    # float() keeps this a true division even when both operands are ints
    width = float(xmax - xmin) / image_width
    height = float(ymax - ymin) / image_height
    return "%.6f %.6f %.6f %.6f\n" % (x_center, y_center, width, height)
def convert_to_yolo_annotation():
    '''Convert the Dataturks JSON file into per-image YOLO label files.

    Reads dataturks_json_path one JSON record per line. For every record that
    has annotations and whose image can be obtained, a '.txt' label file is
    written next to the image, and the image's absolute path is added to
    'train.txt' in yolo_dir.

    Returns:
        The list of class labels in order of first appearance; a label's
        position in this list is the class index written to the label files.
    '''
    classes = []
    train_txt = []
    with open(dataturks_json_path, 'r') as file:
        lines = file.readlines()
    for line in lines:
        # Blank lines carry no record and would make json.loads raise.
        if not line.strip():
            continue
        data = json.loads(line)
        if data['annotation'] is None:
            continue
        file_path = download_image(data['content'], image_dir)
        if not file_path:
            continue
        rows = []
        for item in data['annotation']:
            if item['label'] is None:
                continue
            labels = item['label']
            # A single label may be given bare instead of wrapped in a list.
            if not isinstance(labels, list):
                labels = [labels]
            for label in labels:
                if label not in classes:
                    classes.append(label)
                rows.append(str(classes.index(label)) + ' ' + generate_annotation(label, item))
        train_txt.append(str(os.path.abspath(file_path)) + '\n')
        # splitext only strips the extension of the final path component, so
        # dots in directory names and extension-less files are handled.
        annotation_file = os.path.splitext(file_path)[0] + '.txt'
        with open(annotation_file, 'w') as f:
            f.write(''.join(rows))
        verbose("[INFO]%s file generated." % annotation_file, v_flag)
    with open(os.path.join(yolo_dir, 'train.txt'), 'w') as file:
        file.writelines(train_txt)
    verbose("[INFO]train.txt file generated.", v_flag)
    return classes
def generate_yolo_cfg_files(classes):
    '''Write obj.names, obj.data and yolov3.cfg into yolo_dir.

    Args:
        classes: Class labels; one is written per line to obj.names, and their
            count fills the '#CLASS#' placeholder of the cfg template.

    The cfg is produced from 'yolov3.cfg.template', which is opened relative
    to the current working directory.
    '''
    abs_yolo_dir = os.path.abspath(yolo_dir)
    with open(os.path.join(yolo_dir, 'obj.names'), 'w') as file:
        file.writelines(item + '\n' for item in classes)
    verbose("[INFO]obj.names file generated.", v_flag)
    with open(os.path.join(yolo_dir, 'obj.data'), 'w') as file:
        file.write('classes = %s\ntrain = %s\nnames = %s\nbackup = %s' %
                   (str(len(classes)),
                    str(os.path.join(abs_yolo_dir, 'train.txt')),
                    str(os.path.join(abs_yolo_dir, 'obj.names')),
                    str(os.path.join(abs_yolo_dir, 'backup/'))
                    )
                   )
    verbose("[INFO]obj.data file generated.", v_flag)
    n_classes = len(classes)
    # Value substituted for the '#FILTER#' placeholder of the template.
    n_filters = (n_classes + 5) * 3
    # Read the template before opening the output, so that a missing template
    # does not leave behind an empty (or truncated) yolov3.cfg.
    with open('yolov3.cfg.template') as template:
        lines = [
            line.replace('#FILTER#', str(n_filters)).replace('#CLASS#', str(n_classes))
            for line in template
        ]
    with open(os.path.join(yolo_dir, 'yolov3.cfg'), 'w') as file:
        file.writelines(lines)
    verbose("[INFO]yolov3.cfg file generated.", v_flag)
def main():
    '''Validate the configured paths, then run the conversion.

    Prints an error and stops at the first missing path; otherwise writes the
    label files and then the YOLO configuration files.
    '''
    if not os.path.isdir(image_dir):
        print('[ERROR]The directory %s does not exist' % os.path.abspath(image_dir))
    elif not os.path.exists(dataturks_json_path):
        print('[ERROR]The specified json file does not exitst')
    elif not os.path.isdir(yolo_dir):
        print('[ERROR]The directory %s does not exist' % os.path.abspath(yolo_dir))
    else:
        # The class list collected during conversion drives the cfg files.
        generate_yolo_cfg_files(convert_to_yolo_annotation())
def arg_parser():
    '''Parse the command line and return the resulting namespace.

    Options: -v (verbose switch) plus three required paths, -d, -i and -y.
    '''
    cli = argparse.ArgumentParser(
        description='Converts Dataturks JSON format to yolo-darknet format.'
    )
    cli.add_argument('-v', action='store_true', help='Verbose output.')
    # (short flag, long flag, help text) for every required path option
    required_paths = (
        ('-d', '--dataturks_json_path', 'Path to the Dataturks JSON file.'),
        ('-i', '--image_dir', 'Path to the directory where the images with annotations will be stored.'),
        ('-y', '--yolo_dir', 'Path to the directory where the files for training YOLO will be stored.'),
    )
    for short_flag, long_flag, description in required_paths:
        cli.add_argument(short_flag, long_flag, required=True, help=description)
    return cli.parse_args()
def verbose(message, v_flag):
    '''Print message only when verbose output is enabled.

    Args:
        message: Text to print.
        v_flag: Verbosity switch; any truthy value enables printing.
    '''
    # Truthiness rather than '== True', so a verbosity count such as 2 also
    # enables output.
    if v_flag:
        print(message)
if __name__ == '__main__':
    # Parse the command line once, then publish every option as a module-level
    # name, which is where the functions of this script read them from.
    # Assignments at module scope are already global, so no 'global'
    # declarations are needed.
    args = arg_parser()
    dataturks_json_path, image_dir, yolo_dir, v_flag = (
        args.dataturks_json_path,
        args.image_dir,
        args.yolo_dir,
        args.v,
    )
    main()