# Awesome-Python-Scripts/Dataturks-to-YOLOv3/convert.py

import os
import argparse
import json
import requests

def download_image(image_url, image_dir, timeout=30):
    '''Download image_url into image_dir unless the file is already there.

    The local file name is the last '/'-separated component of image_url.

    Args:
        image_url: URL of the image to fetch.
        image_dir: Directory the image is stored in.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the conversion forever.

    Returns:
        The path of the local image file, or False when the image could not
        be downloaded (request error or a non-200 response).
    '''

    file_name = image_url.split('/')[-1]
    file_path = os.path.join(image_dir, file_name)
    if os.path.exists(file_path):
        verbose("[INFO]%s exists, skipping download" % file_name, v_flag)
        return file_path

    try:
        response = requests.get(image_url, timeout=timeout)
    except requests.RequestException:
        # Connection errors, timeouts and malformed URLs are treated like a
        # failed download so one bad entry does not abort the whole run.
        print('[WARNING]Unable to download image, skipping...')
        return False

    if response.status_code != 200:
        print('[WARNING]Unable to download image, skipping...')
        return False

    with open(file_path, 'wb') as file:
        file.write(response.content)
    verbose("[INFO]Downloaded %s" % file_name, v_flag)
    return file_path

def generate_annotation(label, data):
    '''Build the box part of a YOLO annotation line from one Dataturks item.

    Args:
        label: Label of the box. Not used here; kept so existing callers
            keep working.
        data: Annotation item with 'imageWidth', 'imageHeight' and 'points'.
            'points' is either four [x, y] pairs (the box corners) or two
            {'x': ..., 'y': ...} dicts (opposite corners), in any order.
            The coordinates are multiplied by the image size here, so they
            are presumably fractions of the image size.

    Returns:
        "x_center y_center width height\\n" with every value expressed as a
        ratio of the image size and formatted to six decimals.
    '''

    image_width = data['imageWidth']
    image_height = data['imageHeight']
    points = data['points']

    # if four coordinates of the bounding box is given
    if len(points) == 4:
        xs = [point[0] for point in points]
        ys = [point[1] for point in points]
        xmin = image_width * min(xs)
        ymin = image_height * min(ys)
        xmax = image_width * max(xs)
        ymax = image_height * max(ys)

    # if diagonal coordinates given
    else:
        x_first = int(points[0]['x'] * image_width)
        y_first = int(points[0]['y'] * image_height)
        x_second = int(points[1]['x'] * image_width)
        y_second = int(points[1]['y'] * image_height)
        # The two corners may come in any order (e.g. bottom-right first);
        # sorting them keeps width and height from going negative.
        xmin, xmax = min(x_first, x_second), max(x_first, x_second)
        ymin, ymax = min(y_first, y_second), max(y_first, y_second)

    # calculating coordinate ratios as required for training yolo
    x_center = ((xmax + xmin) / 2.0) / image_width
    y_center = ((ymax + ymin) / 2.0) / image_height
    width = (xmax - xmin) / image_width
    height = (ymax - ymin) / image_height

    return "%.6f %.6f %.6f %.6f\n" % (x_center, y_center, width, height)

def convert_to_yolo_annotation():
    '''Convert the Dataturks export into per-image YOLO annotation files.

    Reads the module-level dataturks_json_path (one JSON object per line),
    downloads every annotated image into image_dir, writes a .txt annotation
    file next to each image and finally writes train.txt (absolute image
    paths, one per line) into yolo_dir.

    Returns:
        The list of labels found; a label's position in the list is the
        class index used in the annotation files.
    '''
    classes = []
    train_txt = []
    with open(dataturks_json_path, 'r') as file:
        # Blank lines (e.g. a trailing newline at the end of the export)
        # are not JSON documents and would make json.loads fail.
        lines = [line for line in file if line.strip()]

    for line in lines:
        data = json.loads(line)
        if data['annotation'] is None:
            continue

        file_path = download_image(data['content'], image_dir)

        if not file_path:
            continue

        rows = []

        for item in data['annotation']:
            if item['label'] is None:
                continue

            # A label may be a single value or a list of values.
            labels = item['label']
            if not isinstance(labels, list):
                labels = [labels]

            for label in labels:
                if label not in classes:
                    classes.append(label)

                rows.append(str(classes.index(label)) + ' ' + generate_annotation(label, item))

        train_txt.append(str(os.path.abspath(file_path)) + '\n')

        # splitext only strips an extension of the file name itself, so an
        # image without extension or a dotted directory name cannot send the
        # annotation file to the wrong path.
        annotation_file = os.path.splitext(file_path)[0] + '.txt'

        with open(annotation_file, 'w') as f:
            f.write(''.join(rows))
        verbose("[INFO]%s file generated." % annotation_file, v_flag)

    with open(os.path.join(yolo_dir, 'train.txt'), 'w') as file:
        file.writelines(train_txt)
    verbose("[INFO]train.txt file generated.", v_flag)

    return classes

def generate_yolo_cfg_files(classes):
    '''Write obj.names, obj.data and yolov3.cfg into the module-level yolo_dir.

    Args:
        classes: List of label names; the list order is the class order
            written to obj.names.

    yolov3.cfg is produced from the file 'yolov3.cfg.template' (looked up in
    the current working directory) by replacing the '#FILTER#' and '#CLASS#'
    placeholders.
    '''

    with open(os.path.join(yolo_dir, 'obj.names'), 'w') as file:
        for item in classes:
            file.write(item + '\n')
    verbose("[INFO]obj.names file generated.", v_flag)

    yolo_dir_abs = os.path.abspath(yolo_dir)
    with open(os.path.join(yolo_dir, 'obj.data'), 'w') as file:
        file.write('classes = %s\ntrain = %s\nnames = %s\nbackup = %s' %
                   (str(len(classes)),
                    str(os.path.join(yolo_dir_abs, 'train.txt')),
                    str(os.path.join(yolo_dir_abs, 'obj.names')),
                    str(os.path.join(yolo_dir_abs, 'backup/'))
                   )
        )
    verbose("[INFO]obj.data file generated.", v_flag)

    n_classes = len(classes)

    # Value substituted for '#FILTER#'; (classes + 5) * 3 is presumably the
    # filter count of the layers preceding each YOLO layer in the template.
    n_filters = (n_classes + 5) * 3

    # Read the template before opening the output for writing, so a missing
    # or unreadable template does not leave an empty yolov3.cfg behind.
    with open('yolov3.cfg.template') as template:
        lines = template.readlines()

    lines = [
        line.replace('#FILTER#', str(n_filters)).replace('#CLASS#', str(n_classes))
        for line in lines
    ]

    with open(os.path.join(yolo_dir, 'yolov3.cfg'), 'w') as file:
        file.writelines(lines)
    verbose("[INFO]yolov3.cfg file generated.", v_flag)


def main():
    '''Validate the configured paths, then run the conversion.

    Uses the module-level image_dir, dataturks_json_path and yolo_dir.
    Prints an error and returns without doing any work when one of them is
    not usable.
    '''
    if not os.path.isdir(image_dir):
        print('[ERROR]The directory %s does not exist' % os.path.abspath(image_dir))
        return
    # isfile rather than exists: a directory passed as the JSON path would
    # otherwise get through and fail later when it is opened.
    if not os.path.isfile(dataturks_json_path):
        print('[ERROR]The specified json file does not exist')
        return
    if not os.path.isdir(yolo_dir):
        print('[ERROR]The directory %s does not exist' % os.path.abspath(yolo_dir))
        return
    classes = convert_to_yolo_annotation()
    generate_yolo_cfg_files(classes)

def arg_parser():
    '''Parse the command line and return the resulting argparse namespace.

    Options: -v (verbose flag) plus three required paths,
    -d/--dataturks_json_path, -i/--image_dir and -y/--yolo_dir.
    '''
    cli = argparse.ArgumentParser(
        description='Converts Dataturks JSON format to yolo-darknet format.')
    cli.add_argument('-v', action='store_true', help='Verbose output.')

    # (short flag, long flag, help text) of every mandatory path option.
    required_paths = (
        ('-d', '--dataturks_json_path',
         'Path to the Dataturks JSON file.'),
        ('-i', '--image_dir',
         'Path to the directory where the images with annotations will be stored.'),
        ('-y', '--yolo_dir',
         'Path to the directory where the files for training YOLO will be stored.'),
    )
    for short_flag, long_flag, help_text in required_paths:
        cli.add_argument(short_flag, long_flag, required=True, help=help_text)

    return cli.parse_args()

def verbose(message, v_flag):
    '''Print message only when v_flag compares equal to True.'''
    enabled = (v_flag == True)
    if not enabled:
        return
    print(message)

if __name__ == '__main__':
    # The functions above read these settings as module-level names, so the
    # parsed options are published here before main() runs. No `global`
    # statement is needed: assignments at module level are already global.
    args = arg_parser()
    v_flag = args.v
    yolo_dir = args.yolo_dir
    image_dir = args.image_dir
    dataturks_json_path = args.dataturks_json_path
    main()