diff --git a/Dataturks-to-YOLOv3/README.md b/Dataturks-to-YOLOv3/README.md
new file mode 100644
index 0000000..4961994
--- /dev/null
+++ b/Dataturks-to-YOLOv3/README.md
@@ -0,0 +1,13 @@
+# Convert Dataturks JSON to YOLO format
+The `convert.py` script downloads the images and converts the annotations in a Dataturks JSON export to YOLO format.
+## Usage
+1. Clone or download this repository.
+2. Create folders for saving the images and the YOLO config files.
+3. Execute:
+```
+python3 convert.py -d <dataturks_json_path> -i <image_dir> -y <yolo_dir>
+```
+Note: the `-v` flag can be added for detailed output (see the example below).
+## Output
+1. The downloaded images, each with a matching annotation `.txt` file, in the specified image directory.
+2. The YOLO config files `train.txt`, `obj.names`, `obj.data`, and `yolov3.cfg` saved in the specified YOLO directory.
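+## Example
+A sample invocation, assuming the Dataturks export was saved as `dataturks.json` and the folders `images/` and `yolo/` already exist (all paths here are hypothetical):
+```
+python3 convert.py -v -d dataturks.json -i images/ -y yolo/
+```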
diff --git a/Dataturks-to-YOLOv3/convert.py b/Dataturks-to-YOLOv3/convert.py
new file mode 100644
index 0000000..6ce72b7
--- /dev/null
+++ b/Dataturks-to-YOLOv3/convert.py
@@ -0,0 +1,165 @@
+import os
+import argparse
+import json
+import requests
+
+def download_image(image_url, image_dir):
+    '''Downloads the image at image_url into image_dir unless it already exists.'''
+
+    file_name = image_url.split('/')[-1]
+    file_path = os.path.join(image_dir, file_name)
+    if os.path.exists(file_path):
+        verbose("[INFO] %s exists, skipping download" % file_name, v_flag)
+        return file_path
+
+    response = requests.get(image_url)
+    if response.status_code == 200:
+        with open(file_path, 'wb') as file:
+            file.write(response.content)
+        verbose("[INFO] Downloaded %s" % file_name, v_flag)
+        return file_path
+    else:
+        print('[WARNING] Unable to download image, skipping...')
+        return False
+
+def generate_annotation(label, data):
+    '''Generates one YOLO annotation line from a Dataturks annotation item.'''
+
+    image_width = data['imageWidth']
+    image_height = data['imageHeight']
+
+    # if all four corners of the bounding box are given
+    if len(data['points']) == 4:
+        xmin = image_width * min(data['points'][0][0], data['points'][1][0], data['points'][2][0], data['points'][3][0])
+        ymin = image_height * min(data['points'][0][1], data['points'][1][1], data['points'][2][1], data['points'][3][1])
+        xmax = image_width * max(data['points'][0][0], data['points'][1][0], data['points'][2][0], data['points'][3][0])
+        ymax = image_height * max(data['points'][0][1], data['points'][1][1], data['points'][2][1], data['points'][3][1])
+
+    # if only the two diagonal corners are given
+    else:
+        xmin = int(data['points'][0]['x'] * image_width)
+        ymin = int(data['points'][0]['y'] * image_height)
+        xmax = int(data['points'][1]['x'] * image_width)
+        ymax = int(data['points'][1]['y'] * image_height)
+
+    # normalized center coordinates and box size, as required for training YOLO
+    x_center = ((xmax + xmin) / 2.0) / image_width
+    y_center = ((ymax + ymin) / 2.0) / image_height
+    width = (xmax - xmin) / image_width
+    height = (ymax - ymin) / image_height
+
+    return ("%.6f %.6f %.6f %.6f\n" % (x_center, y_center, width, height))
+
+def convert_to_yolo_annotation():
+    classes = []
+    train_txt = []
+    with open(dataturks_json_path, 'r') as file:
+        lines = file.readlines()
+
+    for line in lines:
+        data = json.loads(line)
+        if data['annotation'] is None:
+            continue
+
+        file_path = download_image(data['content'], image_dir)
+        if not file_path:
+            continue
+
+        annotation = ''
+        for item in data['annotation']:
+            if item['label'] is None:
+                continue
+
+            labels = item['label']
+            if not isinstance(labels, list):
+                labels = [labels]
+
+            for label in labels:
+                if label not in classes:
+                    classes.append(label)
+                # one line per box: "<class_id> <x_center> <y_center> <width> <height>"
+                annotation = annotation + str(classes.index(label)) + ' ' + generate_annotation(label, item)
+
+        train_txt.append(str(os.path.abspath(file_path)) + '\n')
+
+        # the annotation file shares the image's basename, with a .txt extension
+        annotation_file = '.'.join(file_path.split('.')[:-1]) + '.txt'
+        with open(annotation_file, 'w') as f:
+            f.write(annotation)
+        verbose("[INFO] %s file generated." % annotation_file, v_flag)
+
+    with open(os.path.join(yolo_dir, 'train.txt'), 'w') as file:
+        file.writelines(train_txt)
+    verbose("[INFO] train.txt file generated.", v_flag)
+
+    return classes
+
+def generate_yolo_cfg_files(classes):
+
+    with open(os.path.join(yolo_dir, 'obj.names'), 'w') as file:
+        for item in classes:
+            file.write(item + '\n')
+    verbose("[INFO] obj.names file generated.", v_flag)
+
+    with open(os.path.join(yolo_dir, 'obj.data'), 'w') as file:
+        file.write('classes = %s\ntrain = %s\nnames = %s\nbackup = %s' %
+                   (str(len(classes)),
+                    str(os.path.join(os.path.abspath(yolo_dir), 'train.txt')),
+                    str(os.path.join(os.path.abspath(yolo_dir), 'obj.names')),
+                    str(os.path.join(os.path.abspath(yolo_dir), 'backup/'))
+                    )
+                   )
+    verbose("[INFO] obj.data file generated.", v_flag)
+
+    n_classes = len(classes)
+    # each [yolo] layer predicts 3 boxes, and every box needs 4 coordinates,
+    # 1 objectness score and one score per class, hence (classes + 5) * 3 filters
+    n_filters = (n_classes + 5) * 3
+
+    # the template is expected in the current working directory
+    with open(os.path.join(yolo_dir, 'yolov3.cfg'), 'w') as file:
+        with open('yolov3.cfg.template') as template:
+            lines = template.readlines()
+        for i in range(len(lines)):
+            lines[i] = lines[i].replace('#FILTER#', str(n_filters))
+            lines[i] = lines[i].replace('#CLASS#', str(n_classes))
+        file.writelines(lines)
+    verbose("[INFO] yolov3.cfg file generated.", v_flag)
+
+
+def main():
+    if not os.path.isdir(image_dir):
+        print('[ERROR] The directory %s does not exist' % os.path.abspath(image_dir))
+        return
+    if not os.path.exists(dataturks_json_path):
+        print('[ERROR] The specified json file does not exist')
+        return
+    if not os.path.isdir(yolo_dir):
+        print('[ERROR] The directory %s does not exist' % os.path.abspath(yolo_dir))
+        return
+    classes = convert_to_yolo_annotation()
+    generate_yolo_cfg_files(classes)
+
+
+def arg_parser():
+    parser = argparse.ArgumentParser(description='Converts Dataturks JSON format to yolo-darknet format.')
+    parser.add_argument('-v', help='Verbose output.', action='store_true')
+    parser.add_argument('-d', '--dataturks_json_path', required=True, help='Path to the Dataturks JSON file.')
+    parser.add_argument('-i', '--image_dir', required=True, help='Path to the directory where the images with annotations will be stored.')
+    parser.add_argument('-y', '--yolo_dir', required=True, help='Path to the directory where the files for training YOLO will be stored.')
+    return parser.parse_args()
+
+def verbose(message, v_flag):
+    if v_flag:
+        print(message)
+
+if __name__ == '__main__':
+    args = arg_parser()
+    # module-level configuration read by the functions above
+    dataturks_json_path = args.dataturks_json_path
+    image_dir = args.image_dir
+    yolo_dir = args.yolo_dir
+    v_flag = args.v
+    main()
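+
+# Illustrative input line, inferred from the key accesses above (all values
+# are hypothetical):
+#   {"content": "http://example.com/img.jpg", "annotation":
+#     [{"label": ["dog"], "imageWidth": 640, "imageHeight": 480,
+#       "points": [{"x": 0.25, "y": 0.25}, {"x": 0.75, "y": 0.75}]}]}
+# would yield an img.txt containing: 0 0.500000 0.500000 0.500000 0.500000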
diff --git a/Dataturks-to-YOLOv3/yolov3.cfg.template b/Dataturks-to-YOLOv3/yolov3.cfg.template
new file mode 100644
index 0000000..eff4489
--- /dev/null
+++ b/Dataturks-to-YOLOv3/yolov3.cfg.template
@@ -0,0 +1,789 @@
+[net]
+# Testing
+batch=1
+subdivisions=1
+# Training
+# batch=64
+# subdivisions=16
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+burn_in=1000
+max_batches = 500200
+policy=steps
+steps=400000,450000
+scales=.1,.1
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+# Downsample
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=2
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+# Downsample
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=2
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+# Downsample
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=2
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+# Downsample
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=2
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+# Downsample
+
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=2
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[shortcut]
+from=-3
+activation=linear
+
+######################
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=#FILTER#
+activation=linear
+
+
+[yolo]
+mask = 6,7,8
+anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
+classes=#CLASS#
+num=9
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
+
+[route]
+layers = -4
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[upsample]
+stride=2
+
+[route]
+layers = -1, 61
+
+
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=512
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=512
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=512
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=#FILTER#
+activation=linear
+
+
+[yolo]
+mask = 3,4,5
+anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
+classes=#CLASS#
+num=9
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
+
+
+[route]
+layers = -4
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[upsample]
+stride=2
+
+[route]
+layers = -1, 36
+
+
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=1
+stride=1
+pad=1
+activation=leaky
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=256
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=#FILTER#
+activation=linear
+
+
+[yolo]
+mask = 0,1,2
+anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
+classes=#CLASS#
+num=9
+jitter=.3
+ignore_thresh = .7
+truth_thresh = 1
+random=1
+
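+# Note: convert.py fills the FILTER placeholders above with (classes + 5) * 3
+# and the CLASS placeholders with the number of classes.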