200字范文 > PASCAL VOC目标检测数据集格式转化为COCO目标检测数据集格式

PASCAL VOC目标检测数据集格式转化为COCO目标检测数据集格式

时间：2018-08-08 00:51:55

VOC目标检测数据集格式转化为COCO目标检测数据集格式

你只需要传入一个VOC数据集根路径就可以直接运行，非常方便。

需要材料：

VOC原始数据集；一个 PyCharm 项目文件夹（任意）

达到效果：

自动生成当前项目下的 data/coco 文件夹

VOC数据集标注格式转为标准的COCO数据集格式

图片按照训练集和验证集划分好

voc目标检测数据集格式

主要关注以下三个文件夹：Annotations、JPEGImages、ImageSets/Main

COCO数据集标注格式

annotations 文件夹里存放的是标注文件，而 train 和 val 文件夹里存放的是图片

转换代码如下：

文件复制到项目里面任何一个地方都能运行

运行代码：
python demo.py --voc_root={YOUR VOC} # 这里的 {YOUR VOC}就是你自己的voc路径

import os
import json
import xml.etree.ElementTree as ET
import argparse
import shutil

# The 20 PASCAL VOC detection classes. A category's COCO id is its index here.
_VOC_CLASSES = (
    "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
    "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
)

# Supercategory name -> member classes.
_VOC_SUPERCATEGORIES = {
    "vehicles": ("car", "bus", "bicycle", "motorbike", "aeroplane", "boat", "train"),
    "household": ("chair", "sofa", "diningtable", "tvmonitor", "bottle", "pottedplant"),
    "animals": ("cat", "dog", "cow", "horse", "sheep", "bird"),
    "person": ("person",),
}

_BNDBOX_TAGS = ("xmin", "ymin", "xmax", "ymax")


def _find_project_root(start):
    """Return the nearest directory at or above *start* that contains ``.idea``.

    The search stops at the filesystem root. When no ancestor carries the
    marker, the resolved *start* directory is returned so the search always
    terminates.
    """
    origin = os.path.realpath(start)
    candidate = origin
    while True:
        if os.path.exists(os.path.join(candidate, ".idea")):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:  # filesystem root reached, nothing found
            return origin
        candidate = parent


def _read_image_stems(txt_path):
    """Return the non-blank, stripped lines of a VOC ``ImageSets/Main`` split file."""
    with open(txt_path, "r", encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [stem for stem in stripped if stem]


def _copy_images(stems, src_dir, dst_dir):
    """Copy ``<stem>.jpg`` for every stem from *src_dir* into *dst_dir*."""
    for stem in stems:
        image_name = stem + ".jpg"
        shutil.copy(os.path.join(src_dir, image_name), os.path.join(dst_dir, image_name))


def _build_categories():
    """Return the COCO ``categories`` list for the 20 VOC classes."""
    supercategory_of = {
        name: supercategory
        for supercategory, members in _VOC_SUPERCATEGORIES.items()
        for name in members
    }
    categories = []
    for idx, name in enumerate(_VOC_CLASSES):
        if name not in supercategory_of:
            raise ValueError(f"unsupported class: {name}")
        categories.append({"id": idx, "name": name, "supercategory": supercategory_of[name]})
    return categories


def _parse_bndbox(bndbox):
    """Convert a VOC ``<bndbox>`` element to a COCO ``[x, y, width, height]`` list.

    Raises:
        ValueError: if the element has an unexpected child tag or lacks one
            of xmin / ymin / xmax / ymax.
    """
    corners = {}
    for pos in bndbox:
        if pos.tag not in _BNDBOX_TAGS:
            raise ValueError(f"unsupported bndbox tag: {pos.tag}")
        # int(float(...)) accepts both "48" and "48.0"; decimals are truncated.
        corners[pos.tag] = int(float(pos.text))
    missing = [tag for tag in _BNDBOX_TAGS if tag not in corners]
    if missing:
        raise ValueError(f"bndbox is missing: {', '.join(missing)}")
    return [
        corners["xmin"],
        corners["ymin"],
        corners["xmax"] - corners["xmin"],
        corners["ymax"] - corners["ymin"],
    ]


def _build_coco_dict(stems, voc_anno_dir):
    """Parse ``<stem>.xml`` for every stem and return the COCO-style dict.

    Image ids are the integer value of the image file name up to its first
    dot. Annotation ids are unique within the returned dict.

    Raises:
        ValueError: on an unknown class name, a malformed ``<bndbox>``, or a
            file name whose stem is not an integer.
    """
    category_id_of = {name: idx for idx, name in enumerate(_VOC_CLASSES)}
    coco = {
        "info": [],
        "license": [],
        "images": [],
        "annotations": [],
        "categories": _build_categories(),
    }
    # One running counter for the whole file: annotation ids must not repeat
    # across images. Starts at 1 because pycocotools' evaluation treats a
    # matched ground-truth id of 0 as "no match".
    next_annotation_id = 1
    for stem in stems:
        xml_path = os.path.join(voc_anno_dir, stem + ".xml")
        root = ET.parse(xml_path).getroot()

        # Fall back to the split-file stem when <filename> is absent or empty.
        file_name = root.findtext("filename") or stem + ".jpg"
        try:
            image_id = int(file_name.split(".")[0])
        except ValueError as err:
            raise ValueError(
                f"{xml_path}: file name {file_name!r} has no integer stem to use as image id"
            ) from err

        size = root.find("size")
        width = int(size.findtext("width", "0")) if size is not None else 0
        height = int(size.findtext("height", "0")) if size is not None else 0

        for obj in root.findall("object"):
            name = obj.findtext("name") or ""
            if name not in category_id_of:
                raise ValueError(f"{xml_path}: unsupported class: {name!r}")
            bndbox = obj.find("bndbox")
            if bndbox is None:
                raise ValueError(f"{xml_path}: object {name!r} has no bndbox")
            bbox = _parse_bndbox(bndbox)
            coco["annotations"].append({
                "id": next_annotation_id,
                "image_id": image_id,
                "category_id": category_id_of[name],
                "segmentation": [],
                "area": float(bbox[2] * bbox[3]),
                "bbox": bbox,
                "iscrowd": 0,
            })
            next_annotation_id += 1

        coco["images"].append({
            "file_name": file_name,
            "height": height,
            "width": width,
            "id": image_id,
        })
    return coco


def transform_voc2coco(args):
    """Convert a PASCAL VOC detection dataset into a COCO-style layout.

    Creates ``data/coco/{train,val,annotations}`` under the project root (the
    nearest directory at or above the current working directory containing
    ``.idea``, else the current working directory). Copies the images listed
    in ``ImageSets/Main/train.txt`` and ``val.txt`` into ``train`` / ``val``,
    and writes ``instances_train.json`` / ``instances_val.json``.

    Args:
        args: object with a ``voc_root`` attribute, the VOC dataset root
            containing ``Annotations``, ``JPEGImages`` and ``ImageSets/Main``.

    Raises:
        ValueError: if an annotation file is malformed (see ``_build_coco_dict``).
        OSError: if a listed image, annotation or split file cannot be read.
    """
    voc_root = args.voc_root
    project_root = _find_project_root(os.curdir)

    # COCO output layout.
    coco_root = os.path.join(project_root, "data/coco")
    coco_train = os.path.join(coco_root, "train")
    coco_val = os.path.join(coco_root, "val")
    coco_anno = os.path.join(coco_root, "annotations")
    coco_train_anno = os.path.join(coco_anno, "instances_train.json")
    coco_val_anno = os.path.join(coco_anno, "instances_val.json")
    # Create each directory on its own so a partially existing tree is completed.
    for directory in (coco_train, coco_val, coco_anno):
        os.makedirs(directory, exist_ok=True)

    print(f"train_ number: {len(os.listdir(coco_train))}")
    print(f"val_ number: {len(os.listdir(coco_val))}")

    # VOC input layout.
    voc_anno_dir = os.path.join(voc_root, "Annotations")
    voc_images_dir = os.path.join(voc_root, "JPEGImages")
    voc_train_txt = os.path.join(voc_root, "ImageSets/Main/train.txt")
    voc_val_txt = os.path.join(voc_root, "ImageSets/Main/val.txt")

    train_stems = _read_image_stems(voc_train_txt)
    val_stems = _read_image_stems(voc_val_txt)

    overwrite_images = True
    if overwrite_images:
        _copy_images(train_stems, voc_images_dir, coco_train)
        _copy_images(val_stems, voc_images_dir, coco_val)

    # Regenerate when forced or when either annotation file is missing.
    override_anno = True
    both_exist = os.path.exists(coco_train_anno) and os.path.exists(coco_val_anno)
    if override_anno or not both_exist:
        for anno_path, stems in ((coco_train_anno, train_stems), (coco_val_anno, val_stems)):
            # Build the dict before opening the file so a parse error does
            # not leave a truncated JSON file behind.
            coco = _build_coco_dict(stems, voc_anno_dir)
            with open(anno_path, "w") as f:
                json.dump(coco, f)
    print("-" * 30 + "finish" + "-" * 30)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--voc_root', type=str, default="/project/datasets/VOC")
    args = parser.parse_args()
    transform_voc2coco(args)

本内容不代表本网观点和政治立场，如有侵犯你的权益请联系我们处理。

网友评论

网友评论仅供其表达个人看法，并不表明网站立场。