自标注目标检测数据集（labelme）转voc\coco格式，并切图处理

http://www.icodebang.com/article/355859.html

highlight: github
theme: github

这篇博客记录了我处理自标注的目标检测数据集的过程，由于数据集中小目标占比较大，处理的目标是希望将数据集中图片切割成小块。过程相对比较繁琐，因此在此记录，以便有同样需求的同学参考，也方便自己回顾。有任何问题或者有更好的方法，也希望在评论区指出，共同进步。

labelme转voc

这个过程网上有很多的代码可供参考，我使用下面的代码进行转换。labelme标注的结果应该是一个文件夹，里面既有图片，也有同名的json文件提供标签信息（下面的代码读取的就是*.json文件）。

Voc格式的数据遵循以下目录

	VOC_ROOT # 根目录
	├── JPEGImages # 存放源图片
	│   ├── aaaa.jpg
	│   ├── bbbb.jpg
	│   └── cccc.jpg
	├── Annotations # 存放xml文件，与JPEGImages中的图片一一对应，解释图片的内容
	│   ├── aaaa.xml
	│   ├── bbbb.xml
	│   └── cccc.xml
	└── ImageSets
	    └── Main
	        ├── train.txt # txt文件中每一行包含一个图片的名称
	        └── val.txt

下面是转换的代码labelme2voc.py

	import os
	from typing import List, Any
	import numpy as np
	import codecs
	import json
	from glob import glob
	import cv2
	import shutil
	from sklearn.model_selection import train_test_split
	# 1. Input / output locations. Paths are joined by plain string concatenation
	#    below, so each non-empty path must end with a path separator.
	labelme_imgpath = r""  # directory holding the original labelme images (*.jpg)
	labelme_annorpath = r""  # directory holding the labelme annotation files (*.json)
	saved_path = r""  # root directory of the generated VOC dataset
	isUseTest = True  # whether to split off a test set
	# 2. Create the directory skeleton of the VOC dataset.
	for sub_dir in ("Annotations", "JPEGImages/", "ImageSets/Main/"):
	    os.makedirs(saved_path + sub_dir, exist_ok=True)
	# 3. Collect the annotation base names (no directory, no ".json" suffix).
	#    Backslashes are normalised first so Windows-style paths split correctly.
	files = glob(labelme_annorpath + "*.json")
	files = [i.replace("\\", "/").split("/")[-1].split(".json")[0] for i in files]
	# 4. Read every labelme annotation and write the matching VOC xml.
	for json_file_ in files:
	    json_filename = labelme_annorpath + json_file_ + ".json"
	    with open(json_filename, "r", encoding="utf-8") as json_fp:
	        json_file = json.load(json_fp)
	    image = cv2.imread(labelme_imgpath + json_file_ + ".jpg")
	    if image is None:
	        # cv2.imread signals a missing/unreadable image by returning None.
	        print("skip " + json_filename + ": cannot read the matching .jpg image")
	        continue
	    height, width, channels = image.shape
	    with codecs.open(saved_path + "Annotations/" + json_file_ + ".xml", "w", "utf-8") as xml:
	        xml.write('<annotation>\n')
	        xml.write('\t<folder>' + 'WH_data' + '</folder>\n')
	        xml.write('\t<filename>' + json_file_ + ".jpg" + '</filename>\n')
	        xml.write('\t<source>\n')
	        xml.write('\t\t<database>WH Data</database>\n')
	        xml.write('\t\t<annotation>WH</annotation>\n')
	        xml.write('\t\t<image>flickr</image>\n')
	        xml.write('\t\t<flickrid>NULL</flickrid>\n')
	        xml.write('\t</source>\n')
	        xml.write('\t<owner>\n')
	        xml.write('\t\t<flickrid>NULL</flickrid>\n')
	        xml.write('\t\t<name>WH</name>\n')
	        xml.write('\t</owner>\n')
	        xml.write('\t<size>\n')
	        xml.write('\t\t<width>' + str(width) + '</width>\n')
	        xml.write('\t\t<height>' + str(height) + '</height>\n')
	        xml.write('\t\t<depth>' + str(channels) + '</depth>\n')
	        xml.write('\t</size>\n')
	        xml.write('\t\t<segmented>0</segmented>\n')
	        for multi in json_file["shapes"]:
	            points = np.array(multi["points"])
	            labelName = multi["label"]
	            # The VOC box is the axis-aligned bounding box of the shape's points.
	            xmin = min(points[:, 0])
	            xmax = max(points[:, 0])
	            ymin = min(points[:, 1])
	            ymax = max(points[:, 1])
	            if xmax <= xmin or ymax <= ymin:
	                # Degenerate (zero-width or zero-height) shapes are not exported.
	                continue
	            xml.write('\t<object>\n')
	            xml.write('\t\t<name>' + labelName + '</name>\n')
	            xml.write('\t\t<pose>Unspecified</pose>\n')
	            xml.write('\t\t<truncated>1</truncated>\n')
	            xml.write('\t\t<difficult>0</difficult>\n')
	            xml.write('\t\t<bndbox>\n')
	            xml.write('\t\t\t<xmin>' + str(int(xmin)) + '</xmin>\n')
	            xml.write('\t\t\t<ymin>' + str(int(ymin)) + '</ymin>\n')
	            xml.write('\t\t\t<xmax>' + str(int(xmax)) + '</xmax>\n')
	            xml.write('\t\t\t<ymax>' + str(int(ymax)) + '</ymax>\n')
	            xml.write('\t\t</bndbox>\n')
	            xml.write('\t</object>\n')
	            print(json_filename, xmin, ymin, xmax, ymax, labelName)
	        xml.write('</annotation>')
	# 5. Copy the images into <saved_path>/JPEGImages/.
	image_files = glob(labelme_imgpath + "*.jpg")
	print("copy image files to VOC007/JPEGImages/")
	for image in image_files:
	    shutil.copy(image, saved_path + "JPEGImages/")
	# 6. Write the split lists under ImageSets/Main/.
	txtsavepath = saved_path + "ImageSets/Main/"
	# Use the xml files generated in step 4 (not a machine-specific absolute path).
	total_files = glob(saved_path + "Annotations/*.xml")
	# Sorted so that the fixed random_state really yields a reproducible split;
	# glob() itself returns files in arbitrary order.
	total_files = sorted(i.replace("\\", "/").split("/")[-1].split(".xml")[0] for i in total_files)
	test_files = []
	if isUseTest:
	    trainval_files, test_files = train_test_split(total_files, test_size=0.15, random_state=55)
	else:
	    trainval_files = total_files
	train_files, val_files = train_test_split(trainval_files, test_size=0.15, random_state=55)
	split_lists = {
	    'trainval': trainval_files,
	    'test': test_files,
	    'train': train_files,
	    'val': val_files,
	}
	for split_name, split_names in split_lists.items():
	    # One image base name per line; the context manager closes each list file.
	    with open(txtsavepath + '/' + split_name + '.txt', 'w') as split_fp:
	        split_fp.writelines(name + "\n" for name in split_names)
	for file in test_files:
	    print(file)

voc格式数据集去除不需要的label

我的数据集原本标注的label类共10类，但我在实际使用中只需要使用其中的4类来训练，因此需要把剩下不需要的类别的图片和标注统统删除掉。因为数据集已经转换成了voc格式，在删除的时候只需要遍历xml文件夹，解析xml文件，当里面出现了不需要的类别的obj的时候，就把这个xml连同对应的图片一并删除

我这么做是因为在我的数据集中，不需要的6类本身占比就非常少，因此对于那些混杂着需要目标和不需要目标的图片，我也一并删掉了，并不会对数据集本身的图片数量造成严重影响。

下面是我处理的代码voc_purification.py，值得注意的是，因为我的voc格式数据中ImageSets\Main\文件夹下有trainval.txt、train.txt、val.txt、test.txt四个文件，也就是四个划分，分别是训练验证集、训练集、验证集、测试集，所以在代码中我连续四次检查txt文件中是否有需要删除的行。

	import glob
	import xml.etree.ElementTree as ET
	import os
	# import xml.dom.minidom
	# Names of the classes to purge: any image containing one of them is deleted.
	delete_labels = ['a', 'b', 'c', 'd', 'e', 'f']
	# Dataset locations.
	path = r'your/annotation/path'  # directory holding the xml files
	img_path = r'your/image/path'  # directory holding the images
	# The four split lists under ImageSets/Main that must stay in sync with the files.
	split_txt_paths = [
	    r"your/trainval.txt/path",
	    r"your/train.txt/path",
	    r"your/val.txt/path",
	    r"your/test.txt/path",
	]
	def remove_name_from_split_files(name, txt_paths):
	    """Rewrite every split list without the lines that are exactly ``name``.
	    Args:
	        name: Image base name (no extension) to drop.
	        txt_paths: Iterable of split list files, one base name per line.
	    """
	    for txt_path in txt_paths:
	        with open(txt_path, 'r') as file:
	            lines = file.readlines()
	        with open(txt_path, 'w') as file:
	            file.writelines(line for line in lines if line.strip("\n") != name)
	def contains_deleted_label(xml_file, labels):
	    """Return True when any <object>/<name> of the VOC xml is in ``labels``."""
	    root = ET.parse(xml_file).getroot()
	    return any(member.findtext('name') in labels for member in root.findall('object'))
	for xml_file in glob.glob(path + '/*.xml'):
	    # File name without the ".xml" suffix.
	    filename = os.path.basename(xml_file)[:-4]
	    if not contains_deleted_label(xml_file, delete_labels):
	        continue
	    image_file = os.path.join(img_path, filename + '.jpg')
	    # Tolerate an image that is already gone so the xml and the split lists
	    # are still cleaned up instead of aborting half-way.
	    if os.path.exists(image_file):
	        os.remove(image_file)
	        print('remove img:' + filename + '.jpg' + '\n')
	    remove_name_from_split_files(filename, split_txt_paths)
	    print('remove txt file:' + filename + '\n')
	    os.remove(os.path.join(path, filename + '.xml'))
	    print('remove xml:' + filename + '.xml' + '\n')

voc转coco格式

之前之所以先转成voc格式，就是因为voc格式中一张图片对应一个xml文件的方式对于删掉不需要的图片比较方便，但在实际使用中，还是coco格式用的比较多，因此我再把它转成coco格式。

这部分内容网上有很多教程可以参考，我贴出来一个以供参考。

voc2coco_from_txt

	import shutil
	import xml.etree.ElementTree as ET
	import os
	import json
	# In-memory COCO document that the add*Item helpers fill and the parse
	# functions finally dump to json.
	coco = {
	    'images': [],
	    'type': 'instances',
	    'annotations': [],
	    'categories': [],
	}
	category_set = {}  # category name -> category id
	image_set = set()  # file names of the images already registered
	# Mind whether category ids should start at 0 or at 1 in your application.
	# If they must start at 1 (i.e. background is included), change the next
	# statement to category_item_id = 0.
	category_item_id = -1
	image_id = 20180000000
	annotation_id = 0
	def addCatItem(name):
	    """Register a new category and return its id.
	    The global counter is incremented first, so the first category receives
	    ``category_item_id + 1``.
	    Args:
	        name: Category name as found in the <name> tag of an <object>.
	    Returns:
	        The integer id assigned to the category.
	    """
	    global category_item_id
	    category_item_id += 1
	    coco['categories'].append({
	        'supercategory': 'none',
	        'id': category_item_id,
	        'name': name,
	    })
	    category_set[name] = category_item_id
	    return category_item_id
	def addImgItem(file_name, size):
	    """Register an image in the COCO document and return its id.
	    Args:
	        file_name: Image file name taken from the xml.
	        size: Dict with at least the keys 'width' and 'height'.
	    Returns:
	        The integer id assigned to the image.
	    Raises:
	        Exception: If the file name, width or height is None.
	    """
	    global image_id
	    if file_name is None:
	        raise Exception('Could not find filename tag in xml file.')
	    for dimension in ('width', 'height'):
	        if size[dimension] is None:
	            raise Exception('Could not find {} tag in xml file.'.format(dimension))
	    image_id += 1
	    coco['images'].append({
	        'id': image_id,
	        'file_name': file_name,
	        'width': size['width'],
	        'height': size['height'],
	    })
	    image_set.add(file_name)
	    return image_id
	def addAnnoItem(object_name, image_id, category_id, bbox):
	    """Append one bounding-box annotation to the COCO document.
	    Args:
	        object_name: Category name of the object (not stored; kept for callers).
	        image_id: Id of the image the box belongs to.
	        category_id: Id of the object's category.
	        bbox: Box as [x, y, w, h].
	    """
	    global annotation_id
	    x, y, w, h = bbox[0], bbox[1], bbox[2], bbox[3]
	    # The segmentation is the box outline, listed as
	    # left-top, left-bottom, right-bottom, right-top.
	    seg = [x, y, x, y + h, x + w, y + h, x + w, y]
	    annotation_id += 1
	    coco['annotations'].append({
	        'segmentation': [seg],
	        'area': w * h,
	        'iscrowd': 0,
	        'ignore': 0,
	        'image_id': image_id,
	        'bbox': bbox,
	        'category_id': category_id,
	        'id': annotation_id,
	    })
	def _read_image_ids(image_sets_file):
	    """Return the image ids listed in a split file, one id per line.
	    Trailing whitespace is stripped and blank lines are skipped, so a stray
	    empty line does not become an empty id.
	    Args:
	        image_sets_file: Path of an ImageSets/Main/*.txt file.
	    Returns:
	        List of id strings in file order.
	    """
	    with open(image_sets_file) as f:
	        stripped = (line.rstrip() for line in f)
	        return [image_id_ for image_id_ in stripped if image_id_]
	"""通过txt文件生成"""
	def _parse_voc_xml(xml_file):
	    """Parse one Pascal VOC xml and add its image and boxes to ``coco``.
	    Shared by parseXmlFiles_by_txt and parseXmlFiles.
	    Args:
	        xml_file: Path of the annotation xml.
	    Raises:
	        Exception: If the root tag is not <annotation>, the image was already
	            added, or the <size>/<bndbox> structure is inconsistent.
	    """
	    bndbox = dict()
	    size = {'width': None, 'height': None, 'depth': None}
	    current_image_id = None
	    current_category_id = None
	    file_name = None
	    tree = ET.parse(xml_file)
	    root = tree.getroot()
	    if root.tag != 'annotation':
	        raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))
	    # elem is <folder>, <filename>, <size>, <object>
	    for elem in root:
	        current_parent = elem.tag
	        object_name = None
	        if elem.tag == 'folder':
	            continue
	        if elem.tag == 'filename':
	            # If the xml's own file name and its <filename> tag disagree and
	            # the xml file name is the correct one (a labelling mistake),
	            # derive file_name from the xml file name plus '.jpg' instead.
	            file_name = elem.text
	            # NOTE(review): this compares against category names, not image
	            # names; duplicate images are caught via image_set below.
	            if file_name in category_set:
	                raise Exception('file_name duplicated')
	        # Add the image item only once <size> has been parsed.
	        elif current_image_id is None and file_name is not None and size['width'] is not None:
	            if file_name not in image_set:
	                current_image_id = addImgItem(file_name, size)
	                print('add image with {} and {}'.format(file_name, size))
	            else:
	                raise Exception('duplicated image: {}'.format(file_name))
	        # subelem is <width>, <height>, <depth>, <name>, <bndbox>
	        for subelem in elem:
	            bndbox['xmin'] = None
	            bndbox['xmax'] = None
	            bndbox['ymin'] = None
	            bndbox['ymax'] = None
	            if current_parent == 'object' and subelem.tag == 'name':
	                object_name = subelem.text
	                if object_name not in category_set:
	                    current_category_id = addCatItem(object_name)
	                else:
	                    current_category_id = category_set[object_name]
	            elif current_parent == 'size':
	                if size[subelem.tag] is not None:
	                    raise Exception('xml structure broken at size tag.')
	                size[subelem.tag] = int(subelem.text)
	            # option is <xmin>, <ymin>, <xmax>, <ymax>, when subelem is <bndbox>
	            if subelem.tag == 'bndbox':
	                for option in subelem:
	                    if bndbox[option.tag] is not None:
	                        raise Exception('xml structure corrupted at bndbox tag.')
	                    bndbox[option.tag] = int(option.text)
	            # Only set once the <bndbox> of an <object> has been parsed.
	            if bndbox['xmin'] is not None:
	                if object_name is None:
	                    raise Exception('xml structure broken at bndbox tag')
	                if current_image_id is None:
	                    raise Exception('xml structure broken at bndbox tag')
	                if current_category_id is None:
	                    raise Exception('xml structure broken at bndbox tag')
	                # COCO boxes are [x, y, w, h].
	                bbox = [
	                    bndbox['xmin'],
	                    bndbox['ymin'],
	                    bndbox['xmax'] - bndbox['xmin'],
	                    bndbox['ymax'] - bndbox['ymin'],
	                ]
	                print('add annotation with {},{},{},{}'.format(object_name, current_image_id, current_category_id,
	                                                               bbox))
	                addAnnoItem(object_name, current_image_id, current_category_id, bbox)
	def parseXmlFiles_by_txt(data_dir, json_save_path, split='train',
	                         image_save_dir=r"E:\DataSets\labelme_new\COCO_cls_4\val"):
	    """Build the COCO json for one split listed in ImageSets/Main/<split>.txt.
	    Every listed image is also copied into ``image_save_dir`` under its
	    original name, so the copies match the file names taken from the xml.
	    Args:
	        data_dir: Root directory of the VOC dataset.
	        json_save_path: Path of the json file to write.
	        split: One of 'train', 'val', 'trainval', 'test'.
	        image_save_dir: Directory receiving the copied images.
	    """
	    image_sets_file = data_dir + "/ImageSets/Main/" + split + ".txt"
	    ids = _read_image_ids(image_sets_file)
	    os.makedirs(image_save_dir, exist_ok=True)
	    for _id in ids:
	        image_file = data_dir + f"/JPEGImages/{_id}.jpg"
	        shutil.copy(image_file, os.path.join(image_save_dir, f"{_id}.jpg"))
	        _parse_voc_xml(data_dir + f"/Annotations/{_id}.xml")
	    with open(json_save_path, 'w') as json_fp:
	        json.dump(coco, json_fp)
	def parseXmlFiles(xml_path, json_save_path):
	    """Build the COCO json from every *.xml file found directly in a folder.
	    Args:
	        xml_path: Directory holding the VOC annotation xml files.
	        json_save_path: Path of the json file to write.
	    """
	    for f in os.listdir(xml_path):
	        if not f.endswith('.xml'):
	            continue
	        xml_file = os.path.join(xml_path, f)
	        print(xml_file)
	        _parse_voc_xml(xml_file)
	    with open(json_save_path, 'w') as json_fp:
	        json.dump(coco, json_fp)
	if __name__ == '__main__':
	    # Generate from a split txt file.
	    voc_data_dir = r"E:\DataSets\labelme_new\VOC2007"  # root directory of the VOC dataset
	    # Path and file name of the generated json.
	    # NOTE(review): the "test" split is written to val.json here - confirm
	    # that this pairing is intended before running.
	    json_save_path = r"E:\DataSets\labelme_new\COCO_cls_4\annotations\val.json"
	    parseXmlFiles_by_txt(voc_data_dir, json_save_path, "test")
	    # Generate from an annotation folder instead:
	    # ann_path = "E:/VOCdevkit/VOC2007/Annotations"
	    # json_save_path = "E:/VOCdevkit/test.json"
	    # parseXmlFiles(ann_path, json_save_path)

COCO格式数据集切图

由于我的数据集图片中目标都比较小，采用切图训练的方式进行（一般当原始数据集全部有标注框的图片中，有1/2以上的图片标注框的平均宽高与原图宽高比例小于0.04时，建议进行切图训练），本节代码来自PaddleDetection官方GitHub仓库。

统计自己的数据集信息

先统计自己的数据集信息，看看是否需要切图训练

可以用下面代码box_distribution.py，使用过程在命令行输入

python box_distribution.py --json_path ../../dataset/annotations/train.json --out_img box_distribution.jpg

其中--json_path加载coco格式的json文件路径，--out_img输出统计分布图路径

	# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	import matplotlib.pyplot as plt
	import json
	import numpy as np
	import argparse
	def median(data):
	    """Return the median of ``data`` without modifying it.
	    Args:
	        data: Non-empty iterable of numbers.
	    Returns:
	        The middle value, or the mean of the two middle values for an even
	        number of items (always computed with true division).
	    Raises:
	        ValueError: If ``data`` is empty.
	    """
	    ordered = sorted(data)  # sorted copy: the caller's list keeps its order
	    if not ordered:
	        raise ValueError('median() needs at least one value')
	    mid = len(ordered) // 2
	    # ~mid mirrors mid from the end, so both indexes hit the same element
	    # for odd lengths and the two central elements for even lengths.
	    return (ordered[mid] + ordered[~mid]) / 2
	def draw_distribution(width, height, out_path):
	    """Plot histograms of the width and height ratios and save the figure.
	    Args:
	        width: Sequence of width ratios (already scaled by 1000).
	        height: Sequence of height ratios (already scaled by 1000).
	        out_path: Path of the image file to write.
	    """
	    # One bin per 10 units of range; at least one bin, because a value range
	    # below 10 would otherwise request 0 bins, which hist() rejects.
	    w_bins = max(1, int((max(width) - min(width)) // 10))
	    h_bins = max(1, int((max(height) - min(height)) // 10))
	    plt.figure()
	    plt.subplot(221)
	    plt.hist(width, bins=w_bins, color='green')
	    plt.xlabel('Width rate *1000')
	    plt.ylabel('number')
	    plt.title('Distribution of Width')
	    plt.subplot(222)
	    plt.hist(height, bins=h_bins, color='blue')
	    plt.xlabel('Height rate *1000')
	    plt.title('Distribution of Height')
	    plt.savefig(out_path)
	    print(f'Distribution saved as {out_path}')
	    plt.show()
	def get_ratio_infos(jsonfile, out_img):
	    """Report how large the boxes of a COCO dataset are relative to their images.
	    For every image that has non-crowd annotations, the mean box width and
	    height are divided by the image width and height. The medians of these
	    ratios are printed and their distribution is drawn.
	    Args:
	        jsonfile: Path of the COCO annotation json.
	        out_img: Path of the distribution image to write.
	    Raises:
	        ValueError: If the json holds no non-crowd annotation.
	    """
	    with open(jsonfile, 'r') as json_fp:
	        allannjson = json.load(json_fp)
	    # Look images up by id: ids need not start at 1, be contiguous or follow
	    # the order of the images list.
	    image_sizes = {im['id']: (im['width'], im['height']) for im in allannjson['images']}
	    # image id -> (box widths, box heights); grouping by id also copes with
	    # annotations that are not stored image by image.
	    boxes_by_image = {}
	    for ann in allannjson['annotations']:
	        if ann['iscrowd']:
	            continue
	        x0, y0, w, h = ann['bbox'][:]
	        box_ws, box_hs = boxes_by_image.setdefault(ann['image_id'], ([], []))
	        box_ws.append(w)
	        box_hs.append(h)
	    if not boxes_by_image:
	        raise ValueError(f'no non-crowd annotations found in {jsonfile}')
	    ratio_w = []
	    ratio_h = []
	    for im_id, (box_ws, box_hs) in boxes_by_image.items():
	        im_w, im_h = image_sizes[im_id]
	        ratio_w.append(np.mean(box_ws) / im_w)
	        ratio_h.append(np.mean(box_hs) / im_h)
	    mid_w = median(ratio_w)
	    mid_h = median(ratio_h)
	    # Scale by 1000 only for plotting; the medians are reported unscaled.
	    ratio_w = [i * 1000 for i in ratio_w]
	    ratio_h = [i * 1000 for i in ratio_h]
	    print(f'Median of ratio_w is {mid_w}')
	    print(f'Median of ratio_h is {mid_h}')
	    print('all_img with box: ', len(ratio_h))
	    print('all_ann: ', len(allannjson['annotations']))
	    draw_distribution(ratio_w, ratio_h, out_img)
	def main():
	    """Parse the command line and print/draw the box ratio statistics."""
	    parser = argparse.ArgumentParser()
	    # Required: without a path the script could only fail later inside open().
	    parser.add_argument(
	        '--json_path',
	        type=str,
	        default=None,
	        required=True,
	        help="Dataset json path.")
	    parser.add_argument(
	        '--out_img',
	        type=str,
	        default='box_distribution.jpg',
	        help="Name of distibution img.")
	    args = parser.parse_args()
	    get_ratio_infos(args.json_path, args.out_img)
	if __name__ == "__main__":
	    main()

切图

如果统计结果中，有1/2以上的图片标注框的平均宽高与原图宽高比例小于0.04，如下输出信息，则考虑使用切图方式训练，能够比较有效地提高小目标的检测精度。

	Median of ratio_w is 0.03799439775910364
	Median of ratio_h is 0.04074914637387802
	all_img with box: 1409
	all_ann: 98905
	Distribution saved as box_distribution.jpg

切图的代码同样来自PaddleDetection官方Github仓库

slice_image.py

	# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	import argparse
	from tqdm import tqdm
	def slice_data(image_dir, dataset_json_path, output_dir, slice_size,
	               overlap_ratio):
	    """Slice a COCO dataset into tiles with sahi.
	    Args:
	        image_dir: Folder holding the images.
	        dataset_json_path: Path of the COCO annotation json.
	        output_dir: Folder receiving the sliced dataset.
	        slice_size: Value passed to sahi as ``slice_size``.
	        overlap_ratio: Value passed to sahi as ``overlap_ratio``.
	    Raises:
	        RuntimeError: If sahi's slicing script cannot be imported.
	    """
	    try:
	        # Imported lazily so the dependency is only needed when slicing.
	        from sahi.scripts.slice_coco import slice as sahi_slice
	    except Exception as e:
	        # Chain the cause so the real import failure stays visible.
	        raise RuntimeError(
	            'Unable to use sahi to slice images, please install sahi, for example: `pip install sahi`, see https://github.com/obss/sahi'
	        ) from e
	    tqdm.write(
	        f" slicing for slice_size={slice_size}, overlap_ratio={overlap_ratio}")
	    sahi_slice(
	        image_dir=image_dir,
	        dataset_json_path=dataset_json_path,
	        output_dir=output_dir,
	        slice_size=slice_size,
	        overlap_ratio=overlap_ratio, )
	def main():
	    """Parse the command line and slice the dataset."""
	    parser = argparse.ArgumentParser()
	    # The three paths are mandatory: slicing cannot run without them.
	    parser.add_argument(
	        '--image_dir',
	        type=str,
	        default=None,
	        required=True,
	        help="The image folder path.")
	    parser.add_argument(
	        '--json_path',
	        type=str,
	        default=None,
	        required=True,
	        help="Dataset json path.")
	    parser.add_argument(
	        '--output_dir',
	        type=str,
	        default=None,
	        required=True,
	        help="Output dir.")
	    parser.add_argument(
	        '--slice_size', type=int, default=500, help="slice_size")
	    parser.add_argument(
	        '--overlap_ratio', type=float, default=0.25, help="overlap_ratio")
	    args = parser.parse_args()
	    slice_data(args.image_dir, args.json_path, args.output_dir, args.slice_size,
	               args.overlap_ratio)
	if __name__ == "__main__":
	    main()

删除无目标的背景图

切图之后的数据集，文件夹里面存在大量的无目标标注框的图片，即原图中的背景部分。如果直接丢进去训练有可能造成正负样本不均衡的问题，从而影响精度。因此要把这部分图片删除掉。因为数据集是coco格式的，所以删的时候既要删掉图片，也要把json文件中对应的信息删除掉，具体实现参考下面代码。

coco_del_bg.py

	import json
	import os
	class CocoDataDeleteBackground:
	    """Remove the images without any annotation from a COCO dataset.
	    Both the image files and their entries in the json's ``images`` list
	    are deleted.
	    """
	    def __init__(self, imgPath, jsonPath):
	        """Store the dataset locations.
	        Args:
	            imgPath: Directory holding the image files.
	            jsonPath: Path of the COCO annotation json.
	        """
	        self.imgPath = imgPath
	        self.jsonPath = jsonPath
	    def delete_background(self):
	        """Delete every un-annotated image and rewrite the json in place."""
	        with open(self.jsonPath, 'r+') as f:
	            annotation_json = json.load(f)
	            # Ids of all images that own at least one annotation; a set keeps
	            # the membership test below O(1) per image.
	            annotated_ids = {anno['image_id'] for anno in annotation_json['annotations']}
	            kept_images = []
	            for image in annotation_json['images']:
	                if image['id'] in annotated_ids:
	                    kept_images.append(image)
	                    continue
	                image_name = image['file_name']
	                try:
	                    os.remove(os.path.join(self.imgPath, image_name))
	                except FileNotFoundError:
	                    # Already gone (e.g. an interrupted earlier run): still
	                    # drop the entry so the json gets rewritten.
	                    print(image_name + ' is already missing, entry dropped!')
	                else:
	                    print(image_name + 'has been removed!')
	            annotation_json['images'] = kept_images
	            f.seek(0)
	            f.truncate()  # empty the json file
	            f.write(json.dumps(annotation_json))  # write it back
	if __name__ == '__main__':
	    # The first argument is the directory holding the images,
	    # the second one is the path of the json file.
	    d = CocoDataDeleteBackground(r"your\image\path",
	                                 r"your\json\path")
	    # Run the deletion.
	    d.delete_background()
	    print('done!')

作者:花伴

posted @ 2023-09-22 10:23 水木清扬阅读(1594) 评论(0) 收藏举报

刷新页面返回顶部

水木清扬