# JSON格式转XML格式(COCO) -- convert COCO JSON annotations to VOC-style XML files
# To use this script, only edit the path settings below: root, dataType and xml_dir.
import json
import os
import time
from xml.sax.saxutils import escape

import pandas as pd
from pycocotools.coco import COCO
from tqdm import tqdm
8
9
def trans_id(category_id, cat_records=None):
    """Return the position of ``category_id`` within a list of category records.

    Args:
        category_id: The category id to look for.
        cat_records: Optional list of dicts that each carry an ``'id'`` key.
            When omitted, the module-level ``cats`` list is searched, which is
            what the original one-argument call did.

    Returns:
        The zero-based index of the first record whose ``'id'`` equals
        ``category_id``.

    Raises:
        ValueError: If no record has that id.
    """
    if cat_records is None:
        cat_records = cats
    # Scan the records directly instead of rebuilding id/name lists on every call.
    for position, record in enumerate(cat_records):
        if record['id'] == category_id:
            return position
    raise ValueError('category id {!r} is not in the category list'.format(category_id))
18
19
# ---- User settings: adjust these for your environment ----
root = 'D:/Code/coco'  # directory that holds the downloaded COCO dataset
dataType = 'train2017'  # dataset split; selects annotations/instances_<dataType>.json
anno = '{}/annotations/instances_{}.json'.format(root, dataType)
xml_dir = '{}/xml/{}_xml'.format(root, dataType)  # directory the XML files are written to

coco = COCO(anno)  # read the annotation file
cats = coco.loadCats(coco.getCatIds())  # category records, read via their 'id' and 'name' keys

# Create the output directory. An existing one is moved aside under a
# timestamp-suffixed name instead of being overwritten.
dttm = time.strftime("%Y%m%d%H%M%S", time.localtime())
if os.path.exists(xml_dir):
    os.rename(xml_dir, xml_dir + dttm)
# makedirs also creates the intermediate '<root>/xml' directory, which a plain
# mkdir requires to exist already.
os.makedirs(xml_dir)

# Read the raw annotation JSON. The encoding is given explicitly so the result
# does not depend on the platform's default text encoding.
with open(anno, 'r', encoding='utf-8') as load_f:
    f = json.load(load_f)

imgs = f['images']

# Category names ordered by ascending category id.
df_cate = pd.DataFrame(f['categories'])
df_cate_sort = df_cate.sort_values(["id"], ascending=True)
categories = list(df_cate_sort['name'])
print('categories = ', categories)
df_anno = pd.DataFrame(f['annotations'])
44
# Write one XML file per image entry, containing the image size and one
# <object> element per annotation that references the image.

# Map category id -> name directly, so the lookup does not rely on `cats` and
# the id-sorted `categories` list being in the same order.
cate_name_by_id = dict(zip(df_cate['id'], df_cate['name']))

# Bucket the annotations by image once, instead of filtering the whole
# annotation table again for every image.
annos_by_image = {}
if not df_anno.empty:
    for image_id, bbox, category_id in zip(
            df_anno['image_id'], df_anno['bbox'], df_anno['category_id']):
        annos_by_image.setdefault(image_id, []).append((bbox, category_id))

for img in tqdm(imgs):
    file_name = img['file_name']
    height = img['height']
    img_id = img['id']
    width = img['width']

    xml_content = [
        "<annotation>",
        " <folder>VOC2007</folder>",
        # Text content is escaped so characters such as '&' or '<' cannot
        # break the XML.
        " <filename>" + escape(file_name) + "</filename>",
        " <size>",
        " <width>" + str(width) + "</width>",
        " <height>" + str(height) + "</height>",
        " </size>",
        " <segmented>0</segmented>",
    ]

    # Images without any annotation still get an XML file, just without objects.
    for bbox, category_id in annos_by_image.get(img_id, ()):
        cate_name = cate_name_by_id[category_id]
        # bbox is read as [x, y, width, height]; the far corner is x + width, y + height.
        xml_content.extend([
            " <object>",
            " <name>" + escape(cate_name) + "</name>",
            " <pose>Unspecified</pose>",
            " <truncated>0</truncated>",
            " <difficult>0</difficult>",
            " <bndbox>",
            " <xmin>" + str(int(bbox[0])) + "</xmin>",
            " <ymin>" + str(int(bbox[1])) + "</ymin>",
            " <xmax>" + str(int(bbox[0] + bbox[2])) + "</xmax>",
            " <ymax>" + str(int(bbox[1] + bbox[3])) + "</ymax>",
            " </bndbox>",
            " </object>",
        ])
    xml_content.append("</annotation>")

    # Swap only the real extension, whatever it is, for '.xml'.
    xml_path = os.path.join(xml_dir, os.path.splitext(file_name)[0] + '.xml')
    # A dedicated handle name keeps the loaded annotation dict `f` intact.
    with open(xml_path, 'w', encoding="utf8") as xml_file:
        xml_file.write('\n'.join(xml_content))
# XML格式转TXT格式 -- convert the XML annotation files to normalized TXT label files
import os.path
import xml.etree.ElementTree as ET
# Class names; the class index written to each txt line is a name's position in this list.
class_names = ['Bottle', 'Cloth', 'Kitchen Waste', 'Metal', 'Paper', 'Plastic']
xmlpath = 'D:\\Code\\Sample\\test\\labels\\'  # location of the xml files
txtpath = 'D:\\Code\\Sample\\test\\labels\\'  # location the txt files are exported to

# Use the file list of xmlpath itself, which is the first entry os.walk yields.
# Walking the whole tree and keeping the last result would instead pick up the
# files of whichever sub-directory happened to be visited last. A missing or
# unreadable directory yields nothing, leaving an empty list as before.
_top_level = next(os.walk(xmlpath), (xmlpath, [], []))
# Only XML files are inputs; txt files from an earlier run or images stored in
# the same directory are ignored.
files = sorted(entry for entry in _top_level[2] if entry.lower().endswith('.xml'))
number = len(files)
# Convert every listed annotation file into a txt file with one line per
# object: class index, box centre x, box centre y, box width, box height.
# Horizontal values are divided by the image width and vertical values by the
# image height, both read from the XML <size> element.
for i, file_entry in enumerate(files):
    # splitext copes with extensions of any length, unlike slicing off 4 characters.
    name = os.path.splitext(file_entry)[0]
    xml_file_name = os.path.join(xmlpath, name + ".xml")
    txt_file_name = os.path.join(txtpath, name + ".txt")

    # Passing the path lets the XML parser read the raw bytes and apply the
    # document's own encoding rather than the platform's default text encoding.
    tree = ET.parse(xml_file_name)
    root = tree.getroot()
    w = int(root.find('size').find('width').text)
    h = int(root.find('size').find('height').text)

    lines = []
    for obj in root.iter('object'):
        # An unknown class name raises ValueError instead of being skipped silently.
        class_num = class_names.index(obj.find('name').text)
        xmlbox = obj.find('bndbox')
        # float() accepts both integer and decimal coordinate text.
        x1 = float(xmlbox.find('xmin').text)
        x2 = float(xmlbox.find('xmax').text)
        y1 = float(xmlbox.find('ymin').text)
        y2 = float(xmlbox.find('ymax').text)
        lines.append(
            str(class_num) + " " +
            str((x1 + x2) / 2 / w) + " " + str((y1 + y2) / 2 / h) + " " +
            str((x2 - x1) / w) + " " + str((y2 - y1) / h)
        )

    # The output is opened only after parsing succeeded, and is closed even if
    # the write fails.
    with open(txt_file_name, 'w') as f_txt:
        f_txt.write("\n".join(lines))

    # Progress is the share of files finished; this stays well defined when
    # there is only a single file.
    print(str((i + 1) / len(files) * 100) + "%\n")
print("done!")