xml标签转txt标签,(coco)json标签转xml标签

JSON格式转XML格式(COCO)

 1 # 使用时仅需修改下方的 root 和 dataType 两个变量(数据集所在目录与数据子集名称)
 2 import os
 3 import time
 4 import json
 5 import pandas as pd
 6 from tqdm import tqdm
 7 from pycocotools.coco import COCO
 8 
 9 
def trans_id(category_id):
    """Return the position of ``category_id`` inside the module-level ``cats`` list.

    ``cats`` is the list of category records (dicts with at least an ``'id'``
    key) loaded elsewhere in this script.

    Args:
        category_id: The category id to look up.

    Returns:
        The zero-based index of the first record in ``cats`` whose ``'id'``
        equals ``category_id``.

    Raises:
        ValueError: If no record in ``cats`` has that id.
    """
    # Scan once and stop at the first match instead of rebuilding the full
    # id list (and an unused name list) on every call.
    for index, cat in enumerate(cats):
        if cat['id'] == category_id:
            return index
    raise ValueError('{!r} is not in list'.format(category_id))
18 
19 
# File and category names are inserted as XML element text, so they must be
# escaped to keep the generated documents well-formed.
from xml.sax.saxutils import escape

root = 'D:/Code/coco'  # directory where the COCO dataset was downloaded
dataType = 'train2017'
anno = '{}/annotations/instances_{}.json'.format(root, dataType)
xml_dir = '{}/xml/{}_xml'.format(root, dataType)

coco = COCO(anno)  # parse the annotation file
cats = coco.loadCats(coco.getCatIds())  # category records; read by trans_id()

# Create the output directory. An existing one is kept by renaming it with a
# timestamp suffix. makedirs also creates the intermediate "xml" directory.
dttm = time.strftime("%Y%m%d%H%M%S", time.localtime())
if os.path.exists(xml_dir):
    os.rename(xml_dir, xml_dir + dttm)
os.makedirs(xml_dir)

with open(anno, 'r', encoding="utf8") as load_f:
    dataset = json.load(load_f)

imgs = dataset['images']

df_cate = pd.DataFrame(dataset['categories'])
df_cate_sort = df_cate.sort_values(["id"], ascending=True)
categories = list(df_cate_sort['name'])
print('categories = ', categories)

# Resolve names directly by category id. Indexing the id-sorted name list
# with a position taken from the file-ordered category list gives wrong
# labels whenever the JSON categories are not already sorted by id.
id_to_name = {cat['id']: cat['name'] for cat in dataset['categories']}

# Group annotations by image once, instead of filtering the full annotation
# table for every image. Order within each image is preserved.
annos_by_image = {}
for ann in dataset['annotations']:
    annos_by_image.setdefault(ann['image_id'], []).append(ann)

for img in tqdm(imgs):
    file_name = img['file_name']

    xml_content = [
        "<annotation>",
        "    <folder>VOC2007</folder>",
        "    <filename>" + escape(file_name) + "</filename>",
        "    <size>",
        "        <width>" + str(img['width']) + "</width>",
        "        <height>" + str(img['height']) + "</height>",
        "    </size>",
        "    <segmented>0</segmented>",
    ]

    # One <object> per annotation of this image; images without annotations
    # still get an XML file with no objects.
    for ann in annos_by_image.get(img['id'], []):
        bbox = ann["bbox"]  # COCO layout: [x, y, width, height]
        cate_name = id_to_name[ann["category_id"]]

        xml_content.extend([
            "    <object>",
            "        <name>" + escape(cate_name) + "</name>",
            "        <pose>Unspecified</pose>",
            "        <truncated>0</truncated>",
            "        <difficult>0</difficult>",
            "        <bndbox>",
            "            <xmin>" + str(int(bbox[0])) + "</xmin>",
            "            <ymin>" + str(int(bbox[1])) + "</ymin>",
            "            <xmax>" + str(int(bbox[0] + bbox[2])) + "</xmax>",
            "            <ymax>" + str(int(bbox[1] + bbox[3])) + "</ymax>",
            "        </bndbox>",
            "    </object>",
        ])
    xml_content.append("</annotation>")

    # Swap the real extension, whatever it is, for ".xml". A plain
    # replace('.jpg', '.xml') leaves e.g. ".png" names unchanged.
    xml_path = os.path.join(xml_dir, os.path.splitext(file_name)[0] + '.xml')
    with open(xml_path, 'w', encoding="utf8") as xml_f:
        xml_f.write('\n'.join(xml_content))

XML格式转TXT格式

import os.path
import xml.etree.ElementTree as ET

class_names = ['Bottle', 'Cloth', 'Kitchen Waste', 'Metal', 'Paper', 'Plastic']

xmlpath = 'D:\\Code\\Sample\\test\\labels\\'  # directory containing the xml files
txtpath = 'D:\\Code\\Sample\\test\\labels\\'  # directory the txt files are written to


def yolo_line(class_num, x1, y1, x2, y2, w, h):
    """Format one corner-style box as a ``class cx cy bw bh`` label line.

    Args:
        class_num: Integer class index written as the first field.
        x1, y1: Top-left corner in pixels (xmin, ymin).
        x2, y2: Bottom-right corner in pixels (xmax, ymax).
        w, h: Image width and height in pixels.

    Returns:
        A space-separated string: class index, box centre x and y, then box
        width and height, all four divided by the image size.
    """
    return " ".join([
        str(class_num),
        str((x1 + x2) / 2 / w),
        str((y1 + y2) / 2 / h),
        str((x2 - x1) / w),
        str((y2 - y1) / h),
    ])


def xml_to_yolo_lines(root, class_names):
    """Convert every ``<object>`` under an annotation root into a label line.

    Args:
        root: Parsed annotation element containing ``size/width``,
            ``size/height`` and zero or more ``object`` elements.
        class_names: List of class names; the position of an object's name
            in this list becomes its class index.

    Returns:
        A list of label lines (possibly empty), in document order.

    Raises:
        ValueError: If an object's name is not in ``class_names``.
    """
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    lines = []
    for obj in root.iter('object'):
        class_num = class_names.index(obj.find('name').text)
        xmlbox = obj.find('bndbox')
        lines.append(yolo_line(
            class_num,
            int(xmlbox.find('xmin').text),
            int(xmlbox.find('ymin').text),
            int(xmlbox.find('xmax').text),
            int(xmlbox.find('ymax').text),
            w,
            h,
        ))
    return lines


# Take only the files directly inside xmlpath (the first os.walk entry), and
# only *.xml ones. Output txt files may live in the same directory and must
# not be picked up as inputs on a rerun. A missing directory yields no files.
files = [
    entry
    for entry in next(os.walk(xmlpath), (xmlpath, [], []))[2]
    if os.path.splitext(entry)[1].lower() == ".xml"
]

number = len(files)
for i, xml_name in enumerate(files):
    stem = os.path.splitext(xml_name)[0]
    xml_file_name = os.path.join(xmlpath, xml_name)
    txt_file_name = os.path.join(txtpath, stem + ".txt")

    # Passing the path lets the parser honour the encoding declared in the xml.
    root = ET.parse(xml_file_name).getroot()
    content = "\n".join(xml_to_yolo_lines(root, class_names))

    with open(txt_file_name, 'w', encoding="utf8") as f_txt:
        f_txt.write(content)

    # With a single file there is no (number - 1) span to divide by.
    progress = i / (number - 1) * 100 if number > 1 else 100.0
    print(str(progress) + "%\n")

print("done!")
posted @ 2021-12-14 16:41  whiteleaf03  阅读(307)  评论(0)    收藏  举报