Python应用基础-根据指定文件生成XML

    因项目需要,需根据指定格式的文件生成XML标注文件,以便使用LabelImg打开进行编辑和查看。原始文件默认使用逗号进行分隔,如下所示:
image.png

  • 第1个值:原始图片切图后的小文件名,以AIpng_x命名,其中x代表该原始图片的第几个切图文件
  • 第2~5个值:分别对应于ymin, xmin, ymax, xmax
  • 第6个值:代表对应的标签标注

    在生成XML文件时,需要对其进行汇总,即将属于同一个原始文件的切图小文件的标注汇总到一起,其实现代码如下所示:

import os
from Logger import MyLogger
from xml.dom.minidom import Document,parse
from collections import defaultdict
import re

class OpeateXML:
    """Generate XML annotation files (openable in LabelImg) from a location file.

    Every non-blank line of the location file holds comma-separated fields in
    the order ``tile_name,ymin,xmin,ymax,xmax,label``.  Tile names contain an
    ``.AIpng_<n>`` marker; removing the ``_<n>`` part gives the name of the
    original image, and all tiles of one image end up in one XML file.
    """

    # Matches the per-tile suffix (".AIpng_12") so it can be collapsed to ".AIpng".
    _TILE_SUFFIX = re.compile(r"\.AIpng_\d{1,}", re.IGNORECASE)

    def __init__(self, srcPath: str, targetPath: str, srcFileName: str):
        """Remember where the location file lives and where XML files are written.

        srcPath: directory containing the location file.
        targetPath: directory that receives the generated XML files.
        srcFileName: name of the location file inside ``srcPath``.
        """
        self._srcPath = srcPath
        self._targetPath = targetPath
        self._srcFileName = srcFileName

    def readSrcFileName(self, fileEncoding="utf8") -> defaultdict:
        """Read the location file and group its lines by original image name.

        fileEncoding: encoding used to read the file (undecodable bytes are ignored).

        Returns a ``defaultdict(list)`` mapping the original image name to the
        stripped lines that belong to it.  The mapping is empty when the file
        does not exist; a plain empty ``dict`` is returned when reading fails.
        """
        data = defaultdict(list)
        srcFileFullPath = os.path.join(self._srcPath, self._srcFileName)
        try:
            if os.path.exists(srcFileFullPath):
                with open(srcFileFullPath, mode="r", encoding=fileEncoding, errors="ignore") as fr:
                    for line in fr:
                        record = line.strip()
                        if not record:
                            # A blank line would otherwise create an empty key
                            # and, later on, a stray ".xml" file.
                            continue
                        tileName = record.split(",")[0]
                        data[self._TILE_SUFFIX.sub(".AIpng", tileName)].append(record)
        except Exception as ex:
            MyLogger().error(f"OperateXML:read file error:\n{ex}")
            return {}
        else:
            return data

    def getCreateXMLData(self, srcData: dict, mnlData: list) -> defaultdict:
        """Mark the manually confirmed tiles inside ``srcData``.

        srcData: grouped records as returned by ``readSrcFileName``.
        mnlData: names of manually confirmed tiles.

        Every record whose first field contains one of the ``mnlData`` entries
        has each occurrence of "auto" replaced by "mnl".  ``srcData`` is
        updated in place and returned; an empty ``dict`` is returned on error.
        """
        try:
            for records in srcData.values():
                for index, record in enumerate(records):
                    tileName = record.strip().split(",")[0]
                    if any(item in tileName for item in mnlData):
                        records[index] = record.replace("auto", "mnl")
        except Exception as ex:
            MyLogger().error(f"OperateXML: get data from location and mnldata interaction error\n{ex}")
            return {}
        else:
            return srcData

    def operateXML(self, data: defaultdict) -> None:
        """Write one XML file per key of ``data``, merging into files that already exist."""
        for k in data:
            xmlFileFullPath = os.path.join(self._targetPath, os.path.splitext(k)[0] + ".xml")
            if os.path.exists(xmlFileFullPath):
                self.appendExistXML(data={k: data[k]}, xmlFileFullPath=xmlFileFullPath)
            else:
                self.createNewXML({k: data[k]})

    def appendExistXML(self, data: defaultdict, xmlFileFullPath: str, fileEncoding="utf8"):
        """Merge the objects stored in an existing XML file with ``data`` and rewrite it.

        data: mapping of image name to records; the entry named by the XML's
            ``filename`` element is replaced by the merged record list.
        xmlFileFullPath: path of the existing XML file.
        fileEncoding: encoding passed on to ``createNewXML``.

        Records describing the same box and label are written only once, so
        running the tool again on the same input does not duplicate objects.
        Failures are logged and otherwise ignored.
        """
        try:
            doc = parse(xmlFileFullPath)
            rootNode = doc.documentElement
            key = rootNode.getElementsByTagName("filename")[0].childNodes[0].data

            existing = []
            for obj in rootNode.getElementsByTagName("object"):
                name = obj.getElementsByTagName("name")[0].childNodes[0].data
                for bndbox in obj.getElementsByTagName("bndbox"):
                    xmin = bndbox.getElementsByTagName("xmin")[0].childNodes[0].data
                    ymin = bndbox.getElementsByTagName("ymin")[0].childNodes[0].data
                    xmax = bndbox.getElementsByTagName("xmax")[0].childNodes[0].data
                    ymax = bndbox.getElementsByTagName("ymax")[0].childNodes[0].data
                    existing.append(f"existData,{ymin},{xmin},{ymax},{xmax},{name}")

            merged = []
            seen = set()
            # Existing objects come first so the file keeps its current order.
            for record in existing + list(data[key]):
                fields = record.strip().split(",")
                # The first field (tile name / "existData") never reaches the
                # XML, so two records are the same object when the rest agrees.
                identity = tuple(fields[1:]) if len(fields) == 6 else (record,)
                if identity in seen:
                    continue
                seen.add(identity)
                merged.append(record)
            data[key] = merged

            # createNewXML opens the target in "w" mode, which overwrites the
            # old file; deleting it first would lose it if the rewrite failed
            # before the file is opened.
            self.createNewXML(data=data, fileEncoding=fileEncoding)
        except Exception as ex:
            MyLogger().error(f"OperateXML:append content to {xmlFileFullPath} error\n{ex}")
            return

    @staticmethod
    def _appendTextElement(doc, parent, tag, text):
        """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
        element = doc.createElement(tag)
        element.appendChild(doc.createTextNode(text))
        parent.appendChild(element)
        return element

    def createNewXML(self, data: dict, fileEncoding="utf8") -> None:
        """Write one XML annotation file per key of ``data`` into the target directory.

        data: mapping of image name to its records.
        fileEncoding: encoding of the written XML file (also declared in its header).

        Records that do not have exactly six fields, or whose label contains
        no "/", are skipped.  Failures are logged and otherwise ignored.
        """
        if not data:
            return
        try:
            for k, v in data.items():
                doc = Document()
                rootNode = doc.createElement("annotation")
                doc.appendChild(rootNode)

                self._appendTextElement(doc, rootNode, "folder", self._targetPath)
                self._appendTextElement(doc, rootNode, "filename", k)
                self._appendTextElement(doc, rootNode, "path", os.path.join(self._targetPath, k))

                for record in v:
                    fields = record.strip().split(",")
                    if len(fields) != 6:
                        continue
                    _, ymin, xmin, ymax, xmax, labelName = fields
                    if "/" not in labelName:
                        continue

                    objectObj = doc.createElement("object")
                    rootNode.appendChild(objectObj)
                    self._appendTextElement(doc, objectObj, "name", labelName)

                    objectBndBox = doc.createElement("bndbox")
                    objectObj.appendChild(objectBndBox)
                    for tag, value in (("xmin", xmin), ("ymin", ymin), ("xmax", xmax), ("ymax", ymax)):
                        self._appendTextElement(doc, objectBndBox, tag, value)

                # save xml
                xmlName = os.path.splitext(k)[0] + ".xml"
                targetPath = os.path.join(self._targetPath, xmlName)
                with open(targetPath, mode="w", encoding=fileEncoding, errors="ignore") as fw:
                    doc.writexml(fw, indent="\t", newl="\n", addindent="\t", encoding=fileEncoding)
        except Exception as ex:
            MyLogger().error(f"OperateXML:Save xml error\n{ex}")
            return

if __name__ == '__main__':
    # Demo run: read the location file, flag the manually confirmed tiles
    # and write (or merge into) the XML files in the target directory.
    sourceDir = r"C:\Users\Surpass\Documents\PycharmProjects\data\TEST-8\outs"
    outputDir = r"C:\Users\Surpass\Documents\PycharmProjects\data\TEST-8\outs\in_number"
    locationFile = "locations.txt"
    confirmedTiles = [
        'slide1_cell420_image0_met.AIpng_36.jpg',
        'slide1_cell420_image0_met.AIpng_33.jpg',
        'slide1_cell420_image0_met.AIpng_10.jpg',
        'slide1_cell420_image0_met.AIpng_30.jpg',
    ]

    builder = OpeateXML(sourceDir, outputDir, locationFile)
    grouped = builder.readSrcFileName()
    builder.operateXML(builder.getCreateXMLData(grouped, confirmedTiles))

    最终生成的XML效果如下所示:

image.png

    在LabelImg中的效果如下所示:
image.png

本文地址:https://www.cnblogs.com/surpassme/p/13204899.html

本文同步在微信订阅号上发布,如各位小伙伴们喜欢我的文章,也可以关注我的微信订阅号:woaitest,或扫描下面的二维码添加关注:
MyQRCode.jpg

posted @ 2020-06-28 20:23  Surpassme  阅读(816)  评论(0)  编辑  收藏  举报