Converting WIDER FACE to VOC Format and Building an LMDB Dataset

Converting the WIDER FACE annotations to VOC format:
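The script below expects the stock wider_face_*_bbx_gt.txt layout: each record is the relative image path on one line, the number of faces on the next line, and then one line per face whose first four fields are x, y, w, h in pixels (the remaining attribute flags are ignored here). Schematically, with placeholder values:

0--Parade/0_Parade_example.jpg
2
x1 y1 w1 h1 blur expression illumination invalid occlusion pose
x2 y2 w2 h2 blur expression illumination invalid occlusion pose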

import os
import sys
import shutil
import cv2
import h5py
import numpy as np
from xml.dom.minidom import Document
rootdir="../"
convert2yoloformat=True
convert2vocformat=True
resized_dim=(48, 48)
 
# keep only faces with both sides >= 20 px, and pad images to square
minsize2select=20
usepadding=True
 
datasetprefix="/home/yanhe/data/widerface"  # path prefix written into the generated image list files
def gen_hdf5():
    imgdir=rootdir+"/WIDER_train/images"
    gtfilepath=rootdir+"/wider_face_split/wider_face_train_bbx_gt.txt"
    index =0
    with open(gtfilepath,'r') as gtfile:
        faces=[]
        labels=[]
        while True:  # and len(faces)<10
            imgpath=gtfile.readline()[:-1]
            if imgpath=="":
                break
            print(index, imgpath)
            img=cv2.imread(imgdir+"/"+imgpath)
            numbbox=int(gtfile.readline())
            bbox=[]
            for i in range(numbbox):
                line=gtfile.readline()
                line=line.split()
                line=line[0:4]               
                if(int(line[3])<=0 or int(line[2])<=0):
                    continue
                bbox=(int(line[0]),int(line[1]),int(line[2]),int(line[3]))
                face=img[int(line[1]):int(line[1])+int(line[3]),int(line[0]):int(line[0])+int(line[2])]
                face=cv2.resize(face, resized_dim)
                faces.append(face)
                labels.append(1)
                cv2.rectangle(img,(int(line[0]),int(line[1])),(int(line[0])+int(line[2]),int(line[1])+int(line[3])),(255,0,0))
            #cv2.imshow("img",img)
            #cv2.waitKey(1)
            index=index+1
        faces=np.asarray(faces)
        labels=np.asarray(labels)
        f=h5py.File('train.h5','w')
        f['data']=faces.astype(np.float32)
        f['label']=labels.astype(np.float32)
        f.close()
def viewginhdf5():
    f = h5py.File('train.h5','r') 
    f.keys()
    faces=f['data'][:]
    for face in faces:
        face=face.astype(np.uint8)
        cv2.imshow("img",face)
        cv2.waitKey(1)
    f.close()
 
def convertimgset(img_set="train"):
    imgdir=rootdir+"/WIDER_"+img_set+"/images"
    gtfilepath=rootdir+"/wider_face_split/wider_face_"+img_set+"_bbx_gt.txt"
    imagesdir=rootdir+"/images"
    vocannotationdir=rootdir+"/Annotations"
    labelsdir=rootdir+"/labels"
    if not os.path.exists(imagesdir):
        os.mkdir(imagesdir)
    if convert2yoloformat:
        if not os.path.exists(labelsdir):
            os.mkdir(labelsdir)
    if convert2vocformat:
        if not os.path.exists(vocannotationdir):
            os.mkdir(vocannotationdir)
    index=0
    with open(gtfilepath,'r') as gtfile:
        while True:  # and len(faces)<10
            filename=gtfile.readline()[:-1]
            if(filename==""):
                break;
            sys.stdout.write("\r"+str(index)+":"+filename+"\t\t\t")
            sys.stdout.flush()
            imgpath=imgdir+"/"+filename
            img=cv2.imread(imgpath)
            if img is None:  # cv2.imread returns None when the image cannot be read
                break
            imgheight=img.shape[0]
            imgwidth=img.shape[1]
            maxl=max(imgheight,imgwidth)
            # pad the shorter side so the saved image is exactly maxl x maxl
            paddingleft=(maxl-imgwidth)>>1
            paddingright=maxl-imgwidth-paddingleft
            paddingtop=(maxl-imgheight)>>1
            paddingbottom=maxl-imgheight-paddingtop
            saveimg=cv2.copyMakeBorder(img,paddingtop,paddingbottom,paddingleft,paddingright,cv2.BORDER_CONSTANT,value=0)
            showimg=saveimg.copy()
            numbbox=int(gtfile.readline())
            bboxes=[]
            for i in range(numbbox):
                line=gtfile.readline()
                line=line.split()
                line=line[0:4]               
                if(int(line[3])<=0 or int(line[2])<=0):
                    continue
                x=int(line[0])+paddingleft
                y=int(line[1])+paddingtop
                width=int(line[2])
                height=int(line[3])
                bbox=(x,y,width,height)
                x2=x+width
                y2=y+height
                #face=img[x:x2,y:y2]
                if width>=minsize2select and height>=minsize2select:
                    bboxes.append(bbox)
                    cv2.rectangle(showimg,(x,y),(x2,y2),(0,255,0))
                    #maxl=max(width,height)
                    #x3=(int)(x+(width-maxl)*0.5)
                    #y3=(int)(y+(height-maxl)*0.5)
                    #x4=(int)(x3+maxl)
                    #y4=(int)(y3+maxl)
                    #cv2.rectangle(img,(x3,y3),(x4,y4),(255,0,0))
                else:
                    cv2.rectangle(showimg,(x,y),(x2,y2),(0,0,255))              
            filename=filename.replace("/","_")
            if len(bboxes)==0:
                print "warrning: no face"
                continue 
            cv2.imwrite(imagesdir+"/"+filename,saveimg)
            if convert2yoloformat:
                height=saveimg.shape[0]
                width=saveimg.shape[1]
                txtpath=labelsdir+"/"+filename
                txtpath=txtpath[:-3]+"txt"
                ftxt=open(txtpath,'w')  
                for i in range(len(bboxes)):
                    bbox=bboxes[i]
                    xcenter=(bbox[0]+bbox[2]*0.5)/width
                    ycenter=(bbox[1]+bbox[3]*0.5)/height
                    wr=bbox[2]*1.0/width
                    hr=bbox[3]*1.0/height
                    txtline="0 "+str(xcenter)+" "+str(ycenter)+" "+str(wr)+" "+str(hr)+"\n"
                    ftxt.write(txtline)
                ftxt.close()
            if convert2vocformat:
                xmlpath=vocannotationdir+"/"+filename
                xmlpath=xmlpath[:-3]+"xml"
                doc = Document()
                annotation = doc.createElement('annotation')
                doc.appendChild(annotation)
                folder = doc.createElement('folder')
                folder_name = doc.createTextNode('widerface')
                folder.appendChild(folder_name)
                annotation.appendChild(folder)
                filenamenode = doc.createElement('filename')
                filename_name = doc.createTextNode(filename)
                filenamenode.appendChild(filename_name)
                annotation.appendChild(filenamenode)
                source = doc.createElement('source')
                annotation.appendChild(source)
                database = doc.createElement('database')
                database.appendChild(doc.createTextNode('wider face Database'))
                source.appendChild(database)
                annotation_s = doc.createElement('annotation')
                annotation_s.appendChild(doc.createTextNode('PASCAL VOC2007'))
                source.appendChild(annotation_s)
                image = doc.createElement('image')
                image.appendChild(doc.createTextNode('flickr'))
                source.appendChild(image)
                flickrid = doc.createElement('flickrid')
                flickrid.appendChild(doc.createTextNode('-1'))
                source.appendChild(flickrid)
                owner = doc.createElement('owner')
                annotation.appendChild(owner)
                flickrid_o = doc.createElement('flickrid')
                flickrid_o.appendChild(doc.createTextNode('yanyu'))
                owner.appendChild(flickrid_o)
                name_o = doc.createElement('name')
                name_o.appendChild(doc.createTextNode('yanyu'))
                owner.appendChild(name_o)
                size = doc.createElement('size')
                annotation.appendChild(size)
                width = doc.createElement('width')
                width.appendChild(doc.createTextNode(str(saveimg.shape[1])))
                height = doc.createElement('height')
                height.appendChild(doc.createTextNode(str(saveimg.shape[0])))
                depth = doc.createElement('depth')
                depth.appendChild(doc.createTextNode(str(saveimg.shape[2])))
                size.appendChild(width)
                size.appendChild(height)
                size.appendChild(depth)
                segmented = doc.createElement('segmented')
                segmented.appendChild(doc.createTextNode('0'))
                annotation.appendChild(segmented)
                for i in range(len(bboxes)):
                    bbox=bboxes[i]
                    objects = doc.createElement('object')
                    annotation.appendChild(objects)
                    object_name = doc.createElement('name')
                    object_name.appendChild(doc.createTextNode('face'))
                    objects.appendChild(object_name)
                    pose = doc.createElement('pose')
                    pose.appendChild(doc.createTextNode('Unspecified'))
                    objects.appendChild(pose)
                    truncated = doc.createElement('truncated')
                    truncated.appendChild(doc.createTextNode('1'))
                    objects.appendChild(truncated)
                    difficult = doc.createElement('difficult')
                    difficult.appendChild(doc.createTextNode('0'))
                    objects.appendChild(difficult)
                    bndbox = doc.createElement('bndbox')
                    objects.appendChild(bndbox)
                    xmin = doc.createElement('xmin')
                    xmin.appendChild(doc.createTextNode(str(bbox[0])))
                    bndbox.appendChild(xmin)
                    ymin = doc.createElement('ymin')
                    ymin.appendChild(doc.createTextNode(str(bbox[1])))
                    bndbox.appendChild(ymin)
                    xmax = doc.createElement('xmax')
                    xmax.appendChild(doc.createTextNode(str(bbox[0]+bbox[2])))
                    bndbox.appendChild(xmax)
                    ymax = doc.createElement('ymax')
                    ymax.appendChild(doc.createTextNode(str(bbox[1]+bbox[3])))
                    bndbox.appendChild(ymax)
                f=open(xmlpath,"w")
                f.write(doc.toprettyxml(indent = ''))
                f.close()     
            #cv2.imshow("img",showimg)
            #cv2.waitKey()
            index=index+1
 
def generatetxt(img_set="train"):
    gtfilepath=rootdir+"/wider_face_split/wider_face_"+img_set+"_bbx_gt.txt"
    f=open(rootdir+"/"+img_set+".txt","w")
    with open(gtfilepath,'r') as gtfile:
        while True:  # and len(faces)<10
            filename=gtfile.readline()[:-1]
            if filename=="":
                break
            filename=filename.replace("/","_")
            imgfilepath=datasetprefix+"/images/"+filename
            f.write(imgfilepath+'\n')
            numbbox=int(gtfile.readline())
            for i in range(numbbox):
                line=gtfile.readline()
    f.close()
 
def generatevocsets(img_set="train"):
    if not os.path.exists(rootdir+"/ImageSets"):
        os.mkdir(rootdir+"/ImageSets")
    if not os.path.exists(rootdir+"/ImageSets/Main"):
        os.mkdir(rootdir+"/ImageSets/Main")
    gtfilepath=rootdir+"/wider_face_split/wider_face_"+img_set+"_bbx_gt.txt"
    f=open(rootdir+"/ImageSets/Main/"+img_set+".txt",'w')
    with open(gtfilepath,'r') as gtfile:
        while True:  # and len(faces)<10
            filename=gtfile.readline()[:-1]
            if filename=="":
                break
            filename=filename.replace("/","_")
            imgfilepath=filename[:-4]
            f.write(imgfilepath+'\n')
            numbbox=int(gtfile.readline())
            for i in range(numbbox):
                line=gtfile.readline()
    f.close()
 
def convertdataset():
    img_sets=["train","val"]
    for img_set in img_sets:
        convertimgset(img_set)
        generatetxt(img_set)
        generatevocsets(img_set)
 
if __name__=="__main__":
    convertdataset()
    shutil.move(rootdir+"/"+"train.txt",rootdir+"/"+"trainval.txt")
    shutil.move(rootdir+"/"+"val.txt",rootdir+"/"+"test.txt")
    shutil.move(rootdir+"/ImageSets/Main/"+"train.txt",rootdir+"/ImageSets/Main/"+"trainval.txt")
    shutil.move(rootdir+"/ImageSets/Main/"+"val.txt",rootdir+"/ImageSets/Main/"+"test.txt")

 

Caffe: converting three- or four-channel images to LMDB format, and converting labels (single-channel grayscale images) to LMDB format

import numpy as np
import sys
from PIL import Image
import lmdb
import random
import os

sys.path.append('/home/guest/caffe/python/')

import caffe

if __name__ == '__main__':
    train_list_file = '/home/guest/caffe/examples/VOC2012ext/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt'
    train_images_root = '/home/guest/caffe/examples/VOC2012ext/VOCdevkit/VOC2012/JPEGImages/'

    f = open(train_list_file, 'r')
    trainlist = f.readlines()
    f.close()

    random.shuffle(trainlist)

    # creating images lmdb
    in_db = lmdb.open('/home/guest/caffe/VOC2012ext_val_img_lmdb',\
     map_size=int(1e12))
    with in_db.begin(write=True) as in_txn:
        for in_idx, in_ in enumerate(trainlist):
            fid = in_.strip()+'.jpg'
            fn = os.path.join(train_images_root, fid)
            im = np.array(Image.open(fn))
            Dtype = im.dtype
            im = im[:,:,::-1]                # RGB -> BGR (Caffe's channel order)
            im = Image.fromarray(im)
            im = np.array(im, Dtype)
            im = im.transpose((2, 0, 1))     # HWC -> CHW
            im_dat = caffe.io.array_to_datum(im)
            # lmdb keys must be bytes (required under Python 3, harmless under Python 2)
            in_txn.put('{:0>10d}'.format(in_idx).encode('ascii'), im_dat.SerializeToString())
    in_db.close()


    # creating label lmdb
    in_db = lmdb.open('/home/guest/caffe/VOC2012ext_val_label_lmdb',\
     map_size=int(1e12))
    train_images_root = '/home/guest/caffe/examples/VOC2012ext/VOCdevkit/VOC2012/SegmentationClass/'
    with in_db.begin(write=True) as in_txn:
        for in_idx, in_ in enumerate(trainlist):
            fid = in_.strip()+'.png'
            fn = os.path.join(train_images_root, fid)
            Dtype = 'uint8'
            L = np.array(Image.open(fn), Dtype)          # palette PNG -> class-index array
            Limg = Image.fromarray(L)
            L = np.array(Limg, Dtype)
            L = L.reshape(L.shape[0], L.shape[1], 1)     # HW -> HWC with a single channel
            L = L.transpose((2, 0, 1))                   # HWC -> CHW
            L_dat = caffe.io.array_to_datum(L)
            in_txn.put('{:0>10d}'.format(in_idx).encode('ascii'), L_dat.SerializeToString())
    in_db.close()
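To verify the result, the first record can be read back from the image LMDB and checked for shape and dtype. A minimal sketch, assuming pycaffe is on sys.path as above and the same LMDB path; the key is encoded to bytes to match the writer:

import lmdb
import caffe
from caffe.proto import caffe_pb2

env = lmdb.open('/home/guest/caffe/VOC2012ext_val_img_lmdb', readonly=True)
with env.begin() as txn:
    raw = txn.get('{:0>10d}'.format(0).encode('ascii'))
    datum = caffe_pb2.Datum()
    datum.ParseFromString(raw)
    arr = caffe.io.datum_to_array(datum)   # C x H x W, BGR channel order
    print(arr.shape, arr.dtype)
env.close()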

 
