海思项目总结——python中OS
在制作训练集的过程中,经常会遇到各种问题。要解决这些问题,用python对系统文件进行操作至关重要。
1.菜鸟编程OS操作
2.
https://www.jb51.net/article/138812.htm
3.删除images文件夹中在labels文件夹里没有对应标注文件的图片
import os
import re
import sys


def contnameall(
    labels_dir=r"D:\Documents\Desktop\cat_dog_dataset\training_dataset\training_data\labels",
    images_dir='D:/Documents/Desktop/cat_dog_dataset/training_dataset/training_data/images/',
    prefixes=('cat_', 'dog_'),
    first=1,
    last=2000,
):
    """Delete every image whose label file is missing.

    For each prefix in ``prefixes`` and each number in ``first..last``
    (inclusive), the image ``<prefix><n>.jpg`` in ``images_dir`` is removed
    when ``<prefix><n>.txt`` is not present in ``labels_dir``.  The stem of
    every unlabelled entry is printed, whether or not an image existed.

    Args:
        labels_dir: Folder containing the ``.txt`` label files.
        images_dir: Folder containing the ``.jpg`` images to prune.
        prefixes: File-name prefixes to scan, processed in order.
        first: First index to check (inclusive).
        last: Last index to check (inclusive).

    Raises:
        OSError: If ``labels_dir`` cannot be listed, or an image cannot be
            removed for a reason other than not existing.
    """
    # Build the lookup once; membership tests on a set are O(1).
    labelled = set(os.listdir(labels_dir))

    for prefix in prefixes:
        for num in range(first, last + 1):
            stem = prefix + str(num)
            if stem + '.txt' in labelled:
                continue
            print(stem)
            try:
                os.remove(os.path.join(images_dir, stem + '.jpg'))
            except FileNotFoundError:
                # Neither label nor image exists for this index.
                continue


if __name__ == '__main__':
    contnameall()
4.修改文件名
import os
import re
import sys

# Whole-name match for the image types handled; group 1 is the extension.
_IMAGE_NAME = re.compile(r".+\.(jpg|png|gif)")


def renameall(folder=r"D:\Documents\Desktop\cat_dog_data\environment", prefix='en_'):
    """Rename the images in ``folder`` to ``<prefix>1.<ext>``, ``<prefix>2.<ext>``, ...

    Only entries whose full name ends in ``.jpg``, ``.png`` or ``.gif`` are
    renamed; anything else is left untouched instead of aborting the run.
    Entries are numbered in sorted name order so the result is reproducible.

    The rename runs in two passes (first to temporary names, then to the
    final names) so that a folder which already contains ``<prefix>N`` files
    can be renumbered without one file overwriting another.  The process
    working directory is never changed.

    Args:
        folder: Directory whose images are renamed in place.
        prefix: Text placed before the running number in each new name.

    Raises:
        OSError: If the folder cannot be listed or a rename fails.  A failure
            part-way through may leave files under their temporary names.
    """
    fileList = sorted(os.listdir(folder))
    print("修改前:" + str(fileList))

    # Decide which entries take part before touching the file system.
    plan = []
    for fileName in fileList:
        matched = _IMAGE_NAME.fullmatch(fileName)
        if matched is not None:
            plan.append((fileName, matched.group(1)))

    # Pass 1: move every image out of the way of the final names.
    staged = []
    for num, (fileName, ext) in enumerate(plan, start=1):
        temp_name = '__renameall_tmp_%d.renaming' % num
        os.rename(os.path.join(folder, fileName), os.path.join(folder, temp_name))
        staged.append((temp_name, prefix + str(num) + '.' + ext))

    # Pass 2: every final name is now free, so assign them.
    for temp_name, final_name in staged:
        os.rename(os.path.join(folder, temp_name), os.path.join(folder, final_name))

    print("---------------------------------------------------")
    print("修改后:" + str(os.listdir(folder)))


if __name__ == '__main__':
    renameall()
5.json文件转txt文件
import os
import json
import numpy as np
import math

dir_json = 'D:/Documents/Desktop/cat_dog_dataset/validation_dataset/validation_data/json/'  # folder holding the JSON annotation files
dir_txt = 'D:/Documents/Desktop/cat_dog_dataset/validation_dataset/validation_data/labels/'  # folder that receives the txt label files


def json2txt(path_json, path_txt):
    """Convert one JSON annotation file into a normalised txt label file.

    For every entry in ``shapes`` the first two points are treated as
    opposite corners of a box.  The label is written followed by a space,
    then the box centre x, centre y, width and height, each on its own line
    and each divided by the image width or height read from the JSON.

    Args:
        path_json: Path of the JSON file to read.  It must provide
            ``imageHeight``, ``imageWidth`` and ``shapes`` (each shape with
            ``label`` and ``points``).
        path_txt: Path of the txt file to create or overwrite.

    Raises:
        OSError: If either file cannot be opened.
        KeyError: If a required JSON key is missing.
        json.JSONDecodeError: If ``path_json`` is not valid JSON.
    """
    # Binary mode lets json detect the UTF-8/16/32 encoding itself rather
    # than depending on the platform's default text encoding.
    with open(path_json, 'rb') as fjson:
        jsonx = json.load(fjson)

    imageHeight = jsonx['imageHeight']
    imageWidth = jsonx['imageWidth']

    with open(path_txt, 'w+') as ftxt:
        for shape in jsonx['shapes']:
            xy = np.array(shape['points'])
            w = abs(xy[1][0] - xy[0][0]) / imageWidth
            h = abs(xy[1][1] - xy[0][1]) / imageHeight
            center_x = (xy[1][0] + xy[0][0]) / 2 / imageWidth
            center_y = (xy[1][1] + xy[0][1]) / 2 / imageHeight

            fields = [str(shape['label']) + ' ', str(center_x), str(center_y), str(w), str(h)]
            ftxt.write('\n'.join(fields) + "\n")


def main():
    """Convert every ``.json`` file in ``dir_json`` into ``dir_txt``."""
    os.makedirs(dir_txt, exist_ok=True)
    # Skip anything that is not a JSON file so stray files do not abort the batch.
    list_json = [name for name in os.listdir(dir_json) if name.endswith('.json')]
    for cnt, json_name in enumerate(list_json):
        print('cnt=%d,name=%s' % (cnt + 1, json_name))
        path_json = dir_json + json_name
        # Swap only the trailing extension, not every '.json' in the name.
        path_txt = dir_txt + json_name[:-len('.json')] + '.txt'
        json2txt(path_json, path_txt)


if __name__ == '__main__':
    main()
6.将图像的P模式转化为RGB (labelme只能操作RGB 模式)
import os
from PIL import Image


def resize_image(image_name):
    """Convert palette ("P") and RGBA images to RGB, in place.

    Despite its name, this function does not resize anything: it walks
    ``<image_name>images/`` and rewrites each file whose mode is ``"P"`` or
    ``"RGBA"`` as an RGB image under the same file name.  Files in any other
    mode are left untouched so they are not needlessly re-encoded.

    Args:
        image_name: Dataset root path ending with a path separator; the
            images are expected in its ``images/`` sub-folder.

    Raises:
        OSError: If the folder cannot be listed or a file cannot be opened
            as an image or written back.
    """
    image_dir = image_name + 'images/'
    for file in os.listdir(image_dir):
        path = os.path.join(image_dir, file)
        # Close the source handle before overwriting the same path.
        with Image.open(path) as img:
            if img.mode not in ('P', 'RGBA'):
                continue
            converted = img.convert('RGB')
        converted.save(path)


if __name__ == '__main__':
    resize_image('D:/Documents/Desktop/cat_dog_dataset/validation_dataset/validation_data/')
7.通过二进制形式读取文件
# Read the whole file as raw bytes.  The path is a raw string so the
# backslashes are taken literally, and the file is opened read-only ("rb")
# because nothing is written back.
with open(r'D:\Documents\Desktop\darknet-master\dog_cat_training\list.txt', "rb") as f:
    data = f.read()  # a bytes object; it prints with a leading b'...'
print(data)  # iterating over data yields each byte as an int in 0..255
8.将image文件夹中的文件名写入txt文件
import os


def ListFilesToTxt(dir, file, wildcard, recursion, prefix="/home/hisi/data/dog_and_cat/image/"):
    """Write one line per matching file name found in ``dir`` to ``file``.

    Each line is ``prefix`` followed by the bare file name (not the path
    relative to ``dir``), so files found in sub-folders are written with the
    same prefix as top-level ones.  Entries are visited in sorted name order.

    Args:
        dir: Directory to scan.
        file: Writable text stream that receives the lines.
        wildcard: Space-separated list of accepted suffixes, e.g.
            ``".jpg .png"``.
        recursion: Any truthy value makes the scan descend into
            sub-directories.
        prefix: Text written in front of every file name.

    Raises:
        OSError: If a directory cannot be listed.
    """
    exts = tuple(wildcard.split(" "))
    for name in sorted(os.listdir(dir)):
        fullname = os.path.join(dir, name)
        # Logical "and": a bitwise "&" would treat recursion=2 as false.
        if os.path.isdir(fullname) and recursion:
            ListFilesToTxt(fullname, file, wildcard, recursion, prefix)
        elif name.endswith(exts):
            file.write(prefix + name + "\n")


def ReadName():
    """Write the names of all ``.jpg`` files in the image folder to list.txt.

    Raises:
        OSError: If the output file cannot be opened or the image folder
            cannot be listed.
    """
    image_dir = "D:/Documents/Desktop/dog_and_cat/image/"  # folder to scan
    outfile = "D:/Documents/Desktop/dog_and_cat/list/list.txt"  # list file to write
    wildcard = ".jpg"  # accepted suffixes, e.g. ".jpg .txt .exe .dll .lib"

    # open() raises on failure, so no separate "could not open" check is needed.
    with open(outfile, "w") as out:
        ListFilesToTxt(image_dir, out, wildcard, 1)


if __name__ == '__main__':
    ReadName()