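# Deep learning needs training, validation and test sets; this script builds
# the train/test image folders and the matching path/label list files.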
2
3 import os, random, shutil
4 from config import *
5 import re
6
# Clears and re-creates the working folders.
def test_train_dir():
    """Reset the ``data`` working tree located under ``abs_path``.

    If ``<abs_path>/data`` exists it is removed together with everything in
    it, then the empty ``test``, ``train`` and ``samples`` sub-directories are
    created. Starting from empty folders keeps files of an earlier run from
    being picked up again.
    """
    data_root = os.path.join(abs_path, "data")

    # Throw away whatever a previous run left behind.
    if os.path.exists(data_root):
        shutil.rmtree(data_root)

    # makedirs also creates the intermediate "data" directory.
    for sub_dir in ("test", "train", "samples"):
        os.makedirs(os.path.join(data_root, sub_dir))
21
def tet_data(fileDir):
    """Copy the files of ``fileDir`` that are not in the training set.

    Every entry of ``fileDir`` whose name does not exist in
    ``<abs_path>/data/train`` is copied into the directory named by the
    module-level variable ``tarDir_test`` (assigned by the ``__main__``
    section of this script before this function is called).

    Args:
        fileDir: Directory that holds the source files of one category.
    """
    train_dir = os.path.join(abs_path, "data", "train")
    for name in os.listdir(fileDir):
        train_copy = os.path.join(train_dir, name)
        print(train_copy)
        # Already selected for training: keep it out of the test set.
        if os.path.exists(train_copy):
            continue
        # os.path.join instead of a hard-coded "\\" so the copy lands inside
        # tarDir_test on every platform, not only on Windows.
        shutil.copy(os.path.join(fileDir, name),
                    os.path.join(tarDir_test, name))
32
33
34
def moveFile(fileDir, sample_size=30):
    """Copy a random sample of the files in ``fileDir`` to the training set.

    Despite its name the function copies (``shutil.copy``); the source files
    stay where they are. The destination is the directory named by the
    module-level variable ``tarDir`` (assigned by the ``__main__`` section of
    this script before this function is called).

    Args:
        fileDir: Directory that holds the source files of one category.
        sample_size: Number of files to pick at random. Defaults to 30, the
            value that used to be hard-coded. When the directory holds fewer
            entries, all of them are taken instead of raising ``ValueError``.
    """
    names = os.listdir(fileDir)
    # random.sample raises ValueError if asked for more items than exist.
    picked = random.sample(names, min(sample_size, len(names)))
    print(picked)
    for name in picked:
        # os.path.join instead of a hard-coded "\\" keeps the copy portable.
        shutil.copy(os.path.join(fileDir, name), os.path.join(tarDir, name))
46
def train_and_labels():
    """Write the path list and label list for the training set.

    For every file name found in ``<abs_path>/data/train`` one line is
    appended to each of two files in ``<abs_path>/data/samples``:

    * ``train_dir.txt``   - ``<abs_path>/dataset/scene_categories/<label>/<name>``
    * ``train_label.txt`` - ``<label>``

    ``<label>`` is the part of the file name that precedes the first match of
    the pattern ``_\\d+.`` (underscore, digits, one more character), e.g.
    ``living_room_12.jpg`` gives ``living_room``.
    """
    samples_dir = os.path.join(abs_path, "data", "samples")
    category_root = os.path.join(abs_path, "dataset", "scene_categories")
    # Raw string: same pattern as before, without the invalid "\d" escape
    # warning. The trailing "." is deliberately left unescaped so the labels
    # produced are unchanged.
    label_pattern = re.compile(r'_\d+.')

    # "with" closes both files even if listing or writing fails midway.
    with open(os.path.join(samples_dir, "train_dir.txt"), "w") as dir_file, \
            open(os.path.join(samples_dir, "train_label.txt"), "w") as label_file:
        for name in os.listdir(os.path.join(abs_path, "data", "train")):
            label = label_pattern.split(name)[0]
            print(os.path.join(abs_path, "data") + name + "\t" + label + "\n")
            dir_file.write(os.path.join(category_root, label, name) + "\n")
            label_file.write(label + "\n")
def tet_and_labels():
    """Write the path list and label list for the test set.

    For every file name found in ``<abs_path>/data/test`` one line is
    appended to each of two files in ``<abs_path>/data/samples``:

    * ``test_dir.txt``   - ``<abs_path>/dataset/scene_categories/<label>/<name>``
    * ``test_label.txt`` - ``<label>``

    ``<label>`` is the part of the file name that precedes the first match of
    the pattern ``_\\d+.`` (underscore, digits, one more character), e.g.
    ``living_room_12.jpg`` gives ``living_room``.
    """
    samples_dir = os.path.join(abs_path, "data", "samples")
    category_root = os.path.join(abs_path, "dataset", "scene_categories")
    # Raw string: same pattern as before, without the invalid "\d" escape
    # warning. The trailing "." is deliberately left unescaped so the labels
    # produced are unchanged.
    label_pattern = re.compile(r'_\d+.')

    # "with" closes both files even if listing or writing fails midway.
    with open(os.path.join(samples_dir, "test_dir.txt"), "w") as dir_file, \
            open(os.path.join(samples_dir, "test_label.txt"), "w") as label_file:
        for name in os.listdir(os.path.join(abs_path, "data", "test")):
            label = label_pattern.split(name)[0]
            print(os.path.join(abs_path, "data", name) + label + "\n")
            # os.path.join (as train_and_labels does) instead of "\\" so the
            # listed path is valid on every platform.
            dir_file.write(os.path.join(category_root, label, name) + "\n")
            label_file.write(label + "\n")
79
if __name__ == '__main__':
    # Build four independent random train/test splits; the list files of
    # round k end up in <abs_path>/data1/k.
    category_root = os.path.join(abs_path, "dataset", "scene_categories")
    samples_dir = os.path.join(abs_path, "data", "samples")

    # moveFile() and tet_data() read these two module-level names, so they
    # must be assigned here, before the first call. They do not change
    # between iterations, hence they are set once.
    tarDir = os.path.join(abs_path, "data", "train")      # training images
    tarDir_test = os.path.join(abs_path, "data", "test")  # test images

    for num1 in range(4):
        # Start every round from empty data/train, data/test, data/samples.
        test_train_dir()

        dir_name = os.listdir(category_root)
        print("-------------------------", category_root)
        for i in dir_name:
            fileDir = os.path.join(category_root, i)  # source folder of one category
            moveFile(fileDir)   # random sample -> train
            tet_data(fileDir)   # everything else -> test

        # Path/label lists are written once all categories have been copied.
        train_and_labels()
        tet_and_labels()

        new_path = os.path.join(abs_path, "data1", str(num1))
        if not os.path.exists(new_path):
            os.makedirs(new_path)

        # Move every generated .txt list out of data/samples, because the
        # next round wipes the whole data tree again.
        for derName, subfolders, filenames in os.walk(samples_dir):
            for filename in filenames:
                if filename.endswith('.txt'):
                    # os.path.join instead of a hard-coded '\\' separator.
                    shutil.move(os.path.join(derName, filename),
                                os.path.join(new_path, filename))