分割CelebA数据集为训练集,验证集,测试集

数据集共有202599张图片,这里划分为训练集162770张(比例约0.803)、验证集19867张(比例约0.098)、测试集19962张(比例约0.099)

这里采用建立符号链接(symlink)的方式

1 rel_link = os.path.relpath(in_file, out_dir)  # 得到从out_dir到in_file的相对路径
2 os.symlink(rel_link, link_file)  # 创建符号链接link_file,使其指向rel_link,也就是把目标地址链接到源地址
View Code
 1 # check, if file exists, make link
 2 def check_link(in_dir, basename, out_dir):
 3     in_file = os.path.join(in_dir, basename)
 4     if os.path.exists(in_file):
 5         link_file = os.path.join(out_dir, basename)
 6         rel_link = os.path.relpath(in_file, out_dir)  # from out_dir to in_file
 7         os.symlink(rel_link, link_file)
 8 
 9 def add_splits(data_path):
10     images_path = os.path.join(data_path, 'Img/img_align_celeba')
11     train_dir = os.path.join(data_path, 'splits', 'train')
12     valid_dir = os.path.join(data_path, 'splits', 'valid')
13     test_dir = os.path.join(data_path, 'splits', 'test')
14     if not os.path.exists(train_dir):
15         os.makedirs(train_dir)
16     if not os.path.exists(valid_dir):
17         os.makedirs(valid_dir)
18     if not os.path.exists(test_dir):
19         os.makedirs(test_dir)
20 
21     # these constants based on the standard CelebA splits
22     NUM_EXAMPLES = 202599
23     TRAIN_STOP = 162770
24     VALID_STOP = 182637
25 
26     for i in range(0, TRAIN_STOP):
27         basename = "{:06d}.jpg".format(i+1)
28         check_link(images_path, basename, train_dir)
29     for i in range(TRAIN_STOP, VALID_STOP):
30         basename = "{:06d}.jpg".format(i+1)
31         check_link(images_path, basename, valid_dir)
32     for i in range(VALID_STOP, NUM_EXAMPLES):
33         basename = "{:06d}.jpg".format(i+1)
34         check_link(images_path, basename, test_dir)
35 
if __name__ == '__main__':
    # Dataset root; a relative path, so it is resolved against the current
    # working directory when the script is run.
    celeba_root = '../DATA/CelebA'
    add_splits(data_path=celeba_root)
View Code

 

posted @ 2021-04-07 11:21  临近边缘  阅读(571)  评论(0)    收藏  举报