常用脚本之 拼接多个 txt 文件

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
'''
@File    :   gen_all_data.py
@Time    :   2023/05/30 10:46:29
@Author  :   H
@Version :   1.0
@Contact :   
@License :   
@Desc    :   拼接多个 txt 文件,以生成图像分类的数据标注
'''

# here put the import lib


import os
import random
from PIL import Image

# Collected annotation lines of the form "<image path> 0"; every image taken
# from the male list files below is written with label 0.
male_all_txt = []

# List files for the male class. The first space-separated field of each line
# is read as an image path.
# NOTE(review): the entries mix relative and absolute paths, so the relative
# ones resolve against the current working directory -- confirm intended.
male_input_txt = ['baseBodyDataset/bodyCrop/aaf-images-bodyCrop/images_m_list.txt',
                  '/data/huyuzhen/datasets/baseBodyDataset/bodyCrop/CACD2000-bodyCrop/m_list.txt',
                  '/data/huyuzhen/datasets/baseBodyDataset/bodyCrop/detect-bodyCrop/m_list.txt',
                  'baseBodyDataset/bodyCrop/megaage-asian/6-17-60+m_list.txt',
                  'baseBodyDataset/bodyCrop/megaage-asian/18-59-m_list.txt',
                  'baseBodyDataset/bodyCrop/UTKface-inthewild-bodyCrop/m_list.txt',
                  'baseBodyDataset/bodyCrop/SCUT-FBP5500_v2/m_list.txt',
                  'baseBodyDataset/bodyCrop/Adience-bodyCrop/m_list.txt',
                  'baseBodyDataset/bodyCrop/CelebA/Anno/male_list.txt',
                  'baseBodyDataset/bodyCrop/LFW/Anno/male_list.txt'
]

for txt_path in male_input_txt:
    print(txt_path)
    # `with` guarantees the list file is closed even if reading fails midway.
    with open(txt_path, encoding='utf-8') as f:
        for line in f:
            # Strip first: a line holding only a path (no label field) would
            # otherwise keep its trailing newline and fail the existence check.
            line = line.strip()
            if not line:
                continue
            # NOTE(review): this rewrite also hits paths that already contain
            # '/baseBodyDataset/bodyCrop', doubling the prefix -- presumably
            # the list entries are recorded without it; confirm.
            img_path = line.split(' ')[0].replace('/bodyCrop', '/baseBodyDataset/bodyCrop')
            if not os.path.exists(img_path):
                continue
            try:
                # Image.open is lazy (it identifies the file but leaves it
                # open), so use it as a context manager to release the handle.
                with Image.open(img_path):
                    pass
            except Exception:
                # Best-effort: skip anything Pillow cannot open. Unlike a bare
                # `except`, this still lets Ctrl-C / SystemExit stop the run.
                continue
            male_all_txt.append(img_path + ' 0')

print(len(male_all_txt))
random.shuffle(male_all_txt)


# List files for the female class. The first space-separated field of each
# line is read as an image path.
# NOTE(review): the entries mix relative and absolute paths, so the relative
# ones resolve against the current working directory -- confirm intended.
female_input_txt = ['baseBodyDataset/bodyCrop/aaf-images-bodyCrop/images_fm_list.txt',
                    '/data/huyuzhen/datasets/baseBodyDataset/bodyCrop/CACD2000-bodyCrop/fm_list.txt',
                    '/data/huyuzhen/datasets/baseBodyDataset/bodyCrop/detect-bodyCrop/fm_list.txt',
                    'baseBodyDataset/bodyCrop/megaage-asian/6-17-60+fm_list.txt',
                    'baseBodyDataset/bodyCrop/megaage-asian/18-59-fm_list.txt',
                    'baseBodyDataset/bodyCrop/UTKface-inthewild-bodyCrop/fm_list.txt',
                    'baseBodyDataset/bodyCrop/SCUT-FBP5500_v2/fm_list.txt',
                    'baseBodyDataset/bodyCrop/Adience-bodyCrop/fm_list.txt',
                    'baseBodyDataset/bodyCrop/CelebA/Anno/female_list.txt',
                    'baseBodyDataset/bodyCrop/LFW/Anno/female_list.txt'
]

# Collected annotation lines of the form "<image path> 1"; every image taken
# from the female list files above is written with label 1.
female_all_txt = []

for txt_path in female_input_txt:
    print(txt_path)
    # `with` guarantees the list file is closed even if reading fails midway.
    with open(txt_path, encoding='utf-8') as f:
        for line in f:
            # Strip first: a line holding only a path (no label field) would
            # otherwise keep its trailing newline and fail the existence check.
            line = line.strip()
            if not line:
                continue
            # NOTE(review): this rewrite also hits paths that already contain
            # '/baseBodyDataset/bodyCrop', doubling the prefix -- presumably
            # the list entries are recorded without it; confirm.
            img_path = line.split(' ')[0].replace('/bodyCrop', '/baseBodyDataset/bodyCrop')
            if not os.path.exists(img_path):
                continue
            try:
                # Image.open is lazy (it identifies the file but leaves it
                # open), so use it as a context manager to release the handle.
                with Image.open(img_path):
                    pass
            except Exception:
                # Best-effort: skip anything Pillow cannot open. Unlike a bare
                # `except`, this still lets Ctrl-C / SystemExit stop the run.
                continue
            female_all_txt.append(img_path + ' 1')

print(len(female_all_txt))
random.shuffle(female_all_txt)


# Fraction of each class assigned to the training split; the remainder becomes
# the validation split.
TRAIN_RATIO = 0.9

# Cap the female samples at the number of male samples so the female class is
# never larger than the male class.
female_all_txt = female_all_txt[:len(male_all_txt)]

# Compute the split point per class. Using the male index for both lists would
# put every female sample into train whenever the female list is shorter.
male_split = int(len(male_all_txt) * TRAIN_RATIO)
female_split = int(len(female_all_txt) * TRAIN_RATIO)

# Validation starts exactly where training ends; starting at split + 1 would
# silently drop one sample per class from both splits.
train_all_txt = male_all_txt[:male_split] + female_all_txt[:female_split]
val_all_txt = male_all_txt[male_split:] + female_all_txt[female_split:]

print(len(train_all_txt), len(val_all_txt))

# Write with the same UTF-8 encoding the list files are read with, so paths
# containing non-ASCII characters round-trip regardless of the locale default.
with open('/data/huyuzhen/datasets/baseBodyDataset/Annos/train.txt', 'w', encoding='utf-8') as train_f:
    train_f.writelines(line + ' \n' for line in train_all_txt)

with open('/data/huyuzhen/datasets/baseBodyDataset/Annos/val.txt', 'w', encoding='utf-8') as val_f:
    val_f.writelines(line + ' \n' for line in val_all_txt)
posted @ 2023-05-30 10:48  Zenith_Hugh  阅读(91)  评论(0)    收藏  举报