import os
import random
import shutil
def split_data(source_dir, train_dir, val_dir, test_dir):
    # Make sure the target directories exist
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    # List the regular files in the source directory (skip subdirectories)
    files = [f for f in os.listdir(source_dir)
             if os.path.isfile(os.path.join(source_dir, f))]
    total_files = len(files)

    # Compute the size of each subset: 60% train, 20% val, remainder test
    train_size = int(total_files * 0.6)
    val_size = int(total_files * 0.2)
    test_size = total_files - train_size - val_size

    # Shuffle the file list so the split is random
    random.shuffle(files)

    # Copy each file into its subset, filling train, then val, then test
    for file in files:
        file_path = os.path.join(source_dir, file)
        if train_size > 0:
            shutil.copy2(file_path, os.path.join(train_dir, file))
            train_size -= 1
        elif val_size > 0:
            shutil.copy2(file_path, os.path.join(val_dir, file))
            val_size -= 1
        else:
            shutil.copy2(file_path, os.path.join(test_dir, file))
            test_size -= 1
if __name__ == "__main__":
    source_dir = "frog_dataset"   # replace with the path to the source directory
    train_dir = "./data/train/"   # replace with the path to the training directory
    val_dir = "./data/val/"       # replace with the path to the validation directory
    test_dir = "./data/test/"     # replace with the path to the test directory
    split_data(source_dir, train_dir, val_dir, test_dir)
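# Note: because the split relies on random.shuffle, each run produces a
# different assignment of files. A minimal sketch, assuming a reproducible
# split is wanted (not stated in the original), is to seed the RNG before
# the call; the seed value below is just an example:
#
#     random.seed(42)
#     split_data(source_dir, train_dir, val_dir, test_dir)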