切分数据集到train-val-test文件夹

import os  
import random  
import shutil  
  
def split_data(source_dir, train_dir, val_dir, test_dir,
               train_ratio=0.6, val_ratio=0.2, seed=None):
    """Randomly copy the files of ``source_dir`` into train/val/test folders.

    Only regular files directly inside ``source_dir`` are considered;
    sub-directories are skipped because ``shutil.copy2`` cannot copy them.
    Files are copied (not moved), so the source folder is left untouched.
    A file that already exists under the same name in a destination
    folder is overwritten.

    Args:
        source_dir: Folder containing the files to split.
        train_dir: Destination folder for the training subset.
        val_dir: Destination folder for the validation subset.
        test_dir: Destination folder for the test subset.
        train_ratio: Fraction of files assigned to the training subset.
        val_ratio: Fraction of files assigned to the validation subset.
            Whatever is left after train and val goes to the test subset.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` state is used.

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more
            than 1.
    """
    # Small tolerance so that ratio pairs such as 0.7 + 0.3 are not rejected
    # because of floating-point rounding.
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            "train_ratio and val_ratio must be non-negative and sum to at most 1"
        )

    # Make sure every destination folder exists.
    for target_dir in (train_dir, val_dir, test_dir):
        os.makedirs(target_dir, exist_ok=True)

    # Keep regular files only. Sorting gives the shuffle a stable starting
    # order, since os.listdir returns entries in arbitrary order.
    files = sorted(
        name for name in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, name))
    )
    total_files = len(files)

    # Subset sizes are rounded down; the remainder lands in the test subset.
    train_size = int(total_files * train_ratio)
    val_size = int(total_files * val_ratio)
    val_end = train_size + val_size

    if seed is None:
        random.shuffle(files)
    else:
        # A private generator keeps the global random state untouched.
        random.Random(seed).shuffle(files)

    subsets = (
        (train_dir, files[:train_size]),
        (val_dir, files[train_size:val_end]),
        (test_dir, files[val_end:]),
    )
    for target_dir, names in subsets:
        for name in names:
            shutil.copy2(
                os.path.join(source_dir, name),
                os.path.join(target_dir, name),
            )

if __name__ == "__main__":
    # Script entry point: edit the four paths below to match your layout.
    split_data(
        "frog_dataset",   # source folder holding the full dataset
        "./data/train/",  # destination for the training subset
        "./data/val/",    # destination for the validation subset
        "./data/test/",   # destination for the test subset
    )

 

posted @ 2024-01-20 21:22  cup_leo  阅读(9)  评论(0)  编辑  收藏  举报