今天把官网提供的车辆缺陷数据集下载了下来,并对其进行预处理。


划分数据集:从训练集中随机抽取 20% 的图片及其标签作为验证集

import os
import random
import shutil

# Dataset root. An empty string makes every path below relative to the
# current working directory.
dataset_path = ""
images_train_dir = os.path.join(dataset_path, "images", "train")
labels_train_dir = os.path.join(dataset_path, "labels", "train")
images_val_dir = os.path.join(dataset_path, "images", "val")
labels_val_dir = os.path.join(dataset_path, "labels", "val")

# Fraction of the training images that is moved to the validation set (20%).
val_ratio = 0.2

# File extensions (compared case-insensitively) that count as images.
IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')


def split_validation_set(images_train_dir, labels_train_dir, images_val_dir,
                         labels_val_dir, val_ratio=0.2, seed=None):
    """Move a random share of the training images and labels to validation.

    Args:
        images_train_dir: Directory holding the training images.
        labels_train_dir: Directory holding the ``.txt`` label files.
        images_val_dir: Destination directory for validation images
            (created if it does not exist).
        labels_val_dir: Destination directory for validation labels
            (created if it does not exist).
        val_ratio: Fraction of images to move, between 0 and 1.
        seed: Optional seed for the shuffle; pass a value to make the
            split reproducible.

    Returns:
        A ``(moved_images, missing_labels)`` tuple: the image file names
        that were moved, and the subset of them for which no label file
        was found.

    Raises:
        ValueError: If ``val_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= val_ratio <= 1:
        raise ValueError(f"val_ratio must be between 0 and 1, got {val_ratio!r}")

    os.makedirs(images_val_dir, exist_ok=True)
    os.makedirs(labels_val_dir, exist_ok=True)

    # Sort first: os.listdir order is arbitrary, so a seeded shuffle is only
    # reproducible when it starts from a stable ordering.
    image_files = sorted(
        f for f in os.listdir(images_train_dir)
        if f.lower().endswith(IMAGE_EXTENSIONS)
    )
    random.Random(seed).shuffle(image_files)

    val_count = int(len(image_files) * val_ratio)
    moved_images = []
    missing_labels = []
    for image_name in image_files[:val_count]:
        # splitext swaps only the final extension; chained str.replace would
        # also rewrite ".jpg"/".png" occurring in the middle of a file name.
        label_name = os.path.splitext(image_name)[0] + ".txt"
        label_src = os.path.join(labels_train_dir, label_name)
        # Check before moving anything so a missing label cannot abort the
        # run halfway and leave the split in an inconsistent state.
        has_label = os.path.isfile(label_src)

        shutil.move(os.path.join(images_train_dir, image_name),
                    os.path.join(images_val_dir, image_name))
        moved_images.append(image_name)

        if has_label:
            shutil.move(label_src, os.path.join(labels_val_dir, label_name))
        else:
            missing_labels.append(image_name)

    return moved_images, missing_labels


if __name__ == "__main__":
    moved_images, missing_labels = split_validation_set(
        images_train_dir, labels_train_dir, images_val_dir, labels_val_dir,
        val_ratio=val_ratio,
    )
    val_count = len(moved_images)

    for image_name in missing_labels:
        print(f"Warning: no label file found for {image_name}; moved the image only.")

    print(f"Moved {val_count} images and labels to validation set.")

验证划分结果是否正确(检查图片与标签的数量及对应关系)

import os

# Dataset directories, relative to the current working directory.
images_train_dir = "images/train"
images_val_dir = "images/val"
labels_train_dir = "labels/train"
labels_val_dir = "labels/val"


def find_unlabeled_images(image_names, label_names):
    """Return the image file names that have no matching ``.txt`` label.

    An image and a label match when their names are equal once the final
    extension is removed, so ``.jpg``, ``.png`` and ``.jpeg`` images are all
    paired correctly with their ``.txt`` label.

    Args:
        image_names: Iterable of image file names.
        label_names: Iterable of label file names; entries whose extension
            is not ``.txt`` are ignored.

    Returns:
        The set of image file names without a corresponding label file.
    """
    labeled_stems = {
        stem
        for stem, ext in map(os.path.splitext, label_names)
        if ext.lower() == ".txt"
    }
    return {
        name for name in image_names
        if os.path.splitext(name)[0] not in labeled_stems
    }


if __name__ == "__main__":
    # Report the training-set file counts.
    train_images = set(os.listdir(images_train_dir))
    train_labels = set(os.listdir(labels_train_dir))
    print(f"训练集图片数量: {len(train_images)}")
    print(f"训练集标签数量: {len(train_labels)}")

    # Report the validation-set file counts.
    val_images = set(os.listdir(images_val_dir))
    val_labels = set(os.listdir(labels_val_dir))
    print(f"验证集图片数量: {len(val_images)}")
    print(f"验证集标签数量: {len(val_labels)}")

    # List the images in each split that lack a label file.
    missing_labels = find_unlabeled_images(train_images, train_labels)
    print(f"训练集中缺失的标签文件: {missing_labels}")

    missing_labels = find_unlabeled_images(val_images, val_labels)
    print(f"验证集中缺失的标签文件: {missing_labels}")
posted on 2025-02-24 13:31  许七安gyg  阅读(18)  评论(0)    收藏  举报