今天把官网提供的车辆缺陷数据集下载了下来,并对其进行预处理。
划分训练集与验证集:
import os
import random
import shutil
# Dataset root; an empty string resolves the sub-directories relative to the
# current working directory.
dataset_path = ""
images_train_dir = os.path.join(dataset_path, "images", "train")
labels_train_dir = os.path.join(dataset_path, "labels", "train")
images_val_dir = os.path.join(dataset_path, "images", "val")
labels_val_dir = os.path.join(dataset_path, "labels", "val")
# Fraction of the training images that is moved to the validation set (20%).
val_ratio = 0.2


def split_validation_set(images_train, labels_train, images_val, labels_val, ratio):
    """Move a random share of the training images and their labels to validation.

    Args:
        images_train: Directory holding the training images.
        labels_train: Directory holding the training label files (``<stem>.txt``).
        images_val: Destination directory for validation images (created if missing).
        labels_val: Destination directory for validation labels (created if missing).
        ratio: Fraction of the images to move; the count is truncated with ``int``.

    Returns:
        The list of image file names that were moved.
    """
    # Create the validation directories.
    os.makedirs(images_val, exist_ok=True)
    os.makedirs(labels_val, exist_ok=True)

    # Collect every image file, then shuffle so the selection is random.
    image_files = [
        f for f in os.listdir(images_train)
        if f.endswith(('.jpg', '.png', '.jpeg'))
    ]
    random.shuffle(image_files)

    val_count = int(len(image_files) * ratio)
    selected = image_files[:val_count]

    unlabeled = []
    for image_name in selected:
        # Move the image file.
        shutil.move(
            os.path.join(images_train, image_name),
            os.path.join(images_val, image_name),
        )
        # Derive the label name from the stem only: chained str.replace would
        # also rewrite ".jpg"/".png" occurring in the middle of a file name.
        label_name = os.path.splitext(image_name)[0] + ".txt"
        label_src = os.path.join(labels_train, label_name)
        if os.path.isfile(label_src):
            shutil.move(label_src, os.path.join(labels_val, label_name))
        else:
            # Keep going instead of aborting with a half-finished split;
            # the missing labels are reported once the loop is done.
            unlabeled.append(image_name)

    print(f"Moved {val_count} images and labels to validation set.")
    if unlabeled:
        print(f"Warning: {len(unlabeled)} moved images had no label file: {sorted(unlabeled)}")
    return selected


if __name__ == "__main__":
    split_validation_set(
        images_train_dir, labels_train_dir, images_val_dir, labels_val_dir, val_ratio
    )
验证划分结果是否有误:
import os
# Dataset directories, relative to the current working directory.
images_train_dir = "images/train"
images_val_dir = "images/val"
labels_train_dir = "labels/train"
labels_val_dir = "labels/val"


def find_missing_labels(image_names, label_names):
    """Return the image file names that have no label file with the same stem.

    Images and labels are matched on the file name without its extension, so
    ``.png`` and ``.jpeg`` images are paired with their ``.txt`` labels just
    like ``.jpg`` images are.

    Args:
        image_names: Iterable of image file names.
        label_names: Iterable of label file names; only ``.txt`` entries count.

    Returns:
        A set with the image names that lack a matching label.
    """
    label_stems = {
        os.path.splitext(name)[0] for name in label_names if name.endswith(".txt")
    }
    return {
        name for name in image_names
        if os.path.splitext(name)[0] not in label_stems
    }


if __name__ == "__main__":
    # Inspect the training set.
    train_images = set(os.listdir(images_train_dir))
    train_labels = set(os.listdir(labels_train_dir))
    print(f"训练集图片数量: {len(train_images)}")
    print(f"训练集标签数量: {len(train_labels)}")
    # Inspect the validation set.
    val_images = set(os.listdir(images_val_dir))
    val_labels = set(os.listdir(labels_val_dir))
    print(f"验证集图片数量: {len(val_images)}")
    print(f"验证集标签数量: {len(val_labels)}")
    # Check that every image has a matching label file.
    missing_labels = find_missing_labels(train_images, train_labels)
    print(f"训练集中缺失的标签文件: {missing_labels}")
    missing_labels = find_missing_labels(val_images, val_labels)
    print(f"验证集中缺失的标签文件: {missing_labels}")
浙公网安备 33010602011771号