【CV】GAN代码解析：unaligned_dataset.py


# 默认的dataset_mode值是unaligned，所以只学一下UnalignedDataset的文件

import os
# 路径处理库
from data.base_dataset import BaseDataset, get_transform
# 从自定义模块导入 BaseDataset（数据集基类）和图像增强函数 get_transform
from data.image_folder import make_dataset
# 从自定义的 image_folder 模块导入 make_dataset，用于根据目录枚举图像路径列表。
from PIL import Image
# 从 Pillow 导入 Image，用于打开/读取图片
import random
# 导入 random，稍后用来随机采样 B 域图片索引


class UnalignedDataset(BaseDataset):
    """
    This dataset class can load unaligned/unpaired datasets.

    It requires two directories to host training images from domain A '/path/to/data/trainA'
    and from domain B '/path/to/data/trainB' respectively.
    You can train the model with the dataset flag '--dataroot /path/to/data'.
    Similarly, you need to prepare two directories:
    '/path/to/data/testA' and '/path/to/data/testB' during test time.
    """

    def __init__(self, opt):
        """Initialize this dataset class.

        Parameters:
            opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
        """
        # Explicitly run the base-class initializer on this instance.
        # NOTE(review): self.opt, used below, is presumably set there -- BaseDataset is not visible here.
        BaseDataset.__init__(self, opt)

        # Build the per-domain directories by appending 'A' / 'B' to the phase name.
        self.dir_A = os.path.join(opt.dataroot, opt.phase + 'A')  # create a path '/path/to/data/trainA'
        self.dir_B = os.path.join(opt.dataroot, opt.phase + 'B')  # create a path '/path/to/data/trainB'

        # Collect the image paths of each domain (capped by opt.max_dataset_size) in sorted order.
        self.A_paths = sorted(make_dataset(self.dir_A, opt.max_dataset_size))   # load images from '/path/to/data/trainA'
        self.B_paths = sorted(make_dataset(self.dir_B, opt.max_dataset_size))    # load images from '/path/to/data/trainB'

        # Remember how many images each domain holds; the two counts may differ.
        self.A_size = len(self.A_paths)  # get the size of dataset A
        self.B_size = len(self.B_paths)  # get the size of dataset B

        # True when the translation direction is B -> A.
        btoA = self.opt.direction == 'BtoA'

        # For B -> A the two channel options swap roles: domain A then uses
        # output_nc and domain B uses input_nc.
        input_nc = self.opt.output_nc if btoA else self.opt.input_nc       # get the number of channels of input image
        output_nc = self.opt.input_nc if btoA else self.opt.output_nc      # get the number of channels of output image

        # One transform per domain; the grayscale flag is set when that
        # domain's channel count is 1.
        self.transform_A = get_transform(self.opt, grayscale=(input_nc == 1))
        self.transform_B = get_transform(self.opt, grayscale=(output_nc == 1))

    def __getitem__(self, index):
        """Return a data point and its metadata information.

        Parameters:
            index (int)      -- a random integer for data indexing

        Returns a dictionary that contains A, B, A_paths and B_paths
            A (tensor)       -- an image in the input domain
            B (tensor)       -- its corresponding image in the target domain
            A_paths (str)    -- image paths
            B_paths (str)    -- image paths
        """
        # Wrap the index with a modulo so it always falls inside domain A.
        A_path = self.A_paths[index % self.A_size]  # make sure index is within the range

        if self.opt.serial_batches:   # make sure index is within the range
            # Ordered mode: B follows the same index, wrapped to domain B's size.
            index_B = index % self.B_size
        else:
            # Default mode: pick B at random so that A/B pairs are not fixed.
            index_B = random.randint(0, self.B_size - 1)
        B_path = self.B_paths[index_B]

        # Image.open keeps the underlying file open; the context manager closes
        # it deterministically once the RGB copy has been produced, instead of
        # leaving the handle to the garbage collector.
        with Image.open(A_path) as raw_A:
            A_img = raw_A.convert('RGB')
        with Image.open(B_path) as raw_B:
            B_img = raw_B.convert('RGB')

        # apply image transformation
        A = self.transform_A(A_img)
        B = self.transform_B(B_img)

        # Hand back both transformed images together with their file paths.
        return {'A': A, 'B': B, 'A_paths': A_path, 'B_paths': B_path}

    def __len__(self):
        """Return the total number of images in the dataset.

        As we have two datasets with potentially different number of images,
        we take the maximum of the two domain sizes.
        """
        return max(self.A_size, self.B_size)
posted @ 2025-09-24 10:14 SaTsuki26681534 阅读(20) 评论(0) 收藏举报
刷新页面返回顶部
Loading

satsuki26681534

【CV】GAN代码解析：unaligned_dataset.py

公告