# The default value of dataset_mode is 'unaligned', so here we only study the UnalignedDataset file.
import os
# standard library for filesystem path handling
from data.base_dataset import BaseDataset, get_transform
# import BaseDataset (the dataset base class) and the preprocessing/augmentation factory get_transform from our data package
from data.image_folder import make_dataset
# import make_dataset from the custom image_folder module; it enumerates the image file paths under a directory
from PIL import Image
# Image from Pillow, used to open/read image files
import random
# random, used later to randomly sample an index from domain B
class UnalignedDataset(BaseDataset):
# declare class UnalignedDataset, which inherits from BaseDataset
"""
This dataset class can load unaligned/unpaired datasets.
It requires two directories to host training images from domain A '/path/to/data/trainA'
and from domain B '/path/to/data/trainB' respectively.
You can train the model with the dataset flag '--dataroot /path/to/data'.
Similarly, you need to prepare two directories:
'/path/to/data/testA' and '/path/to/data/testB' during test time.
"""
def __init__(self, opt):
# construct the dataset object from the command-line options opt
"""Initialize this dataset class.
Parameters:
opt (Option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions
"""
BaseDataset.__init__(self, opt)  # explicitly call the parent constructor to finish base-class initialization (it initializes self, i.e. this object itself)
self.dir_A = os.path.join(opt.dataroot, opt.phase + 'A') # create a path '/path/to/data/trainA'
self.dir_B = os.path.join(opt.dataroot, opt.phase + 'B') # create a path '/path/to/data/trainB'
# join paths to get the domain-A and domain-B directories
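# Illustrative example (hypothetical values, not taken from the options above): with
# --dataroot ./datasets/horse2zebra and --phase train, the joins yield
#   self.dir_A == './datasets/horse2zebra/trainA'
#   self.dir_B == './datasets/horse2zebra/trainB'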
self.A_paths = sorted(make_dataset(self.dir_A, opt.max_dataset_size)) # load images from '/path/to/data/trainA'
self.B_paths = sorted(make_dataset(self.dir_B, opt.max_dataset_size)) # load images from '/path/to/data/trainB'
# enumerate all image files under each directory and sort the paths lexicographically
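# Rough sketch of what make_dataset does (simplified; see data/image_folder.py for the real
# implementation): walk the directory, keep files whose extension looks like an image, and cap
# the list at max_dataset_size. The helper name and the extension list below are illustrative.
#
#   def make_dataset_sketch(directory, max_dataset_size=float("inf")):
#       images = []
#       for root, _, fnames in sorted(os.walk(directory)):
#           for fname in sorted(fnames):
#               if fname.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.tif', '.tiff')):
#                   images.append(os.path.join(root, fname))
#       return images[:min(max_dataset_size, len(images))]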
self.A_size = len(self.A_paths) # get the size of dataset A
self.B_size = len(self.B_paths) # get the size of dataset B
# record the number of images in each domain
btoA = self.opt.direction == 'BtoA'
# flag for the translation direction (True when the direction is B->A)
input_nc = self.opt.output_nc if btoA else self.opt.input_nc # get the number of channels of input image
output_nc = self.opt.input_nc if btoA else self.opt.output_nc # get the number of channels of output image
# choose channel counts based on the direction: for B->A the input images come from domain B, so the input channel count is output_nc; otherwise it is input_nc
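# Worked example (hypothetical option values): with direction == 'BtoA', opt.input_nc == 3
# and opt.output_nc == 1, the network's actual input comes from domain B, so
#   input_nc  -> opt.output_nc == 1   (B images are single-channel)
#   output_nc -> opt.input_nc  == 3   (A images are RGB)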
self.transform_A = get_transform(self.opt, grayscale=(input_nc == 1))
# signature of get_transform (from data/base_dataset.py):
# def get_transform(opt, params=None, grayscale=False,
#                   method=transforms.InterpolationMode.BICUBIC, convert=True)
self.transform_B = get_transform(self.opt, grayscale=(output_nc == 1))
# set up the preprocessing/augmentation pipelines for domain A and domain B; grayscale=True when the corresponding channel count is 1
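# To answer "what does the transform do?": get_transform assembles a torchvision
# transforms.Compose pipeline from opt. Roughly, for the default --preprocess resize_and_crop
# (a sketch; exact details live in data/base_dataset.py):
#   transforms.Compose([
#       transforms.Grayscale(1),                                  # only when grayscale=True
#       transforms.Resize(opt.load_size, interpolation=BICUBIC),
#       transforms.RandomCrop(opt.crop_size),
#       transforms.RandomHorizontalFlip(),                        # skipped when --no_flip is set
#       transforms.ToTensor(),
#       transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),   # (0.5,), (0.5,) for grayscale
#   ])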
def __getitem__(self, index):
# implement indexed access to the dataset via __getitem__(self, index)
"""Return a data point and its metadata information.
Parameters:
index (int) -- a random integer for data indexing
Returns a dictionary that contains A, B, A_paths and B_paths
A (tensor) -- an image in the input domain
B (tensor) -- its corresponding image in the target domain
A_paths (str) -- image paths
B_paths (str) -- image paths
"""
A_path = self.A_paths[index % self.A_size]  # make sure the index stays within the range of A
# pick the domain-A sample path: index % A_size keeps the index valid (wraps around the list)
if self.opt.serial_batches:  # take B images in order: pair index with index % B_size
index_B = index % self.B_size
else:  # to avoid a fixed pairing, sample a random index from domain B
index_B = random.randint(0, self.B_size - 1)
# the branch above decides how the domain-B index is chosen, controlled by serial_batches
# definition of the serial_batches option (from the options parser):
# parser.add_argument('--serial_batches', action='store_true',
# help='if true, takes images in order to make batches, otherwise takes them randomly')
B_path = self.B_paths[index_B]
# fetch the domain-B path using the index_B chosen above (ordered or random)
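# Worked example (hypothetical sizes): with A_size == 1000, B_size == 1300 and index == 1050:
#   A uses 1050 % 1000 == 50
#   with --serial_batches, B uses 1050 % 1300 == 1050 (fixed pairing across epochs)
#   otherwise B uses random.randint(0, 1299), a fresh random index on every call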
A_img = Image.open(A_path).convert('RGB')
B_img = Image.open(B_path).convert('RGB')
# open both images with Pillow and convert them to 3-channel RGB
# apply image transformation
A = self.transform_A(A_img)
B = self.transform_B(B_img)
# apply the transforms to each image, producing the tensors A and B
# (what operations does the transform define? see the pipeline sketch after transform_B in __init__)
return {'A': A, 'B': B, 'A_paths': A_path, 'B_paths': B_path}
# return a dictionary containing the tensors A/B and their respective file paths
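# Downstream, the model's set_input (e.g. in models/cycle_gan_model.py) consumes these keys,
# roughly: real_A = data['A' if AtoB else 'B'], real_B = data['B' if AtoB else 'A'], and it keeps
# the corresponding *_paths for naming saved results. (Paraphrased sketch, not verbatim code.)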
def __len__(self):
# since A and B may contain different numbers of samples, the length is the larger of the two
"""Return the total number of images in the dataset.
As we have two datasets with potentially different number of images,
we take a maximum of the two.
"""
return max(self.A_size, self.B_size)
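# Minimal usage sketch (assumes the standard pytorch-CycleGAN-and-pix2pix entry points;
# the dataroot path and flag values are illustrative):
#
#   from options.train_options import TrainOptions
#   from data import create_dataset
#
#   opt = TrainOptions().parse()           # e.g. --dataroot ./datasets/horse2zebra --dataset_mode unaligned
#   dataset = create_dataset(opt)          # wraps UnalignedDataset in a DataLoader
#   print('dataset size:', len(dataset))   # == max(A_size, B_size), see __len__ above
#   for i, data in enumerate(dataset):
#       real_A, real_B = data['A'], data['B']   # tensors of shape (batch, C, H, W)
#       break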