
A Summary of PyTorch-Related Notes

Binary classification is used as the running example.

Dataset

For custom datasets it is generally recommended to write your own subclass of the Dataset class; a simple example follows. If you want a quick-and-easy alternative, call TensorDataset directly (a small sketch of that route follows the example below).

# Bert Dataset
from torch.utils.data import Dataset

class BertDataset(Dataset):
    def __init__(self, data, tokenizer, max_length):
        self.tokenizer = tokenizer
        # Tokenize the whole corpus once up front
        self.features = tokenizer(data.cleaned_text.values.tolist(),
                                  max_length=max_length,
                                  truncation=True,
                                  padding=True,
                                  return_tensors='pt')
        # Columns 1-3 of the dataframe hold the label variants
        self.labels = data.iloc[:, [1, 2, 3]].values.tolist()
        self.idx = 0

    def __getitem__(self, item):
        return {'attention_mask': self.features['attention_mask'][item],
                'input_ids': self.features['input_ids'][item],
                'labels': self.labels[item][self.idx],
                'token_type_ids': self.features['token_type_ids'][item]
                }

    def __len__(self):
        return len(self.labels)

    def set_label_type(self, idx):
        # Select which of the label columns __getitem__ returns
        self.idx = idx
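
For the quick TensorDataset route mentioned above, a minimal sketch (the tensor shapes and names here are illustrative, not from the original post):

import torch
from torch.utils.data import TensorDataset, DataLoader

# Illustrative tensors: 100 samples with 16 features and binary labels
features = torch.randn(100, 16)
labels = torch.randint(0, 2, (100,))

# TensorDataset simply zips the tensors along the first dimension
dataset = TensorDataset(features, labels)
loader = DataLoader(dataset, batch_size=8, shuffle=True)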

For common datasets that already ship with wrappers, the dataset class and its loaders can be packaged together; below is an example of a CIFAR-100 data-loading class.

# CV CIFAR data
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms


class Data:

    @staticmethod
    def get_statistics():
        # Compute the per-channel mean and std of the training set
        train_set = torchvision.datasets.CIFAR100(root="./data", train=True, download=True,
                                                  transform=transforms.ToTensor())
        data = torch.cat([data[0] for data in DataLoader(train_set)])
        return data.mean(dim=[0, 2, 3]), data.std(dim=[0, 2, 3])

    def __init__(self, batch_size, threads):
        mean, std = self.get_statistics()
        # Data augmentation for the training set
        train_transform = transforms.Compose([
            # Random crop with padding
            torchvision.transforms.RandomCrop(size=(32, 32), padding=4),
            # Random horizontal flip
            torchvision.transforms.RandomHorizontalFlip(),
            # Convert to tensor
            transforms.ToTensor(),
            # Normalize with the dataset statistics
            transforms.Normalize(mean, std),
            # Simulate occlusion (Cutout is a custom transform, see the sketch below)
            Cutout()
        ])
        # The test set is only converted to tensors and normalized
        test_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
        # Training dataset
        train_set = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=train_transform)
        # Test dataset
        test_set = torchvision.datasets.CIFAR100(root='./data', train=False, download=True, transform=test_transform)
        # Training / test loaders
        self.train = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=threads)
        self.test = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=threads)
        self.classes = train_set.classes
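
The Cutout() transform used in train_transform above is a custom class that this post does not define; a minimal sketch of one common variant (the default patch size here is an assumption) could look like this:

import torch

class Cutout:
    """Zero out a random square patch of a (C, H, W) image tensor."""
    def __init__(self, size=8):  # patch size is an assumed default
        self.size = size

    def __call__(self, img):
        h, w = img.shape[1], img.shape[2]
        # Pick a random centre and clamp the patch to the image borders
        y = torch.randint(0, h, (1,)).item()
        x = torch.randint(0, w, (1,)).item()
        y1, y2 = max(0, y - self.size // 2), min(h, y + self.size // 2)
        x1, x2 = max(0, x - self.size // 2), min(w, x + self.size // 2)
        img[:, y1:y2, x1:x2] = 0.0
        return img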

Seed & Device

To make results reproducible, the random seeds need to be fixed.

# Basic seeding
import random
import numpy as np
import torch

def setup_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True  # this step slows training down
    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.benchmark = False
    # torch.backends.cudnn.benchmark = True  # for accelerating the running


# Stronger control via the DataLoader workers; the basic seeding above is usually enough
def _init_fn(worker_id):
    random.seed(10 + worker_id)
    np.random.seed(10 + worker_id)
    torch.manual_seed(10 + worker_id)
    torch.cuda.manual_seed(10 + worker_id)
    torch.cuda.manual_seed_all(10 + worker_id)


dataloader = DataLoader(tensor_dataset,
                        batch_size=opt.batchSize,
                        shuffle=True,
                        num_workers=opt.workers,
                        worker_init_fn=_init_fn)

For single-GPU runs you generally need to set the CUDA device and move both the model and the input data onto it.

# os.environ['CUDA_VISIBLE_DEVICES'] = '0'
device = torch.device("cuda:1") if torch.cuda.is_available() else torch.device("cpu")
......
model.to(device)
......
for features, labels in dataloader:
    # Tensor.to() is not in-place, so the results must be reassigned
    features = features.to(device)
    labels = labels.to(device)

Training & Testing

The training and testing procedure is simple and clear; here is an example.

# BERT training with fine-tuning
for epoch in range(num_epochs):
    train_dict = {"loss": 0, "length": 0, "correct": 0}
    with torch.enable_grad():
        for batch in train_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            loss = outputs.loss  # the loss is computed automatically as long as labels are passed in
            train_dict['loss'] += loss.item()
            train_dict['length'] += len(outputs.logits)
            train_dict['correct'] += (torch.argmax(outputs.logits, dim=-1) ==
                                      batch['labels']).cpu().sum().item()
            loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            progress_bar.update(1)
    print("\nTrain Epoch {0} Loss:{1:.2f}\tAccuracy: {2:.2f} %".
          format(epoch, train_dict['loss'] / len(train_loader), (train_dict["correct"] / train_dict['length'])*100))

    with torch.no_grad():
        result_dict = {'TP': 0, 'TN': 0, 'FP': 0, 'FN': 0}
        for batch in test_loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            outputs = model(**batch)
            predictions = torch.argmax(outputs.logits, dim=-1)
            # TP: prediction and label are both 1
            result_dict['TP'] += ((predictions == 1) & (batch['labels'] == 1)).cpu().sum().item()
            # TN: prediction and label are both 0
            result_dict['TN'] += ((predictions == 0) & (batch['labels'] == 0)).cpu().sum().item()
            # FN: prediction 0, label 1
            result_dict['FN'] += ((predictions == 0) & (batch['labels'] == 1)).cpu().sum().item()
            # FP: prediction 1, label 0
            result_dict['FP'] += ((predictions == 1) & (batch['labels'] == 0)).cpu().sum().item()
        acc, prec, recall, f1 = eval_indicators(result_dict)
        if (epoch+1) % 5 == 0:
            torch.save(model, result_dir+"/NS-" + str(epoch) + ".pt")
        print("\nEval Epoch {0} Accuracy:{1:.2f} %\tPrecision: {2:.2f} %\tRecall: {3:.2f} %\tF1 Score: {4:.2f} %".
              format(epoch, acc * 100, prec * 100, recall * 100, f1 * 100))
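
The loop above assumes that optimizer, lr_scheduler and progress_bar were created beforehand; a minimal setup sketch following the usual transformers pattern (the hyper-parameter values are assumptions) might be:

from torch.optim import AdamW
from transformers import get_scheduler
from tqdm.auto import tqdm

optimizer = AdamW(model.parameters(), lr=5e-5)
num_training_steps = num_epochs * len(train_loader)
# Linear decay without warm-up, stepped once per batch as in the loop above
lr_scheduler = get_scheduler("linear", optimizer=optimizer,
                             num_warmup_steps=0, num_training_steps=num_training_steps)
progress_bar = tqdm(range(num_training_steps))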

The common evaluation metrics are basically accuracy, precision, recall and F1 score; simple code to compute them follows.

def eval_indicators(result_dict):
    acc = (result_dict['TP']+result_dict['TN'])/(result_dict['TP'] + result_dict['TN'] +
                                                 result_dict['FP'] + result_dict['FN'])
    precision = (result_dict['TP']/(result_dict['TP'] + result_dict['FP']))
    recall = (result_dict['TP']/(result_dict['TP'] + result_dict['FN']))
    f1 = 2 * (precision * recall) / (precision + recall)
    return acc, precision, recall, f1
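
A quick sanity check with made-up counts (40 TP, 45 TN, 5 FP, 10 FN) gives accuracy 0.85, precision ≈ 0.89, recall 0.80 and F1 ≈ 0.84:

print(eval_indicators({'TP': 40, 'TN': 45, 'FP': 5, 'FN': 10}))
# -> (0.85, 0.888..., 0.8, 0.842...)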

If the model contains BatchNorm layers, call the disable_running_status function below during the test phase to stop the BN running statistics from changing.

def disable_running_status(model):
    def _disable(module):
        if isinstance(module, nn.BatchNorm2d):
            # Back up the current BN momentum to module.backup_momentum
            module.backup_momentum = module.momentum
            # Set the momentum to 0 so the running statistics stop updating
            module.momentum = 0
    model.apply(_disable)


def enable_running_status(model):
    def _enable(module):
        if isinstance(module, nn.BatchNorm2d) and hasattr(module, "backup_momentum"):
            # Check for BN layers with a backed-up field and restore the momentum
            module.momentum = module.backup_momentum
    model.apply(_enable)
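
A usage sketch, assuming the model and test_loader from the training loop above: the running statistics are frozen before evaluation and restored afterwards.

disable_running_status(model)   # freeze BN running statistics during evaluation
with torch.no_grad():
    for batch in test_loader:
        ...                     # evaluation as shown earlier
enable_running_status(model)    # restore the original momentum values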

Transformers Fine Tuning

The dataset and data-loading parts used during fine-tuning are not repeated here. For the transformers family of libraries, a typical NLP fine-tuning workflow is: load the tokenizer, tokenize the (small) dataset, load the model and adjust the output layer, freeze the parameters of specific layers, and train the pretrained model.

import transformers as trm
# Load the pretrained tokenizer
tokenizer = trm.AutoTokenizer.from_pretrained("bert-base-uncased")
# Tokenize the data; the output generally contains input_ids, attention_mask and token_type_ids
tokens = tokenizer("Hello,world!", max_length=max_length, truncation=True, padding=True, return_tensors='pt')
# Then load the model; for a binary classification task the output layer is set to 2
model = trm.AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
# Set up the remaining pieces, e.g. optimizer, learning-rate schedule, etc.
......
# Switch to training mode (this only affects layers such as BatchNorm and Dropout)
model.train()
# According to the official transformers docs there are two ways to fine-tune: the Trainer class
# and a regular training loop; the regular loop is shown here
for epoch in range(num_epochs):
   with torch.enable_grad():
      ......
      outputs = model(**batch)
      loss = outputs.loss  # the loss is computed automatically as long as labels are passed in
      loss.backward()
      optimizer.step()
      optimizer.zero_grad()
   with torch.no_grad():
      # Evaluate the model; this can of course be changed to k-fold cross-validation
      ......
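
For the Trainer route mentioned in the comment above, a minimal sketch, assuming pre-tokenized datasets.Dataset objects named train_dataset and eval_dataset (not defined in this post), could be:

from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",              # checkpoint directory (assumed path)
    num_train_epochs=3,
    per_device_train_batch_size=16,
    evaluation_strategy="epoch",         # evaluate once per epoch
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,         # assumed pre-tokenized datasets
    eval_dataset=eval_dataset,
)
trainer.train()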

Simple code for freezing layers is shown below.

# Names of the layers that should NOT be frozen
unfreeze_layers = [...]
# Freeze the intermediate layers
for name, param in model.named_parameters():
    param.requires_grad = False
    for element in unfreeze_layers:
        if element in name:
            param.requires_grad = True
            break
# When using the built-in torch optimizers, filter out the frozen parameters so they are no longer updated
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=5e-5)
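
As an illustration only (the layer names below are hypothetical, based on the usual parameter naming of a transformers BERT classifier), one might keep just the last encoder block, the pooler and the classifier head trainable:

unfreeze_layers = ["encoder.layer.11", "pooler", "classifier"]  # hypothetical example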

A reusable approach worth borrowing is shown below.

from collections.abc import Iterable

def set_freeze_by_names(model, layer_names, freeze=True):
    if not isinstance(layer_names, Iterable):
        layer_names = [layer_names]
    for name, child in model.named_children():
        if name not in layer_names:
            continue
        for param in child.parameters():
            param.requires_grad = not freeze
            
def freeze_by_names(model, layer_names):
    set_freeze_by_names(model, layer_names, True)

def unfreeze_by_names(model, layer_names):
    set_freeze_by_names(model, layer_names, False)

def set_freeze_by_idxs(model, idxs, freeze=True):
    if not isinstance(idxs, Iterable):
        idxs = [idxs]
    num_child = len(list(model.children()))
    idxs = tuple(map(lambda idx: num_child + idx if idx < 0 else idx, idxs))
    for idx, child in enumerate(model.children()):
        if idx not in idxs:
            continue
        for param in child.parameters():
            param.requires_grad = not freeze
            
def freeze_by_idxs(model, idxs):
    set_freeze_by_idxs(model, idxs, True)

def unfreeze_by_idxs(model, idxs):
    set_freeze_by_idxs(model, idxs, False)
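
A usage sketch with a torchvision ResNet-18 (an assumed example model whose top-level children include conv1, bn1, layer1-layer4 and fc): freeze the backbone and leave only the classifier trainable.

import torchvision

resnet = torchvision.models.resnet18()
freeze_by_names(resnet, ["conv1", "bn1", "layer1", "layer2", "layer3", "layer4"])
unfreeze_by_names(resnet, ["fc"])
# Roughly equivalent by position: freeze_by_idxs(resnet, list(range(8)))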
posted @ 2022-05-06 19:23  FrancisQiu