
Dive into Deep Learning by Li Mu, Part 8: Softmax Regression

For multiclass classification, one-hot labels are used to represent the different outcomes. For example, in a three-class problem with outcomes A, B, and C, y ∈ {(0,0,1), (0,1,0), (1,0,0)}.
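PyTorch has a built-in helper for this encoding; a minimal sketch:

import torch
import torch.nn.functional as F

# Class indices 2, 1, 0 map to the one-hot rows (0,0,1), (0,1,0), (1,0,0).
y = torch.tensor([2, 1, 0])
print(F.one_hot(y, num_classes=3))
# tensor([[0, 0, 1],
#         [0, 1, 0],
#         [1, 0, 0]])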

For binary classification, the Sigmoid activation function is typically used; it outputs a single value in (0, 1), so the output layer needs only one neuron.
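For instance (a quick sketch), arbitrary real-valued inputs are squashed into (0, 1):

import torch

# Sigmoid maps any real number into the open interval (0, 1).
print(torch.sigmoid(torch.tensor([-2.0, 0.0, 3.0])))
# tensor([0.1192, 0.5000, 0.9526])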

For multiclass classification (say, n classes), the Softmax activation function is used. To estimate the conditional probability of every possible class, the model needs multiple outputs, so the output layer requires n neurons.

For multiclass classification, computing any single output requires all of the inputs, so the output layer is a fully connected layer.

We want the outputs of a multiclass model to represent probabilities over the classes, i.e. to be non-negative and sum to 1, so we use the softmax function:

$$\hat{y}_j = \mathrm{softmax}(\mathbf{o})_j = \frac{\exp(o_j)}{\sum_k \exp(o_k)}$$

The outputs of softmax regression are determined by an affine transformation of the input features; therefore, softmax regression is a linear model.
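As an aside, the naive exp-then-normalize implementation used in the code below can overflow for large logits. A common stabilization (not part of the original code, shown only as a sketch) subtracts the row maximum first:

import torch

def stable_softmax(X):
    # Subtracting the row-wise max leaves the result unchanged, since the
    # factor exp(-max) cancels in numerator and denominator, but it keeps
    # exp() from overflowing.
    X = X - X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(X)
    return X_exp / X_exp.sum(dim=1, keepdim=True)

print(stable_softmax(torch.tensor([[1000.0, 1001.0, 1002.0]])))
# tensor([[0.0900, 0.2447, 0.6652]])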

 

Definition of entropy

For a discrete random variable X with probability distribution P(x), the entropy H(X) is defined as:

$$H(X) = -\sum_x P(x)\log P(x)$$

Intuition:

If we view P(x) as the variable, its values lie in (0, 1], so the corresponding values of log P(x) lie in (-∞, 0].

For a deterministic event, H(X) = -1·log 1 = 0. The minimum entropy is 0, corresponding to a completely certain event.

For a uniform distribution, such as rolling a fair die, H(X) = -6·(1/6)·log(1/6) = log 6. The larger the entropy, the less predictable the event.
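Both cases are easy to check numerically; a small sketch (the entropy helper below is illustrative, not part of the post's code):

import torch

def entropy(p):
    # H(X) = -sum p(x) * log p(x); drop zero-probability outcomes,
    # since 0 * log 0 is taken to be 0.
    p = p[p > 0]
    return -(p * torch.log(p)).sum()

print(entropy(torch.tensor([1.0])))       # deterministic event: 0
print(entropy(torch.full((6,), 1 / 6)))   # fair die: log 6 ≈ 1.7918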

 

Cross entropy:

This is the loss function used for classification tasks, defined as:

$$L = -\sum_k y_k \log \hat{y}_k$$

For a multiclass task, only the true class k has y_k = 1 in the label and the rest are 0, so L = -log(ŷ_k): the closer the prediction ŷ_k is to 0, the larger the loss, and the closer it is to 1, the smaller the loss.

For binary classification, the cross-entropy loss simplifies to

$$L = -\bigl[\,y \log \hat{y} + (1-y)\log(1-\hat{y})\,\bigr]$$

which is essentially a special case of the multiclass cross-entropy.
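The equivalence is easy to verify against PyTorch's built-in F.binary_cross_entropy (a minimal sketch; the sample values are made up):

import torch
import torch.nn.functional as F

y = torch.tensor([1.0, 0.0, 1.0])          # true labels
y_hat = torch.tensor([0.9, 0.2, 0.6])      # predicted probabilities

manual = -(y * torch.log(y_hat) + (1 - y) * torch.log(1 - y_hat))
builtin = F.binary_cross_entropy(y_hat, y, reduction='none')
print(manual)   # tensor([0.1054, 0.2231, 0.5108])
print(builtin)  # same values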

 

For the hands-on softmax regression we use the FashionMNIST dataset. The full code is below; it takes a while to run:

import matplotlib.pyplot as plt
import torch
from torchvision import transforms
from torch.utils import data
import torchvision


def load_data_fashion_mnist(batch_size, resize=None):
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    # download=True so the script runs even without a pre-downloaded copy in ../data
    mnist_train = torchvision.datasets.FashionMNIST(root="../data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(root="../data", train=False, transform=trans, download=True)
    return (data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=get_dataloader_workers()),
            data.DataLoader(mnist_test, batch_size, shuffle=False, num_workers=get_dataloader_workers()))


def get_dataloader_workers():
    return 4


def get_fashion_mnist_labels(labels):
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]


def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):
    figsize = (num_cols * scale, num_rows * scale)
    fig, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            ax.imshow(img.numpy())
        else:
            ax.imshow(img)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes


def softmax(X):
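    # Exponentiate every logit, then normalize each row so it sums to 1.
    # Naive version: exp() can overflow for large logits (see the stable variant above).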
    X_exp = torch.exp(X)
    partition = X_exp.sum(dim=1, keepdim=True)
    return X_exp/partition


def net(X):
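    # Flatten each 28x28 image into a length-784 row vector, apply the affine
    # transform XW + b, then turn the logits into probabilities with softmax.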
    return softmax(torch.matmul(X.reshape(-1, W.shape[0]), W) + b)


def cross_entropy_loss(y_hat, y):
    # Tensor fancy indexing: passing two lists selects (row, column) element pairs
    return -torch.log(y_hat[range(len(y_hat)), y])


def accuracy(y_hat, y):
    # To replace `and` with `&`, both comparisons need parentheses, because `&` binds more tightly
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    cmp = y_hat.type(y.dtype) == y
    return float(cmp.type(y.dtype).sum())


class Accumulator:
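    # Keeps n running sums; used to accumulate loss and accuracy statistics.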
    def __init__(self, n):
        self.data = [0.0] * n

    def add(self, *args):
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]


def evaluate_accuracy(net, data_iter):
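    # Evaluate accuracy over the whole dataset: metric accumulates
    # (number of correct predictions, number of samples).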
    if isinstance(net, torch.nn.Module):
        net.eval()
    metric = Accumulator(2)
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]


# Minibatch stochastic gradient descent
def sgd(params, lr, batch_size):
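    # The loss is summed over the batch before backward(), so dividing the
    # gradient by batch_size averages the update over the batch.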
    with torch.no_grad():
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()


def updater(batch_size):
    return sgd([W, b], lr, batch_size)


def train_epoch_ch3(net, train_iter, loss, updater):
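    # metric accumulates (summed loss, number of correct predictions, number of samples).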
    if isinstance(net, torch.nn.Module):
        net.train()
    metric = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # Calling zero_grad() on the optimizer clears the gradients of all its parameters
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    return metric[0] / metric[2], metric[1] / metric[2]


def animator_display(xlabel, ylabel, legend, x, data):
    figsize = (12, 8)
    fig, axes = plt.subplots(figsize=figsize)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    for i, data_item in enumerate(data):
        axes.plot(x, data_item, label=legend[i])
    axes.legend()
    plt.show()


def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    data = [[], [], []]
    for epoch in range(num_epochs):
        print(epoch)
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        data[0].append(train_metrics[0])
        data[1].append(train_metrics[1])
        data[2].append(test_acc)
    animator_display(xlabel="epoch", ylabel="", legend=['train loss', 'train acc', 'test acc'],
                     x=range(1, num_epochs+1), data=data)


if __name__ == "__main__":
    _batch_size = 256
    num_input = 784
    num_output = 10
    lr = 0.1
    num_epochs = 10
    W = torch.normal(0, 0.01, size=(num_input, num_output), requires_grad=True)
    b = torch.zeros(num_output, requires_grad=True)
    train_iter, test_iter = load_data_fashion_mnist(_batch_size)
    train_ch3(net, train_iter, test_iter, cross_entropy_loss, num_epochs, updater)

    # 1. Load and inspect the FashionMNIST data
    # (with batch_size 256 and no resize, X has shape (256, 1, 28, 28), so the
    # original reshape(32, 64, 64) cannot work; slice 32 images for a 4x8 grid)
    # X, y = next(iter(train_iter))
    # show_images(X[:32].reshape(32, 28, 28), 4, 8, titles=get_fashion_mnist_labels(y[:32]))
    # plt.show()

    # 2. Model design:
    # Network: the raw images are 28*28 pixels; for softmax regression they are
    # first flattened into a length-784 vector and fed into a fully connected
    # layer of 10 neurons with a softmax activation.
    # The loss function is the cross-entropy loss.
    # (float literals below, since torch.exp is not implemented for integer tensors)
    # print(softmax(torch.tensor([[1.0, 2.0, 3.0], [3.0, 2.0, 1.0]])))

    # Accuracy computation test
    # test_y_hat = torch.tensor([[0.1, 0.2, 0.3], [0.3, 0.2, 0.1]])
    # test_y = torch.tensor([2, 0])
    # print(accuracy(test_y_hat, test_y) / len(test_y))

    # Plotting test
    # xlabel = "x"
    # ylabel = "y"
    # legend = ["A", "B", "C"]
    # x = range(1, 6)
    # y_data = [[0, 1, 1, 1, 1], [2, 2, 2, 2, 2], [3, 2, 1, 0, -1]]
    # animator_display(xlabel, ylabel, legend, x, y_data)

 
