Chapter 7: Handwritten Digit Recognition V1

# Import the required libraries
import numpy as np
import os
import struct
import matplotlib.pyplot as plt


# Loaders for the MNIST IDX files
def load_images(path):
    with open(path, "rb") as f:
        data = f.read()
    # The 16-byte header stores the magic number, image count, rows and cols
    # as big-endian int32 values.
    magic_number, num_items, rows, cols = struct.unpack(">iiii", data[:16])
    return np.asanyarray(bytearray(data[16:]), dtype=np.uint8).reshape(
        num_items, rows, cols
    )


def load_labels(file):
    with open(file, "rb") as f:
        data = f.read()
    # Skip the 8-byte header (magic number and label count); one byte per label follows.
    return np.asanyarray(bytearray(data[8:]), dtype=np.int32)
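
# A minimal illustrative check (not part of the original pipeline): the image
# header is four big-endian int32 values -- magic number, item count, rows,
# cols -- which is exactly what the ">iiii" format string above unpacks.
# The header below is hand-built for this demonstration (2051 is the IDX
# magic number for image files).
_example_header = struct.pack(">iiii", 2051, 2, 28, 28)
assert struct.unpack(">iiii", _example_header) == (2051, 2, 28, 28)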


# Numerically stable sigmoid: handle positive and negative inputs separately
# so that np.exp never receives a large positive argument.
def sigmoid(x):
    result = np.zeros_like(x)
    # For x >= 0, exp(-x) <= 1, so 1 / (1 + exp(-x)) is safe.
    positive_mask = x >= 0
    result[positive_mask] = 1 / (1 + np.exp(-x[positive_mask]))
    # For x < 0, rewrite as exp(x) / (1 + exp(x)) so that exp(x) <= 1.
    negative_mask = x < 0
    exp_x = np.exp(x[negative_mask])
    result[negative_mask] = exp_x / (1 + exp_x)

    return result
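
# Quick illustrative check (assumed test values, not from the dataset): the
# piecewise form above stays finite even for extreme inputs, where a naive
# 1 / (1 + np.exp(-x)) would overflow for large negative x.
_probe = sigmoid(np.array([-1000.0, 0.0, 1000.0]))
assert np.allclose(_probe, [0.0, 0.5, 1.0])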


# Softmax with the usual max-subtraction trick for numerical stability
def softmax(x):
    # Subtract the row-wise maximum so the exponentials never overflow.
    max_x = np.max(x, axis=-1, keepdims=True)
    x = x - max_x

    ex = np.exp(x)
    sum_ex = np.sum(ex, axis=-1, keepdims=True)

    result = ex / sum_ex

    # Clip away exact zeros so that np.log(p) in the loss never sees log(0).
    result = np.clip(result, 1e-10, 1e10)
    return result
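
# Illustrative check (synthetic logits, not from the model): thanks to the
# max subtraction, even very large logits do not overflow, and every output
# row sums to 1.
_logits = np.array([[1000.0, 0.0, -1000.0], [1.0, 2.0, 3.0]])
assert np.allclose(softmax(_logits).sum(axis=1), 1.0)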


# One-hot encoding: turn an array of integer class labels into 0/1 indicator rows
def make_onehot(labels, class_num):
    result = np.zeros((labels.shape[0], class_num))
    for idx, cls in enumerate(labels):
        result[idx, cls] = 1
    return result
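
# Illustrative example (synthetic labels): class 2 out of 4 classes becomes
# the indicator row [0, 0, 1, 0].
assert np.array_equal(
    make_onehot(np.array([2, 0]), 4),
    np.array([[0, 0, 1, 0], [1, 0, 0, 0]]),
)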


# Dataset: a thin wrapper that pairs images with their labels
class Dataset:
    def __init__(self, all_images, all_labels):
        self.all_images = all_images
        self.all_labels = all_labels

    def __getitem__(self, index):
        image = self.all_images[index]
        label = self.all_labels[index]
        return image, label

    def __len__(self):
        return len(self.all_images)
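
# Illustrative usage with synthetic arrays (not MNIST data): the Dataset class
# simply pairs an image array with a label array and supports indexing and len().
_toy = Dataset(np.zeros((5, 784)), np.zeros((5, 10)))
assert len(_toy) == 5 and _toy[0][0].shape == (784,)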


# DataLoader: iterates over a Dataset in (optionally shuffled) mini-batches
class DataLoader:
    def __init__(self, dataset, batch_size, shuffle=True):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.idx = np.arange(len(self.dataset))

    def __iter__(self):
        # Reshuffle the indices at the start of each epoch if shuffling is enabled
        if self.shuffle:
            np.random.shuffle(self.idx)
        self.cursor = 0
        return self

    def __next__(self):
        if self.cursor >= len(self.dataset):
            raise StopIteration

        # Look up the indices belonging to the current batch
        batch_idx = self.idx[
            self.cursor : min(self.cursor + self.batch_size, len(self.dataset))
        ]

        batch_images = self.dataset.all_images[batch_idx]
        batch_labels = self.dataset.all_labels[batch_idx]

        self.cursor += self.batch_size
        return batch_images, batch_labels
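
# Illustrative usage with synthetic data: 5 samples with batch_size=2 yield
# batches of 2, 2 and 1 per epoch; the final short batch is kept, not dropped.
_loader = DataLoader(
    Dataset(np.arange(5).reshape(5, 1), np.arange(5)), batch_size=2, shuffle=False
)
assert [len(lbls) for _, lbls in _loader] == [2, 2, 1]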


# Main script
if __name__ == "__main__":
    # Load the training images and labels; pixel values are scaled to [0, 1]
    train_images = (
        load_images(
            os.path.join(
                "Python", "NLP basic", "data", "minist", "train-images.idx3-ubyte"
            )
        )
        / 255
    )
    train_labels = make_onehot(
        load_labels(
            os.path.join(
                "Python", "NLP basic", "data", "minist", "train-labels.idx1-ubyte"
            )
        ),
        10,
    )

    # Load the test images and labels (used here as the validation set)
    dev_images = (
        load_images(
            os.path.join(
                "Python", "NLP basic", "data", "minist", "t10k-images.idx3-ubyte"
            )
        )
        / 255
    )
    dev_labels = load_labels(
        os.path.join("Python", "NLP basic", "data", "minist", "t10k-labels.idx1-ubyte")
    )

    # Hyperparameters
    epochs = 10
    lr = 0.08
    batch_size = 200
    shuffle = True

    # Flatten each 28x28 image into a 784-dimensional vector
    train_images = train_images.reshape(60000, 784)
    dev_images = dev_images.reshape(-1, 784)

    # Wrap the arrays in Dataset and DataLoader objects
    train_dataset = Dataset(train_images, train_labels)
    train_dataloader = DataLoader(train_dataset, batch_size, shuffle)

    dev_dataset = Dataset(dev_images, dev_labels)
    dev_dataloader = DataLoader(dev_dataset, batch_size, shuffle=False)  # no need to shuffle for evaluation

    # Weights and biases. There is no w2/b2: "layer 2" in this naming scheme is
    # the sigmoid activation, which has no parameters.
    w1 = np.random.normal(0, 1, size=(784, 512))  # layer-1 weights
    w3 = np.random.normal(0, 1, size=(512, 256))  # layer-3 weights

    w4 = np.random.normal(0, 1, size=(256, 10))  # layer-4 weights

    b1 = np.random.normal(0, 1, size=(1, 512))  # layer-1 bias
    b3 = np.random.normal(0, 1, size=(1, 256))  # layer-3 bias
    b4 = np.random.normal(0, 1, size=(1, 10))  # layer-4 bias
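
    # Note (not in the original post): a standard-normal initialisation with
    # std 1 can push the sigmoid into saturation early on. A common alternative
    # is to scale the initial weights by 1/sqrt(fan_in), e.g.
    #   w1 = np.random.normal(0, 1 / np.sqrt(784), size=(784, 512))
    # The code below keeps the original initialisation.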

    # Training loop
    for e in range(epochs):
        for batch_images, batch_labels in train_dataloader:
            # Forward pass
            H1 = np.dot(batch_images, w1) + b1  # layer-1 pre-activation
            H2 = sigmoid(H1)  # layer-2 output (sigmoid activation)
            H3 = np.dot(H2, w3) + b3  # layer-3 output (linear, no activation)
            H4 = np.dot(H3, w4) + b4  # layer-4 output (logits)

            p = softmax(H4)  # predicted class probabilities

            # Cross-entropy loss on the current batch (computed for monitoring only)
            loss = -np.mean(np.sum(batch_labels * np.log(p), axis=1))
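
            # Backward-pass derivation: with cross-entropy loss
            # L = -mean(sum(y * log(p))) and p = softmax(H4), the gradient with
            # respect to the logits is dL/dH4 = (p - y) / N, where N is the batch
            # size. Every earlier gradient then follows from the chain rule
            # through the corresponding np.dot and the sigmoid.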

            # Backward pass
            # Layer 4: gradient of softmax + cross-entropy w.r.t. the logits
            G4 = (p - batch_labels) / batch_images.shape[0]  # already averaged over the batch
            dw4 = np.dot(H3.T, G4)  # layer-4 weight gradient
            db4 = np.sum(G4, axis=0, keepdims=True)  # layer-4 bias gradient (G4 already carries the 1/N factor)

            # Layer 3
            G3 = np.dot(G4, w4.T)  # error at H3
            dw3 = np.dot(H2.T, G3)  # layer-3 weight gradient
            db3 = np.sum(G3, axis=0, keepdims=True)  # layer-3 bias gradient

            # Layer 2: propagate the error back through the linear map w3
            G2 = np.dot(G3, w3.T)

            # Layer 1: multiply by the sigmoid derivative H2 * (1 - H2)
            G1 = G2 * (H2 * (1 - H2))
            dw1 = np.dot(batch_images.T, G1)  # layer-1 weight gradient
            db1 = np.sum(G1, axis=0, keepdims=True)  # layer-1 bias gradient

            # Gradient-descent update of all weights and biases
            w1 -= lr * dw1
            b1 -= lr * db1
            w3 -= lr * dw3
            b3 -= lr * db3
            w4 -= lr * dw4
            b4 -= lr * db4

        # Evaluate on the validation set after each epoch and report accuracy
        right_num = 0
        for batch_images, batch_labels in dev_dataloader:
            H1 = np.dot(batch_images, w1) + b1  # layer-1 pre-activation
            H2 = sigmoid(H1)  # layer-2 output (sigmoid activation)
            H3 = np.dot(H2, w3) + b3  # layer-3 output
            H4 = np.dot(H3, w4) + b4  # layer-4 output (logits)
            p = softmax(H4)  # predicted class probabilities

            pre_idx = np.argmax(p, axis=-1)  # predicted class index
            right_num += np.sum(pre_idx == batch_labels)  # count correct predictions

        acc = right_num / len(dev_images)  # accuracy over the full validation set
        print(f"Epoch {e + 1}, Acc: {acc:.4f}")

