第七章 验收手写数字识别

# ==================================================================================
# 10_11验收手写数字识别
# ==========

# 在解耦和验收测试集图片和标签处理上仍有较大优化空间


# 导入相关库
import numpy as np
import os
import struct
import pickle
import cv2


# 定义导入函数
def load_images(path):
    """Load an IDX3 (MNIST-format) image file into a (N, rows, cols) uint8 array.

    The 16-byte big-endian header holds: magic number, item count, row count,
    column count; pixel data follows as one unsigned byte per pixel.
    """
    with open(path, "rb") as f:
        data = f.read()
    # ">iiii": four big-endian 32-bit ints, per the IDX specification.
    _magic, num_items, rows, cols = struct.unpack(">iiii", data[:16])
    # Use the header's rows/cols instead of hard-coding 28x28 so any IDX3
    # file loads correctly; .copy() keeps the array writable (frombuffer
    # over bytes would otherwise be read-only).
    return (
        np.frombuffer(data, dtype=np.uint8, offset=16)
        .copy()
        .reshape(num_items, rows, cols)
    )


def load_labels(path):
    """Load an IDX1 (MNIST-format) label file into an int32 vector.

    The 8-byte header (magic number, item count) is skipped; each label is a
    single unsigned byte, widened to int32 to keep the original interface.
    """
    with open(path, "rb") as f:
        data = f.read()
    # One byte per label; astype copies, so the result is writable.
    return np.frombuffer(data, dtype=np.uint8, offset=8).astype(np.int32)


# 激活函数
# 定义sigmoid函数
def sigmoid(x):
    """Numerically stable element-wise sigmoid.

    Splits the input by sign so np.exp never sees a large positive argument
    (which would overflow):
      x >= 0 : 1 / (1 + e^-x)
      x <  0 : e^x / (1 + e^x)
    """
    # Fix: force a float result. np.zeros_like(x) on an integer array would
    # silently truncate every sigmoid value to 0 or 1.
    result = np.zeros_like(x, dtype=np.float64)
    positive_mask = x >= 0
    result[positive_mask] = 1 / (1 + np.exp(-x[positive_mask]))
    negative_mask = ~positive_mask
    exp_x = np.exp(x[negative_mask])
    result[negative_mask] = exp_x / (1 + exp_x)

    return result


# 定义softmax函数
def softmax(x):
    """Softmax over the last axis, with max-subtraction for stability.

    Fix: the original took the max over axis=-1 but summed over axis=1,
    which crashed on 1-D input and misbehaved for >2-D; use axis=-1 for both.
    """
    x = x - np.max(x, axis=-1, keepdims=True)
    ex = np.exp(x)
    result = ex / np.sum(ex, axis=-1, keepdims=True)
    # Keep probabilities away from exact 0 so log(p) in the loss is finite.
    return np.clip(result, 1e-10, 1e10)


# 训练集编码处理
# 定义独热编码函数
def make_onehot(labels, class_num):
    """Convert integer class labels (shape (N,)) into a one-hot matrix (N, class_num).

    Labels are flattened and coerced to int so float-typed label arrays
    (as produced by the test-set loader in this file) also work.
    """
    labels = np.asarray(labels).reshape(-1).astype(np.int64)
    result = np.zeros((labels.shape[0], class_num))
    # Fancy indexing sets all N hot entries in one vectorized step
    # instead of a Python-level loop.
    result[np.arange(labels.shape[0]), labels] = 1
    return result


# 定义dataset类
class Dataset:
    """Thin container pairing an image array with its label array."""

    def __init__(self, all_images, all_labels):
        self.all_images = all_images
        self.all_labels = all_labels

    def __getitem__(self, index):
        # Index both arrays in lockstep and hand back the (image, label) pair.
        return self.all_images[index], self.all_labels[index]

    def __len__(self):
        return len(self.all_images)


# 定义dataloader类
class DataLoader:
    """Mini-batch iterator over a Dataset.

    Re-shuffles the index array at the start of every epoch (when
    shuffle=True) and yields (batch_images, batch_labels) tuples of at
    most batch_size items; the final batch may be shorter.
    """

    def __init__(self, dataset, batch_size, shuffle=True):
        self.dataset = dataset
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.idx = np.arange(len(self.dataset))

    def __iter__(self):
        # New epoch: reshuffle so batches differ between epochs.
        if self.shuffle:
            np.random.shuffle(self.idx)
        self.cursor = 0
        return self

    def __next__(self):
        if self.cursor >= len(self.dataset):
            raise StopIteration

        end = min(self.cursor + self.batch_size, len(self.dataset))
        batch_idx = self.idx[self.cursor:end]
        self.cursor = end

        # Fix (decoupling): fetch through the dataset's __getitem__ instead
        # of reaching into its all_images / all_labels attributes, so any
        # Dataset-like object with array indexing works.
        return self.dataset[batch_idx]


# 父类Module,查看各层结构
# 定义Module类
class Module:
    """Base class for all layers.

    Collects a printable description (info) used by __repr__ and the list
    of trainable Parameter objects (params) gathered by Model.parameter().
    """

    def __init__(self):
        # Fix: the original wrote "/n" (a literal slash + n), not a newline.
        self.info = "Module:\n"
        self.params = []

    def __repr__(self):
        return self.info


# 定义Parameter类
class Parameter:
    """A trainable tensor bundled with its gradient and momentum buffers."""

    def __init__(self, weight):
        self.weight = weight
        # Gradient and momentum buffers start at zero, matching the
        # weight's shape and dtype.
        self.grad = np.zeros_like(weight)
        self.velocity = np.zeros_like(weight)  # used by MSGD momentum updates


# 定义linear类
class Linear(Module):
    """Fully connected layer: y = x @ W + B."""

    def __init__(self, in_features, out_features):
        super().__init__()
        self.info += f"**    Linear({in_features}, {out_features})"

        # He initialization (std = sqrt(2 / fan_in)), suited to ReLU nets.
        he_std = np.sqrt(2 / in_features)
        self.W = Parameter(
            np.random.normal(0, he_std, size=(in_features, out_features))
        )
        # Biases start at zero rather than random.
        self.B = Parameter(np.zeros((1, out_features)))

        self.params.extend([self.W, self.B])

    def forward(self, x):
        # Cache the input; backward needs it for the weight gradient.
        self.x = x
        return x @ self.W.weight + self.B.weight

    def backward(self, G):
        self.W.grad = self.x.T @ G
        # NOTE(review): bias grad uses the batch mean while the weight grad
        # is a sum — kept as-is to preserve the original training behavior.
        self.B.grad = np.mean(G, axis=0, keepdims=True)
        return G @ self.W.weight.T


# 定义Conv2D类
class Conv2D(Module):
    """Despite the name, this is a plain fully connected layer (x @ W + B);
    no spatial convolution is performed."""

    def __init__(self, in_channel, out_channel):
        super().__init__()
        self.info += f"     Conv2D({in_channel, out_channel})"

        # He initialization, matching the Linear layer.
        he_std = np.sqrt(2 / in_channel)
        self.W = Parameter(
            np.random.normal(0, he_std, size=(in_channel, out_channel))
        )
        self.B = Parameter(np.zeros((1, out_channel)))

        self.params.extend([self.W, self.B])

    def forward(self, x):
        self.x = x  # cached for the backward pass
        return x @ self.W.weight + self.B.weight

    def backward(self, G):
        self.W.grad = self.x.T @ G
        self.B.grad = np.mean(G, axis=0, keepdims=True)
        return G @ self.W.weight.T


# 定义Conv1D类
class Conv1D(Module):
    """Despite the name, a plain fully connected layer (x @ W + B) with
    standard-normal weight initialization. Unused by the Model below."""

    def __init__(self, in_channel, out_channel):
        super().__init__()
        self.info += f"     Conv1D({in_channel,out_channel})"
        self.W = Parameter(np.random.normal(0, 1, size=(in_channel, out_channel)))
        self.B = Parameter(np.zeros((1, out_channel)))

        self.params.extend([self.W, self.B])

    def forward(self, x):
        self.x = x  # cached for the backward pass
        return x @ self.W.weight + self.B.weight

    def backward(self, G):
        self.W.grad = self.x.T @ G
        self.B.grad = np.mean(G, axis=0, keepdims=True)
        return G @ self.W.weight.T


# 优化器的父类
# 定义Optimizer类
class Optimizer:
    """Base optimizer: stores the parameter list and the learning rate."""

    def __init__(self, parameters, lr):
        self.parameters = parameters
        self.lr = lr

    def zero_grad(self):
        # Reset every gradient buffer in place before the next backward pass.
        for param in self.parameters:
            param.grad.fill(0)


# 定义SGD类,学习率较大
class SGD(Optimizer):
    """Vanilla stochastic gradient descent: W <- W - lr * grad.
    Typically used with a comparatively large learning rate."""

    def step(self):
        for param in self.parameters:
            param.weight -= self.lr * param.grad


# 定义MSGD类,学习率较大
class MSGD(Optimizer):
    """SGD with classical momentum; u is the momentum coefficient."""

    def __init__(self, parameters, lr, u):
        super().__init__(parameters, lr)
        self.u = u

    def step(self):
        for param in self.parameters:
            # V_t = u * V_{t-1} + g_t, then W <- W - lr * V_t.
            param.velocity = self.u * param.velocity + param.grad
            param.weight -= self.lr * param.velocity


# 定义Adam类,学习率一般较小10^-3到10^-6
class Adam(Optimizer):
    """Adam optimizer (Kingma & Ba, 2015); typical lr 1e-3 .. 1e-6.

    Maintains per-parameter first (m) and second (v) moment estimates with
    bias correction driven by the step counter t.
    """

    def __init__(self, parameters, lr, beta1=0.9, beta2=0.999, e=1e-8):
        super().__init__(parameters, lr)
        self.beta1 = beta1
        self.beta2 = beta2
        self.e = e

        self.t = 0  # global step counter used for bias correction

        for p in self.parameters:
            # Moment buffers, one per parameter tensor.
            p.m = np.zeros_like(p.weight)
            p.v = np.zeros_like(p.weight)

    def step(self):
        self.t += 1
        for p in self.parameters:
            gt = p.grad
            p.m = self.beta1 * p.m + (1 - self.beta1) * gt
            p.v = self.beta2 * p.v + (1 - self.beta2) * gt**2
            # Bias-corrected moment estimates.
            mt_hat = p.m / (1 - self.beta1**self.t)
            vt_hat = p.v / (1 - self.beta2**self.t)
            # Fix: epsilon belongs OUTSIDE the square root per the Adam
            # paper (W -= lr * m_hat / (sqrt(v_hat) + e)); the original
            # computed sqrt(v_hat + e).
            p.weight = p.weight - self.lr * mt_hat / (np.sqrt(vt_hat) + self.e)


# 定义Sigmoid类
class Sigmoid(Module):
    """Logistic sigmoid activation layer; caches its output for backward."""

    def __init__(self):
        super().__init__()
        self.info += "**    Sigmoid()"

    def forward(self, x):
        self.out = sigmoid(x)
        return self.out

    def backward(self, G):
        # d(sigmoid)/dx = s * (1 - s), using the cached forward output.
        return G * self.out * (1 - self.out)


# 定义Tanh类
class Tanh(Module):
    """Hyperbolic tangent activation, built on the stable sigmoid helper."""

    def __init__(self):
        super().__init__()
        self.info += "**    Tanh()"

    def forward(self, x):
        # tanh(x) == 2 * sigmoid(2x) - 1.
        self.out = sigmoid(2 * x) * 2 - 1
        return self.out

    def backward(self, G):
        # d(tanh)/dx = 1 - tanh(x)^2.
        return G * (1 - self.out * self.out)


# 定义Softmax类
class Softmax(Module):
    """Softmax output layer.

    backward expects the one-hot label matrix as G and returns the fused
    softmax + cross-entropy gradient (p - y) / batch_size.
    """

    def __init__(self):
        super().__init__()
        self.info += "**    Softmax()"

    def forward(self, x):
        self.probs = softmax(x)
        return self.probs

    def backward(self, G):
        batch = len(G)
        return (self.probs - G) / batch


# 定义ReLU类
class ReLU(Module):
    """Rectified linear unit activation: max(0, x)."""

    def __init__(self):
        super().__init__()
        self.info += "**    ReLU()"

    def forward(self, x):
        self.x = x  # cache pre-activation for the gradient mask
        return np.maximum(0, x)

    def backward(self, G):
        # Pass the gradient only where the forward input was strictly
        # positive; returns a fresh array (caller's G is untouched).
        return np.where(self.x > 0, G, 0)


# 定义Dropout类


class Dropout(Module):
    """Inverted dropout: zeroes each activation with probability p during
    training and rescales survivors by 1/(1-p); identity in eval mode."""

    def __init__(self, p=0.3):
        super().__init__()
        self.info += f"**    Dropout(p={p})"
        self.p = p
        self.is_training = True  # toggled by Model.train() / Model.eval()

    def forward(self, x):
        if not self.is_training:
            return x  # no-op at inference time

        # Keep each unit with probability 1 - p.
        r = np.random.rand(*x.shape)
        self.mask = r >= self.p

        # Inverted-dropout scaling keeps the expected activation unchanged.
        return (x * self.mask) / (1 - self.p)

    def backward(self, G):
        if not self.is_training:
            return G  # eval mode: gradient passes through unchanged

        # Fix: operate on a fresh array — the original zeroed entries of
        # the caller's gradient array in place (G[~self.mask] = 0).
        G = np.where(self.mask, G, 0)
        return G / (1 - self.p)


# 定义ModelList类
class ModelList:
    """Ordered container of layers; runs forward in order and backward in
    reverse order."""

    def __init__(self, layers):
        self.layers = layers

    def forward(self, x):
        # Thread the activation through every layer in sequence.
        for layer in self.layers:
            x = layer.forward(x)
        return x

    def backward(self, G):
        # Propagate the gradient through the layers in reverse order.
        for layer in reversed(self.layers):
            G = layer.backward(G)

    def __repr__(self):
        # Fix: the original joined with the two-character literal "/n";
        # use a real newline between layer descriptions.
        return "".join(layer.info + "\n" for layer in self.layers)


# 定义Model类
class Model:
    """784 -> 512 -> 256 -> 10 MLP for MNIST digits with ReLU/Tanh
    activations, dropout regularization, and a softmax output."""

    def __init__(self):
        layers = [
            Linear(784, 512),
            ReLU(),
            Dropout(0.2),
            Conv2D(512, 256),  # NOTE: Conv2D in this file is really a linear layer
            Tanh(),
            Dropout(0.1),
            Linear(256, 10),
            Softmax(),
        ]
        self.model_list = ModelList(layers)

    def forward(self, x, label=None):
        """With label: return the mean cross-entropy loss (training mode).
        Without label: return predicted class indices (inference mode)."""
        pre = self.model_list.forward(x)

        if label is None:
            return np.argmax(pre, axis=-1)

        self.label = label
        # Cross-entropy against the one-hot labels; softmax() clips its
        # output away from 0, so the log stays finite.
        return -np.mean(self.label * np.log(pre))

    def backward(self):
        # Softmax.backward consumes the stored one-hot labels and emits
        # the fused (p - y) / batch gradient.
        self.model_list.backward(self.label)

    def train(self):
        """Switch to training mode (enables Dropout layers)."""
        for layer in self.model_list.layers:
            # Only Dropout layers carry an is_training flag.
            if hasattr(layer, "is_training"):
                layer.is_training = True

    def eval(self):
        """Switch to evaluation/inference mode (disables Dropout layers)."""
        for layer in self.model_list.layers:
            if hasattr(layer, "is_training"):
                layer.is_training = False

    def __repr__(self):
        return repr(self.model_list)

    def parameter(self):
        """Collect every layer's Parameter objects into one flat list."""
        all_params = []
        for layer in self.model_list.layers:
            all_params.extend(layer.params)
        return all_params


# 主函数
if __name__ == '__main__':
    # Evaluate a pickled model checkpoint on a directory of digit images.
    # NOTE(review): pickle.load executes arbitrary code from the file —
    # only load checkpoints from trusted sources.
    with open('D:/my code/0.9838.pkl', 'rb') as f:
        model = pickle.load(f)

    # Fix: switch to inference mode. Without eval() the deserialized model
    # may still be in training mode, so Dropout layers would randomly zero
    # activations and corrupt the accuracy measurement.
    model.eval()

    root_path = 'D:/my code/Python/NLP basic/data/test_images'  # test image dir
    images_file = os.listdir(root_path)  # reuse root_path instead of repeating the literal

    test_images = np.zeros((len(images_file), 784))  # one flattened image per row
    test_labels = np.zeros((len(images_file), 1))

    for fi, image_name in enumerate(images_file):
        path = os.path.join(root_path, image_name)
        img = cv2.imread(path)
        img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # to single channel
        img = cv2.resize(img_gray, (28, 28))  # network expects 28*28 = 784 inputs

        test_images[fi] = img.reshape(-1)
        # Filename convention: the character before the 4-char extension is
        # the digit label (e.g. "xxx_7.png" -> 7) — TODO confirm against
        # the actual test-image naming scheme.
        test_labels[fi] = int(image_name[-5:-4])

    batch_size = 1
    test_dataset = Dataset(test_images, test_labels)
    test_dataloader = DataLoader(test_dataset, batch_size)

    right_num = 0
    for x, batch_labels in test_dataloader:
        pre_idx = model.forward(x)  # no label -> returns predicted indices
        right_num += np.sum(pre_idx == batch_labels)

    acc = right_num / len(test_images)
    print(acc)

image

image

posted @ 2025-10-11 18:54  李大嘟嘟  阅读(14)  评论(0)    收藏  举报