CNN

输入层->卷积层（特征提取）
将输入与N个filter进行卷积计算，得到N个feature_map
$net_{o_{11}} = \mathrm{conv}(input, filter)$
神经元的输出采用relu激活函数
$out_{o_{11}} = \mathrm{activation}(net_{o_{11}}) = \max(0, net_{o_{11}})$
卷积层->池化层（特征降维）
池化层没有激活函数
Convolution layer output -> max pooling
$net_{m_{11}} = \max(o_{11}, o_{12}, o_{21}, o_{22})$
$out_{m_{11}} = net_{m_{11}}$
Pooling layer的window是不是要与convolution layer保持一致？——不需要：池化窗口的大小与卷积核的大小相互独立，例如下面的示例代码使用3x3的卷积核和2x2的池化窗口。
池化层->全连接层
flatten Pooling layer的output的所有element，作为全连接层的input
全连接层->输出层
将全连接层的输出通过softmax函数（activation function）计算后输出到输出层，得到不同类别的概率值，输出概率值最大的即为该图片的类别。

代码示例
用numpy读入的图像数组维度顺序是[h,w,c]（高、宽、通道）；如果使用caffe，默认的数据布局是[n,c,h,w]，其中n是batch_size，其余依次为通道、高、宽。

点击查看代码

import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

# Load the MNIST dataset as (images, labels) pairs for training and testing.
(train_images, train_labels), (test_images, test_labels) = datasets.mnist.load_data()
# Add a trailing channel axis (the first Conv2D layer expects 28x28x1 input) and
# divide by 255 to rescale the pixel values. Using -1 lets NumPy infer the
# number of samples instead of hard-coding 60000 / 10000.
train_images = train_images.reshape((-1, 28, 28, 1)).astype('float32') / 255
test_images = test_images.reshape((-1, 28, 28, 1)).astype('float32') / 255

# Manually turn a screenshot into a 28x28 uint8 greyscale matrix that can be
# fed to the network later on.
TARGET_SIZE = 28  # side length expected by the model input (28x28x1)

# Read the pixels inside a context manager so the file handle is released.
with Image.open("C:\\Users\\admin\\Pictures\\Screenshots\\屏幕截图 2025-04-16 093515.png") as screenshot:
    image_array = np.array(screenshot)
print(image_array.shape)

if image_array.ndim == 3:
    # Weighted sum of the R, G, B channels (any alpha channel is ignored).
    gray_matrix = np.dot(image_array[..., :3], [0.2989, 0.5870, 0.1140]).astype(np.uint8)
else:
    # Already a single-channel image: nothing to mix.
    gray_matrix = image_array.astype(np.uint8)
print(gray_matrix.shape)

# Fit the matrix to TARGET_SIZE x TARGET_SIZE: zero-pad on the right / top when
# it is too small, and drop leading rows / trailing columns when it is too
# large. For a 30x22 input this is exactly "pad 6 columns on the right, drop
# the first 2 rows"; other sizes no longer produce a non-28x28 matrix.
height, width = gray_matrix.shape
pad_top = max(0, TARGET_SIZE - height)
pad_right = max(0, TARGET_SIZE - width)
padded_matrix = np.pad(gray_matrix, ((pad_top, 0), (0, pad_right)), 'constant', constant_values=0)
padded_matrix = padded_matrix[-TARGET_SIZE:, :TARGET_SIZE]

plt.imshow(padded_matrix)
plt.show()

# Convert the integer class labels of both splits to one-hot vectors.
train_labels, test_labels = (
    tf.keras.utils.to_categorical(labels)
    for labels in (train_labels, test_labels)
)

# Build the convolutional network as a single declarative layer stack.
model = models.Sequential([
    # Conv block 1: 32 filters of size 3x3 with ReLU, on 28x28x1 inputs.
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    # Max pooling with a 2x2 window.
    layers.MaxPooling2D((2, 2)),
    # Conv block 2: 64 filters of size 3x3 with ReLU.
    layers.Conv2D(64, (3, 3), activation='relu'),
    # Second max pooling with a 2x2 window.
    layers.MaxPooling2D((2, 2)),
    # Conv block 3: 64 filters of size 3x3 with ReLU.
    layers.Conv2D(64, (3, 3), activation='relu'),
    # Flatten the feature maps into a vector for the dense layers.
    layers.Flatten(),
    # Fully connected layer with 64 units and ReLU.
    layers.Dense(64, activation='relu'),
    # Output layer: 10 units (one per class) with softmax.
    layers.Dense(10, activation='softmax'),
])

# Configure training: Adam optimizer, categorical cross-entropy loss,
# accuracy as the reported metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# Train for 5 epochs in mini-batches of 64 without progress output.
model.fit(x=train_images, y=train_labels, batch_size=64, epochs=5, verbose=0)
# Evaluate on the test split and report the accuracy.
test_loss, test_acc = model.evaluate(x=test_images, y=test_labels, verbose=0)
print(f'测试准确率: {test_acc:.4f}')

# Run the model over the whole test set.
predictions = model.predict(test_images)
# Reduce each row to the index of its largest entry: the predicted class for
# the model output, the encoded class for the one-hot labels.
predicted_labels = predictions.argmax(axis=1)
true_labels = test_labels.argmax(axis=1)

# Print the first 10 predictions next to their true labels.
for i in range(10):
    print(f'预测结果: {predicted_labels[i]}, 真实标签: {true_labels[i]}')

# Screenshot test: classify the manually pre-processed screenshot.
# Shape it as a batch holding one 28x28x1 image and rescale it the same way as
# the training data.
test = (padded_matrix.reshape(1, 28, 28, 1).astype('float32')) / 255
predictions = model.predict(test)
predicted_labels = np.argmax(predictions, axis=1)
# Only the prediction is reported: the screenshot has no ground-truth label,
# and the MNIST test-set labels do not describe it.
# NOTE(review): MNIST digits are presumably light strokes on a dark background;
# confirm the screenshot uses the same polarity, otherwise invert it first.
print(f'预测结果: {predicted_labels}')

# Screenshot test, PIL-based variant: let PIL do the greyscale conversion and
# resizing instead of padding/cropping by hand.
image_path = 'C:\\Users\\admin\\Pictures\\Screenshots\\屏幕截图 2025-04-16 093515.png'

# Preprocess: open -> 8-bit greyscale ('L') -> 28x28.
image = Image.open(image_path).convert('L').resize((28, 28))
# Rescale by 255 and reshape into a batch of 28x28x1 images.
image_array = (np.array(image) / 255.0).reshape(-1, 28, 28, 1)

# Predict and take the index of the highest score as the recognised digit.
predictions = model.predict(image_array)
predicted_digit = predictions.argmax()

print(f'识别结果: {predicted_digit}')

posted @ 2025-04-20 16:19 Jenny43473 阅读(33) 评论(0) 收藏举报

刷新页面返回顶部

JennyLuk

CNN

公告