Jupyter笔记[1]-MNIST手写数字识别

jupyter集成了常用python框架

docker的jupyter/tensorflow-notebook镜像包含了tensorflow,scipy等主流框架
我们还可以在Jupyter内打开终端，用pip或其他工具安装软件包
除了Python,Jupyter还支持其他语言(Julia,markdown等)

主流框架对比

docker环境

[https://jupyter-docker-stacks.readthedocs.io/en/latest/index.html]
解决docker容器启动秒退

#亲测可用
docker pull jupyter/tensorflow-notebook:2022-05-05
#这样运行不会秒退，终端有root权限
docker run -p 91:8888 -v /home/server/files:/home/jovyan/work --user root -e CHOWN_EXTRA=/home/server/files -d jupyter/tensorflow-notebook:2022-05-05 /bin/sh -c "jupyter server --allow-root;while true;do echo hello;sleep 5;done"
docker ps
docker exec -it 7f4f3fc63294 bash
jupyter server list
exit 
#容器仍在后台运行

按照说明打开网址
http://192.168.50.80:91/lab?token=191b708f1701860eed3548c9a0cb15aa5a7dc358c7bbb382
注意端口号和token
http://192.168.50.80:91/lab
这里可以设置密码

token is the secret token printed in the console.

依次按下Ctrl+P和Ctrl+Q，可以退出容器，同时让容器继续在后台运行。
~~不能用的:docker pull jupyter/tensorflow-notebook:latest~~
镜像是每日自动生成，但是不保证每天的都能用，笔者试了很多总算找到能用的😮‍💨,其他的会有网络错误

mnist数据集

[http://yann.lecun.com/exdb/mnist/]
train-images-idx3-ubyte.gz: training set images (9912422 bytes)
train-labels-idx1-ubyte.gz: training set labels (28881 bytes)
t10k-images-idx3-ubyte.gz: test set images (1648877 bytes)
t10k-labels-idx1-ubyte.gz: test set labels (4542 bytes)

TensorFlow提供了一个库可以对MNIST数据集进行下载和解压。具体的是使用TensorFlow中input_data.py脚本来读取数据及标签，使用这种方式时，可以不用事先下载好数据集，它会自动下载并存放到你指定的位置。

但是新版本tensorflow好像并没有input_data.py文件,需要手动读取数据集.

pycharm加载本地数据集_PyTorch加载自己的数据集
 MNIST或Fashion-MNIST数据集的加载——从本地文件加载(使用torch.utils.data.DataLoader)

手动在Jupyter的终端内解压数据集

#进入你放数据集的目录
cd /work/dataset/MNIST 
ls
sudo gzip -dk t10k-images-idx3-ubyte.gz
sudo gzip -dk train-images-idx3-ubyte.gz
sudo gzip -dk t10k-labels-idx1-ubyte.gz
sudo gzip -dk train-labels-idx1-ubyte.gz
#解压后把文件名中的"-idx"改为".idx"，即重命名为 t10k-images.idx3-ubyte     t10k-labels.idx1-ubyte     train-images.idx3-ubyte     train-labels.idx1-ubyte

读取二进制文件

python -m pip install torchvision

#加载Fashion-MNIST数据集
class LoadFashionMNIST:
    """Load the images of one MNIST / Fashion-MNIST split from an IDX file.

    An instance stores the image and label file paths of a single split
    (training or test); the image file is decoded on demand.
    """

    def __init__(self, imset_path, labelset_path=None, train=True):
        """Remember the file paths of one dataset split.

        :param imset_path: path of the uncompressed ``idx3-ubyte`` image file
        :param labelset_path: path of the uncompressed ``idx1-ubyte`` label file
        :param train: truthy for the training split, falsy for the test split;
            decides whether the paths are stored in the ``train_*_name`` or
            the ``test_*_name`` attributes
        """
        self.train = train
        if self.train:
            self.train_images_name = imset_path
            self.train_labels_name = labelset_path
        else:
            self.test_images_name = imset_path
            self.test_labels_name = labelset_path

    # Decode the image set
    def _decode_idx3_ubyte(self, file_path):
        """Decode an uncompressed IDX3 image file into a numpy array.

        The file must already be decompressed (a ``.idx3-ubyte`` file);
        a gzip archive does not decode correctly.

        :param file_path: path of the image file
        :return: array of shape ``(numimgs, rows, cols)`` created with
            ``np.empty``'s default dtype
        """
        # Read the raw bytes; the context manager guarantees the handle is
        # closed even if reading fails.
        with open(file_path, 'rb') as fh:
            bin_data = fh.read()

        # Header: magic number, image count, rows and columns, each a
        # big-endian unsigned 32-bit integer.
        header_fmt = '>IIII'
        offset = 0
        magics, numimgs, rows, cols = struct.unpack_from(header_fmt, bin_data, offset)
        print("魔数“%d，图像数量：%d张，图片大小：%d*%d" %(magics,numimgs,rows,cols))

        # Pixel data starts right after the header.
        img_size = rows * cols
        offset += struct.calcsize(header_fmt)
        print(offset)
        # Each pixel is one unsigned byte ('B'); the repeat count makes one
        # unpack call read a whole image instead of a single value.
        img_fmt = '>' + str(img_size) + 'B'
        print(img_fmt)
        img_bytes = struct.calcsize(img_fmt)    # loop-invariant, computed once

        images = np.empty((numimgs, rows, cols))
        for i in range(numimgs):
            if (i + 1) % 10000 == 0:
                print('train集已解析%d'%(i+1)+'张')
            pixels = struct.unpack_from(img_fmt, bin_data, offset)
            images[i] = np.array(pixels).reshape((rows, cols))
            offset += img_bytes

        return images

MNIST手写数字识别原理

[https://www.douban.com/note/667487805/]
变分推断(variational inference)：
首先，我们的原始目标是，需要根据已有数据推断需要的分布p；当p不容易表达，不能直接求解时，可以尝试用变分推断的方法，即，寻找容易表达和求解的分布q，当q和p的差距很小的时候，q就可以作为p的近似分布，成为输出结果了。
MNIST手写字体识别最基础的Tensorflow代码例子, 其使用的概念原理就是变分推断,通过所谓的交叉熵最小，实质就是通过反向传播调整参数，实现样本和目标值的统计分布最接近。
[https://www.cnblogs.com/huliangwen/p/7455382.html]

实现

"""
代码：读取Fashion-MNIST数据集
"""
import os
import struct

import numpy as np
from matplotlib import pyplot as plt
from PIL import Image

#加载Fashion-MNIST数据集
class LoadFashionMNIST:
    """Load one MNIST / Fashion-MNIST split from uncompressed IDX files.

    An instance stores the image and label file paths of a single split
    (training or test). Files are decoded on demand, so every getter
    re-reads its file. Helper methods convert the decoded arrays to PIL
    images, save them as JPEG files, or tile several images into one array.
    """

    def __init__(self, imset_path, labelset_path=None, train=True):
        """Remember the file paths of one dataset split.

        :param imset_path: path of the uncompressed ``idx3-ubyte`` image file
        :param labelset_path: path of the uncompressed ``idx1-ubyte`` label file
        :param train: truthy for the training split, falsy for the test split;
            decides whether the paths are stored in the ``train_*_name`` or
            the ``test_*_name`` attributes
        """
        self.train = train
        if self.train:
            self.train_images_name = imset_path
            self.train_labels_name = labelset_path
        else:
            self.test_images_name = imset_path
            self.test_labels_name = labelset_path

    # Decode the image set
    def _decode_idx3_ubyte(self, file_path):
        """Decode an uncompressed IDX3 image file into a numpy array.

        The file must already be decompressed (a ``.idx3-ubyte`` file);
        a gzip archive does not decode correctly.

        :param file_path: path of the image file
        :return: array of shape ``(numimgs, rows, cols)`` created with
            ``np.empty``'s default dtype
        """
        # Read the raw bytes; the context manager guarantees the handle is
        # closed even if reading fails.
        with open(file_path, 'rb') as fh:
            bin_data = fh.read()

        # Header: magic number, image count, rows and columns, each a
        # big-endian unsigned 32-bit integer.
        header_fmt = '>IIII'
        offset = 0
        magics, numimgs, rows, cols = struct.unpack_from(header_fmt, bin_data, offset)
        print("魔数“%d，图像数量：%d张，图片大小：%d*%d" %(magics,numimgs,rows,cols))

        # Pixel data starts right after the header.
        img_size = rows * cols
        offset += struct.calcsize(header_fmt)
        print(offset)
        # Each pixel is one unsigned byte ('B'); the repeat count makes one
        # unpack call read a whole image instead of a single value.
        img_fmt = '>' + str(img_size) + 'B'
        print(img_fmt)
        img_bytes = struct.calcsize(img_fmt)    # loop-invariant, computed once

        images = np.empty((numimgs, rows, cols))
        for i in range(numimgs):
            if (i + 1) % 10000 == 0:
                print('train集已解析%d'%(i+1)+'张')
            pixels = struct.unpack_from(img_fmt, bin_data, offset)
            images[i] = np.array(pixels).reshape((rows, cols))
            offset += img_bytes

        return images

    # Decode the label set
    def _decode_idx1_ubyte(self, file_path):
        """Decode an uncompressed IDX1 label file into a numpy array.

        :param file_path: path of the decompressed ``.idx1-ubyte`` label file
        :return: 1-D array with one entry per label, created with
            ``np.empty``'s default dtype
        """
        with open(file_path, 'rb') as fh:
            bin_data = fh.read()

        # Header: magic number and label count, big-endian unsigned 32-bit.
        offset = 0
        header_fmt = '>II'
        magics, nums = struct.unpack_from(header_fmt, bin_data, offset)
        print('魔数：%d，图片标签数量：%d张'%(magics,nums))

        # Labels follow the header, one unsigned byte each.
        offset += struct.calcsize(header_fmt)
        label_fmt = '>B'
        label_bytes = struct.calcsize(label_fmt)
        labels = np.empty(nums)
        for i in range(nums):
            if (i + 1) % 10000 == 0:
                print('labels集已解析%d'%(i+1)+'张')
            labels[i] = struct.unpack_from(label_fmt, bin_data, offset)[0]
            offset += label_bytes
        return labels

    # Read the image set of the configured split
    def get_FashionMNIST_images(self):
        """Decode and return the images of the configured split.

        :return: array of shape ``(numimgs, rows, cols)``
        """
        if self.train:
            return self._decode_idx3_ubyte(self.train_images_name)
        return self._decode_idx3_ubyte(self.test_images_name)

    # Read the label set of the configured split
    def get_FashionMNIST_labels(self):
        """Decode and return the labels of the configured split.

        :return: 1-D array of labels
        """
        if self.train:
            return self._decode_idx1_ubyte(self.train_labels_name)
        return self._decode_idx1_ubyte(self.test_labels_name)

    # Get the images and, optionally, the labels of the split
    def FashionMNIST(self, label=True):
        """Read the data of the configured split.

        :param label: when truthy the labels are decoded as well
        :return: ``(Imgs, labels)`` when ``label`` is truthy, otherwise only
            ``Imgs``
        """
        Imgs = self.get_FashionMNIST_images()
        if not label:
            return Imgs
        labels = self.get_FashionMNIST_labels()
        return (Imgs, labels)

    def _save_images(self, images, save_path, mode):
        """Save every image as a numbered JPEG inside ``save_path``.

        Files are named ``01.jpg``, ``02.jpg``, ... (1-based, zero-padded to
        at least two digits). The directory is created when it is missing and
        the path is joined with the platform separator rather than a
        hard-coded backslash, so the files land inside ``save_path`` on every
        operating system.

        :param images: array of images, iterated along its first axis
        :param save_path: target directory
        :param mode: PIL mode each image is converted to before saving
        """
        os.makedirs(save_path, exist_ok=True)
        for i, arr in enumerate(images):
            savepath = os.path.join(save_path, "%02d.jpg" % (i + 1))
            print(savepath)
            Image.fromarray(arr).convert(mode).save(savepath)

    # Convert the image set to PIL images and optionally save them
    def ToPILImage(self, save=False, save_path=None):
        """Decode the split and optionally save every image as an RGB JPEG.

        :param save: when truthy all images are written to ``save_path``
        :param save_path: target directory (required when ``save`` is truthy)
        :return: one decoded image converted to an RGB PIL image
        """
        images = self.get_FashionMNIST_images()
        # NOTE(review): the returned sample is the image at index 1 (the
        # second image), kept as in the original code - confirm whether
        # index 0 was intended.
        outimg = Image.fromarray(images[1]).convert('RGB')
        if save:
            # The conversion to 'RGB' is what makes the array savable as JPEG.
            self._save_images(images, save_path, 'RGB')
        return outimg

    def ToPILImages_Save(self, T=0, save=False, save_path=None):
        """Decode the split and optionally save every image as a grayscale JPEG.

        :param T: index of the image that is returned
        :param save: when truthy all images are written to ``save_path``
        :param save_path: target directory (required when ``save`` is truthy)
        :return: the ``T``-th image as a PIL image (see ``ToImage``)
        """
        images = self.get_FashionMNIST_images()
        outimg = self.ToImage(images[T])

        if save:
            # The conversion to 'L' is what makes the array savable as JPEG.
            self._save_images(images, save_path, 'L')

        return outimg

    # Convert the numpy array of a single image to a PIL image
    def ToImage(self, Img, show=False):
        """Convert the numpy array of one image to a PIL image.

        The array is cast to ``uint8`` before it is handed to PIL. Other
        conversions that were considered:

            Image.fromarray(np.uint8(Img))         # cast to unsigned 8-bit first
            Image.fromarray(Img).convert('L')      # convert to mode 'L'
            Image.fromarray(Img)                   # keep the array's own dtype
            Image.fromarray(Img).convert('RGB')    # convert to mode 'RGB'

        :param Img: numpy array of a single image
        :param show: when truthy the image is plotted with matplotlib and its
            pixel values, size, channel count and types are printed
        :return: the PIL image
        """
        outimg = Image.fromarray(np.uint8(Img))
        if show:
            plt.imshow(outimg)
            plt.show()
            print("outimg图像的像素值(np.matrix(outimg.getdata()))：")
            print(np.matrix(outimg.getdata()))
            print("outimg图像的尺寸：",outimg.size)
            print("outimg图像的通道数：",len(outimg.split()))
            print("输出outimg图像的类型：",type(outimg))
            print("输入Img图像的类型：",type(Img))

        return outimg

    # Tile several images into a single image array
    @staticmethod
    def Matrix_Images(Imgs, backs, offsets=(1, 1)):
        """Tile several equally sized images into one grid array.

        Declared as a static method because it uses no instance state; it can
        be called on the class or on an instance.

        :param Imgs: array of shape ``(T, rows, cols)``; the first
            ``backs[0] * backs[1]`` images are placed row by row
        :param backs: grid size as ``[grid_rows, grid_cols]``
        :param offsets: gap in pixels between neighbouring images as
            ``[vertical, horizontal]``
        :return: ``uint8`` array holding the grid; the gaps keep the value 255

        Example::

            backs = [2, 2]
            imgM = LoadFashionMNIST.Matrix_Images(imgs, backs, offsets=[1, 2])
            plt.imshow(imgM)
            plt.show()

            img = Image.fromarray(np.uint8(imgM))
            img.show()
        """
        imsize = list(Imgs.shape[1:3])    # size of a single image

        # Grid size: all tiles plus the gaps between them.
        Rs, Cs = np.multiply(backs, imsize) + np.multiply(np.array(backs) - 1, offsets)

        # Start from an array filled with 255 (white when shown as grayscale).
        imgM = np.full((Rs, Cs), 255, dtype=np.uint8)

        for t in range(backs[0] * backs[1]):
            # Grid cell (row m, column n) of the t-th image.
            m, n = divmod(t, backs[1])
            # Top-left corner of that cell inside the grid array.
            r1 = m * (imsize[0] + offsets[0])
            c1 = n * (imsize[1] + offsets[1])
            imgM[r1:r1 + imsize[0], c1:c1 + imsize[1]] = Imgs[t]
        return imgM

if __name__ == '__main__':
    # Demo: decode the test split, inspect it, convert one image and export
    # the whole split as JPEG files.
    path = r"./dataset/MNIST"

    # Training split: images and labels
    train_images_file = path + r"/train-images.idx3-ubyte"
    train_labels_file = path + r"/train-labels.idx1-ubyte"
    # Test split: images and labels
    test_images_file = path + r"/t10k-images.idx3-ubyte"
    test_labels_file = path + r"/t10k-labels.idx1-ubyte"

    # Directories the exported images are written to
    save_train_images = path + r"/train_images"
    save_test_images = path + r"/test_images2"

    # (1) Create a loader for the test split
    fmnist = LoadFashionMNIST(
        imset_path=test_images_file,
        labelset_path=test_labels_file,
        train=False,
    )

    # (2) Decode the split into numpy arrays
    imgs, labels = fmnist.FashionMNIST(label=True)

    print("图像和标签的大小：", imgs.shape, labels.shape)
    print("前50个标签值：", labels[:50])
    print("图形的类型：", type(imgs))
    plt.imshow(imgs[0])
    plt.show()

    # (3) Convert a single image and display it
    T = 0
    imt = fmnist.ToImage(imgs[T], show=True)
    imt.show()

    # (4) Export the split as image files and plot the returned sample
    img = fmnist.ToPILImages_Save(save=True, save_path=save_test_images)
    plt.imshow(img)
    plt.show()