mnist 提取目录格式和脚本

官网下载地址:http://yann.lecun.com/exdb/mnist/
提取后的下载地址:https://download.csdn.net/download/Night_MFC/12713219

# -*- coding: utf-8 -*-

from PIL import Image
import struct
import time
import os


# Prepare the output tree: one sub-folder per digit (0-9) under each split root.
for split_root in ('mnist/test/', 'mnist/train/'):
    for digit in range(10):
        digit_dir = split_root + str(digit)
        if not os.path.exists(digit_dir):
            # Folder named after the digit does not exist yet: create it
            # (including any missing parent folders).
            os.makedirs(digit_dir)
            continue
        # Something already exists at that path; report it and move on.
        print('文件夹已经存在')


def extract_save_mnist(filename_pics, filename_labels, path_num_pic_in):
    """Unpack an MNIST IDX image/label file pair into per-digit PNG files.

    Image number ``i`` (zero-based position in the IDX file) with label ``d``
    is saved as ``<path_num_pic_in><d>/<d>_<i>.png``. The index is used in the
    file name (rather than a timestamp) so that names never collide and a
    re-run overwrites the previous output instead of duplicating it.

    Args:
        filename_pics: Path of the IDX3 image file.
        filename_labels: Path of the matching IDX1 label file.
        path_num_pic_in: Output path prefix. The label digit is appended
            directly to it, so it should end with a path separator
            (e.g. ``'mnist/train/'``). Missing label folders are created.

    Raises:
        ValueError: If a header is truncated, a magic number is wrong, the
            image and label counts differ, or a file holds less data than its
            header announces. All checks run before anything is written.
        OSError: If a file cannot be read or an output cannot be written.
    """
    with open(filename_pics, 'rb') as f:
        image_buf = f.read()
    with open(filename_labels, 'rb') as f2:
        label_buf = f2.read()

    image_header = struct.Struct('>IIII')  # magic, image count, rows, columns
    label_header = struct.Struct('>II')    # magic, label count
    if len(image_buf) < image_header.size:
        raise ValueError(
            'image file is too short for an IDX3 header: %r' % (filename_pics,))
    if len(label_buf) < label_header.size:
        raise ValueError(
            'label file is too short for an IDX1 header: %r' % (filename_labels,))

    image_magic, images, rows, columns = image_header.unpack_from(image_buf, 0)
    # The label count must come from the label file itself, not the image file.
    label_magic, labels = label_header.unpack_from(label_buf, 0)

    # 2051 / 2049 are the magic numbers of MNIST image / label IDX files;
    # checking them catches swapped arguments and still-compressed downloads.
    if image_magic != 2051:
        raise ValueError(
            'bad image file magic number %d (expected 2051)' % image_magic)
    if label_magic != 2049:
        raise ValueError(
            'bad label file magic number %d (expected 2049)' % label_magic)
    if images != labels:
        raise ValueError(
            'image count (%d) and label count (%d) differ' % (images, labels))

    pixels_per_image = rows * columns
    if len(image_buf) < image_header.size + images * pixels_per_image:
        raise ValueError('image file is truncated: %r' % (filename_pics,))
    if len(label_buf) < label_header.size + labels:
        raise ValueError('label file is truncated: %r' % (filename_labels,))

    created_dirs = set()  # avoid one makedirs call per image
    pixel_offset = image_header.size
    for i in range(labels):
        label_num = label_buf[label_header.size + i]  # one unsigned byte per label

        # Pixels are stored row by row, one byte each, which is exactly the
        # raw layout of an 'L' (8-bit grayscale) image of size (width, height).
        image = Image.frombytes(
            'L', (columns, rows),
            image_buf[pixel_offset:pixel_offset + pixels_per_image])
        pixel_offset += pixels_per_image

        label_dir = path_num_pic_in + str(label_num)
        if label_dir not in created_dirs:
            os.makedirs(label_dir, exist_ok=True)
            created_dirs.add(label_dir)
        image.save(label_dir + '/' + '{}_{:05d}.png'.format(label_num, i))

if __name__ == '__main__':
    # (image file, label file, output root) for each dataset split,
    # processed in order: training set first, then the test set.
    for pics_file, labels_file, out_root in (
        (r'train-images.idx3-ubyte', r'train-labels.idx1-ubyte', 'mnist/train/'),
        (r't10k-images.idx3-ubyte', r't10k-labels.idx1-ubyte', 'mnist/test/'),
    ):
        extract_save_mnist(pics_file, labels_file, out_root)
posted @ 2020-08-14 17:23  mengfu188  阅读(130)  评论(0)    收藏  举报