mnist 提取目录格式和脚本
官网下载地址:http://yann.lecun.com/exdb/mnist/
提取后的下载地址:https://download.csdn.net/download/Night_MFC/12713219
# -*- coding: utf-8 -*-
from PIL import Image
import struct
import time
import os
# Create the ten per-digit output folders (0-9) under both dataset roots.
for path_num_pic_in in ('mnist/test/', 'mnist/train/'):
    for num_dir in range(10):
        try:
            # Attempt the creation directly instead of checking first: this
            # avoids the gap between an existence test and the actual mkdir.
            os.makedirs(os.path.join(path_num_pic_in, str(num_dir)))
        except FileExistsError:
            # The folder is already there (e.g. from an earlier run).
            print('文件夹已经存在')
def extract_save_mnist(filename_pics, filename_labels, path_num_pic_in):
    """Extract every digit of an MNIST IDX image/label file pair to PNG files.

    Image number ``i`` (zero-based position in the file) with label ``d`` is
    written to ``<path_num_pic_in>/<d>/<d>_<i>.png``. The per-label folder is
    created on demand. The position is used in the file name (rather than a
    timestamp) so that two images can never end up with the same name.

    Args:
        filename_pics: Path of the IDX3 image file (magic number 2051).
        filename_labels: Path of the matching IDX1 label file (magic 2049).
        path_num_pic_in: Root output directory for the label sub-folders.

    Raises:
        ValueError: If a header is missing, a magic number is wrong, the two
            files announce different item counts, or a file holds fewer
            bytes than its header announces.
    """
    image_magic = 2051  # 0x00000803: unsigned-byte data, 3 dimensions
    label_magic = 2049  # 0x00000801: unsigned-byte data, 1 dimension
    images_header = struct.Struct('>IIII')  # magic, count, rows, columns
    labels_header = struct.Struct('>II')    # magic, count

    with open(filename_pics, 'rb') as f:
        buf = f.read()
    with open(filename_labels, 'rb') as f2:
        buf2 = f2.read()

    if len(buf) < images_header.size or len(buf2) < labels_header.size:
        raise ValueError('IDX file is too short to contain its header')

    magic, images, rows, columns = images_header.unpack_from(buf, 0)
    if magic != image_magic:
        raise ValueError(
            'unexpected image file magic number: {}'.format(magic))

    # The label count must come from the label file itself, not from the
    # image file header.
    magic, labels = labels_header.unpack_from(buf2, 0)
    if magic != label_magic:
        raise ValueError(
            'unexpected label file magic number: {}'.format(magic))
    if images != labels:
        raise ValueError(
            'image count ({}) and label count ({}) differ'.format(
                images, labels))

    image_size = rows * columns
    if len(buf) < images_header.size + images * image_size:
        raise ValueError('image file is truncated')
    if len(buf2) < labels_header.size + labels:
        raise ValueError('label file is truncated')

    index = images_header.size    # read position in the image buffer
    index2 = labels_header.size   # read position in the label buffer
    known_dirs = set()            # label folders already ensured to exist
    for i in range(labels):
        # Pixels are stored row by row, one unsigned byte each, so the whole
        # image can be decoded in one call instead of pixel by pixel.
        image = Image.frombytes(
            'L', (columns, rows), buf[index:index + image_size])
        index += image_size

        label_num = buf2[index2]  # indexing bytes yields an int
        index2 += 1

        label_dir = os.path.join(path_num_pic_in, str(label_num))
        if label_dir not in known_dirs:
            os.makedirs(label_dir, exist_ok=True)
            known_dirs.add(label_dir)
        image.save(os.path.join(
            label_dir, '{}_{:05d}.png'.format(label_num, i)))
if __name__ == '__main__':
    # Convert the training set first, then the test set. The four IDX files
    # are looked up relative to the current working directory.
    extract_save_mnist(r'train-images.idx3-ubyte', r'train-labels.idx1-ubyte', 'mnist/train/')
    extract_save_mnist(r't10k-images.idx3-ubyte', r't10k-labels.idx1-ubyte', 'mnist/test/')

浙公网安备 33010602011771号