数据集的处理

# Confirm which MindSpore version is installed in the current environment.
import mindspore
print(mindspore.__version__)

1.2.0

#数据集的下载

import os
import moxing as mox

# Copy the MNIST archive from OBS only when it is not already present locally,
# so re-running the script does not repeat the transfer.
# NOTE(review): nothing visible here extracts the archive, yet later code reads
# "./MNIST_Data/train" — confirm the unzip step happens elsewhere.
if not os.path.exists("./MNIST_Data.zip"):
    mox.file.copy(
        "obs://modelarts-labs-bj4-v2/course/hwc_edu/python_module_framework/datasets/mindspore_data/MNIST_Data.zip",
        "./MNIST_Data.zip",
    )

#数据集的处理

from mindspore import context
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import mindspore.dataset as ds
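# mindspore.dataset (imported as ds) is MindSpore's data-processing API module. It mainly stores samples and labels, and also bundles a number of common data-processing methods.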

# Runtime configuration.
#   mode          - execution mode (dynamic or static graph).
#   device_target - hardware backend: CPU, GPU or Ascend; typically taken from
#                   the script's --device_target option. When GPU is selected
#                   the MindSpore build must be switched to the GPU one too.
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

# Alternative setup for Ascend, reading the device id from the environment:
# device_id = int(os.getenv("DEVICE_ID"))
# context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id)

# Directories holding the training and test splits.
train_data_path, test_data_path = "./MNIST_Data/train", "./MNIST_Data/test"

# Read the training images using the MNIST-specific dataset reader, then
# report the dataset object's type and how many pictures it contains.
mnist_ds = ds.MnistDataset(train_data_path)
print('The type of mnist_ds:', type(mnist_ds))
print("Number of pictures contained in the mnist_ds：", mnist_ds.get_dataset_size())

# Create an iterator over the dataset that yields each sample as a dict of
# Tensors, and pull a single sample from it.
dic_ds = mnist_ds.create_dict_iterator()
item = next(dic_ds)

# MindSpore's built-in dataset pipeline outputs Tensors by default. Libraries
# outside the framework, such as matplotlib, cannot consume Tensors, so both
# fields are converted to NumPy arrays with Tensor.asnumpy().
label = item["label"].asnumpy()  # the digit shown in the picture (ground truth)
img = item["image"].asnumpy()

print("The item of mnist_ds:", item.keys())      # keys of the sample dict
print("Tensor of image in item:", img.shape)     # (height, width, channels)
print("The label of item:", label)               # the correct answer for this image

# np.squeeze drops the size-1 axes so the image becomes 2-D for imshow;
# the label is used as the figure title.
plt.imshow(np.squeeze(img))
plt.title("number:%s" % label)
plt.show()

The type of mnist_ds: <class 'mindspore.dataset.engine.datasets.MnistDataset'>

Number of pictures contained in the mnist_ds： 60000

The item of mnist_ds: dict_keys(['image', 'label'])

Tensor of image in item: (28, 28, 1)

The label of item: 3

posted @ 2021-12-27 16:01 MS小白 阅读(56) 评论(0) 收藏 举报

刷新页面 返回顶部

数据集的处理

公告