MindSpore中使用字符级RNN分类名称

循环神经网络(Recurrent Neural Network,RNN)是一类以序列(sequence)数据为输入,在序列的演进方向上进行递归(recursion),且所有节点(循环单元)按链式连接的递归神经网络(recursive neural network),常用于 NLP 领域中解决序列数据的建模问题。

from mindspore import context
# Configure MindSpore to run in PyNative mode with Ascend as the target device.
context.set_context(mode=context.PYNATIVE_MODE, device_target="Ascend")
from io import open
import glob
import os
import unicodedata
import string

def find_files(path):
    """Return a list of the filesystem paths that match the glob pattern ``path``.

    The list is empty when nothing matches; entries are returned in whatever
    order the ``glob`` module yields them.
    """
    matches = glob.iglob(path)
    return list(matches)

# Print the paths of all .txt files found under data/names/.
print(find_files('data/names/*.txt'))

输出:

['data/names/German.txt', 'data/names/Dutch.txt', 'data/names/English.txt', 'data/names/Italian.txt', 'data/names/Vietnamese.txt', 'data/names/Portuguese.txt', 'data/names/Korean.txt', 'data/names/Spanish.txt', 'data/names/French.txt', 'data/names/Russian.txt', 'data/names/Greek.txt', 'data/names/Arabic.txt', 'data/names/Irish.txt', 'data/names/Chinese.txt', 'data/names/Czech.txt', 'data/names/Polish.txt', 'data/names/Japanese.txt', 'data/names/Scottish.txt']

image.png


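将名称转换为向量

注意:下面的代码用到了 all_letters 和 n_letters,但本文未给出它们的定义;运行前需先定义,例如 all_letters = string.ascii_letters + " .,;'",n_letters = len(all_letters)。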

# 导入模块
import numpy as np
from mindspore import Tensor
from mindspore import dtype as mstype
# letter_to_index: look up a letter's index in all_letters.
def letter_to_index(letter):
    """Return the result of ``all_letters.find(letter)``.

    ``all_letters`` is a module-level value defined outside this excerpt
    (presumably a string of the allowed characters -- confirm). If it is a
    ``str``, ``find`` yields -1 when ``letter`` is not present.
    """
    position = all_letters.find(letter)
    return position
# letter_to_tensor: encode a single letter as a <1 x n_letters> one-hot tensor.
def letter_to_tensor(letter):
    """Return a float32 one-hot Tensor of shape (1, n_letters) for ``letter``.

    The tensor starts as all zeros; the column given by
    ``letter_to_index(letter)`` is then set to 1.0. ``n_letters`` is a
    module-level value defined outside this excerpt.
    """
    zeros = np.zeros((1, n_letters))
    one_hot = Tensor(zeros, mstype.float32)
    hot_column = letter_to_index(letter)
    one_hot[0, hot_column] = 1.0
    return one_hot
# line_to_tensor: encode a line as a <line_length x 1 x n_letters> one-hot tensor.
def line_to_tensor(line):
    """Return a float32 Tensor of shape (len(line), 1, n_letters) for ``line``.

    Row ``i`` is all zeros except for a 1.0 in the column that
    ``letter_to_index`` returns for the i-th character of ``line``.
    ``n_letters`` is a module-level value defined outside this excerpt.
    """
    zeros = np.zeros((len(line), 1, n_letters))
    encoded = Tensor(zeros, mstype.float32)
    # map() is lazy, so each index lookup is still immediately followed by
    # its assignment, one character at a time.
    for row, column in enumerate(map(letter_to_index, line)):
        encoded[row, 0, column] = 1.0
    return encoded
# Demo: print the one-hot tensor for the letter 'A', then print the shape of
# the one-hot tensor built from the word 'Alex'.
print(letter_to_tensor('A'))
print(line_to_tensor('Alex').shape)

image.png

image.png
代码+图片

posted @ 2021-12-25 15:28  MS小白  阅读(7)  评论(0)    收藏  举报