s

import numpy as np

import tensorflow as tf

import pandas as pd

 

1 加载数据集,把输入和结果分开

# Read the training table; column 0 is taken as the label and every
# remaining column as the input features.
train = pd.read_csv("train.csv")
labels_flat = train.iloc[:, 0].values.ravel()
images = train.iloc[:, 1:].values

 

2 对输入进行处理

 

# Convert the inputs to floating point and scale them by 1/255.
# np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
images = images.astype(np.float64)
images = np.multiply(images, 1.0 / 255.0)
print('输入数据的数量: (%g, %g)' % images.shape)

# Number of features per sample (column count of `images`).
image_size = images.shape[1]
print ('输入数据的维度=> {0}'.format(image_size))

# Side length of a square holding `image_size` values. A plain int is used
# because uint8 would wrap around for sides larger than 255.
image_width = image_height = int(np.ceil(np.sqrt(image_size)))
print ('图片的长 => {0}\n图片的高 => {1}'.format(image_width,image_height))

# Input placeholder: any number of rows, each `image_size` wide.
x = tf.placeholder('float', shape=[None, image_size])

 

3 对结果进行处理

 

# Number of distinct label values present in the data.
labels_count = len(np.unique(labels_flat))
print('结果的种类 => {0}'.format(labels_count))

# One-hot encode the labels
def dense_to_one_hot(labels_dense, num_classes):
    """Convert integer class labels into a one-hot matrix.

    Args:
        labels_dense: Array-like of integer class indices; it is flattened
            before use.
        num_classes: Width of each one-hot row. Every label must lie in
            ``[0, num_classes)``.

    Returns:
        A float ``numpy.ndarray`` of shape ``(num_labels, num_classes)`` with
        a single 1 per row, in the column named by that row's label.

    Raises:
        ValueError: If any label is negative or ``>= num_classes``.
    """
    labels = np.asarray(labels_dense).ravel()
    num_labels = labels.shape[0]
    labels_one_hot = np.zeros((num_labels, num_classes))

    if num_labels == 0:
        return labels_one_hot

    # Flat-offset indexing would let an out-of-range label silently set a
    # cell that belongs to a neighbouring row, so reject such labels up front.
    if labels.min() < 0 or labels.max() >= num_classes:
        raise ValueError(
            "labels must lie in [0, {0}); got range [{1}, {2}]".format(
                num_classes, labels.min(), labels.max()))

    # Row i gets its 1 in column labels[i].
    labels_one_hot[np.arange(num_labels), labels] = 1
    return labels_one_hot

 

# One-hot encode the flat labels and store them as uint8.
labels = dense_to_one_hot(labels_flat, labels_count).astype(np.uint8)

# Target placeholder: any number of rows, one column per label class.
y = tf.placeholder('float', shape=[None, labels_count])

print('结果的数量:({0[0]},{0[1]})'.format(labels.shape))

 

 

4 把输入数据划分训练集和验证集

# Hold out the first VALIDATION_SIZE samples for validation; everything
# after them is used for training.
VALIDATION_SIZE = 2000

validation_images, train_images = images[:VALIDATION_SIZE], images[VALIDATION_SIZE:]
validation_labels, train_labels = labels[:VALIDATION_SIZE], labels[VALIDATION_SIZE:]

 

5 对训练集进行分批

# Number of full mini-batches per epoch; a trailing partial batch is dropped.
batch_size = 100
n_batch = len(train_images) // batch_size

处理完毕后,打印的结果是: 数据预处理的结果

 

数据预处理好了。如果不需要显示结果,可以把 print 语句都去掉,不影响建模。

 

2,建立神经网络,设置损失函数,设置梯度下降的优化参数

 

这里只是最简单的一个实现,下篇文章我们会继续对网络进行优化

 

6 创建一个简单的神经网络用来对图片进行识别

# Single linear layer followed by softmax. The layer is sized from
# image_size / labels_count (the same values that size the x and y
# placeholders) instead of the hard-coded 784 and 10, so it stays
# consistent with whatever train.csv contains.
weights = tf.Variable(tf.zeros([image_size, labels_count]))
biases = tf.Variable(tf.zeros([labels_count]))

# Raw (unnormalised) class scores.
result = tf.matmul(x, weights) + biases

# Class probabilities.
prediction = tf.nn.softmax(result)

 

7 创建损失函数,以交叉熵的平均值为衡量

# Mean cross-entropy over the batch. softmax_cross_entropy_with_logits
# applies softmax itself, so it must be given the raw scores (`result`);
# passing the already-softmaxed `prediction` would apply softmax twice.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=result))

 

8 用梯度下降法优化参数

# One gradient-descent update on `loss` with a learning rate of 0.1.
train_step = tf.train.GradientDescentOptimizer(
    learning_rate=0.1,
).minimize(loss)

3,初始化变量,设置好准确度的计算方法,在 Session 中运行

 

9 初始化变量

# Op that initialises all global variables; it is run once inside the session.
init = tf.global_variables_initializer()

 

10 计算准确度

# A sample counts as correct when the arg-max of the target row matches the
# arg-max of the predicted row.
correct_prediction = tf.equal(
    tf.argmax(y, axis=1),
    tf.argmax(prediction, axis=1),
)

# Accuracy is the mean of the 0/1 correctness flags.
accuracy = tf.reduce_mean(
    tf.cast(correct_prediction, tf.float32)
)

 

 

# Train the model inside a session and evaluate it after every epoch.
with tf.Session() as sess:

    # Initialise the variables.

    sess.run(init)

    # Run 50 epochs.

    for epoch in range(50):

        for batch in range(n_batch):

            # Slice out the current mini-batch of inputs and targets.

            batch_x = train_images[batch*batch_size:(batch+1)*batch_size]

            batch_y = train_labels[batch*batch_size:(batch+1)*batch_size]

            # Run one training step on this mini-batch.

            sess.run(train_step,feed_dict = {x:batch_x,y:batch_y})

        # After each epoch, compute the accuracy on the validation set.

        accuracy_n = sess.run(accuracy,feed_dict={ x: validation_images, y: validation_labels})

        print ("第" + str(epoch+1)

posted @ 2018-12-27 18:44  牧 天  阅读(219)  评论(0)    收藏  举报