TensorFlow Getting-Started Examples
I. Regression algorithm
import tensorflow as tf
import numpy as np

# ------------------- 1. Dataset, variables, placeholders ------------------- #
# Input samples: 100 values drawn from a normal distribution with mean 1 and standard deviation 0.1
x_vals = np.random.normal(1, 0.1, 100)
# Target samples: a list of 100 values, all equal to 10.0
y_vals = np.repeat(10.0, 100)

# Placeholders
x_data = tf.placeholder(shape=[None, 1], dtype=tf.float32)
y_target = tf.placeholder(shape=[None, 1], dtype=tf.float32)

# Model variable
A = tf.Variable(tf.random_normal(shape=[1, 1]))

# Batch size
batch_size = 25

# Training-set indices: choose 80 of the sample indices 0..99 without replacement
train_indices = np.random.choice(len(x_vals), round(len(x_vals) * 0.8), replace=False)
# Test-set indices: the 20 indices left over after removing train_indices
test_indices = np.array(list(set(range(len(x_vals))) - set(train_indices)))

# Training set & test set
x_vals_train = x_vals[train_indices]
x_vals_test = x_vals[test_indices]
y_vals_train = y_vals[train_indices]
y_vals_test = y_vals[test_indices]

# ----------------- 2. Model, loss function, optimizer ----------------- #
# The model is a linear function y = w * x, i.e. my_output = A * x_data.
# x_data will be fed with the samples x_vals; the goal is to learn the value of A.
# Since every y is 10.0 and x has mean 1, we can already guess A should end up near 10.
my_output = tf.multiply(x_data, A)

# Loss function: the mean of the squared difference between the model output and the true value.
# y_target will be fed with y_vals.
loss = tf.reduce_mean(tf.square(my_output - y_target))

# Initialize the variables
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# Gradient descent with learning rate 0.02: each iteration moves A against the gradient of the
# loss, scaled by 0.02 (the learning rate is the step-size factor, not a fixed change of 0.02 per step).
my_opt = tf.train.GradientDescentOptimizer(learning_rate=0.02)
train_step = my_opt.minimize(loss)  # objective: drive the loss to its minimum

# ----------------- 3. Training loop ----------------- #
for i in range(100):  # i runs from 0 to 99
    # Randomly pick 25 indices
    rand_index = np.random.choice(len(x_vals_train), size=batch_size)
    # Take 25 training samples and transpose them, because x_data has shape [None, 1].
    # Note the extra brackets in [x_vals_train[rand_index]]: they turn the 1-D selection into a
    # 2-D 1x25 array, which transpose reshapes to 25x1; x_vals_train[rand_index] alone stays 1-D.
    rand_x = np.transpose([x_vals_train[rand_index]])
    rand_y = np.transpose([y_vals_train[rand_index]])
    # Feed every placeholder the loss depends on, directly or indirectly:
    # x_data gets rand_x, y_target gets rand_y
    sess.run(train_step, feed_dict={x_data: rand_x, y_target: rand_y})
    # Print progress
    if i % 25 == 0:
        print('step: ' + str(i) + ' A = ' + str(sess.run(A)))
        print('loss: ' + str(sess.run(loss, feed_dict={x_data: rand_x, y_target: rand_y})))

# ----------------- 4. Evaluate the model ----------------- #
# The test set and the training set are completely separate. We evaluate the fitted A by
# computing the MSE on each set and checking that the two values are roughly the same.
mse_test = sess.run(loss, feed_dict={x_data: np.transpose([x_vals_test]), y_target: np.transpose([y_vals_test])})
mse_train = sess.run(loss, feed_dict={x_data: np.transpose([x_vals_train]), y_target: np.transpose([y_vals_train])})
print('MSE on test: ' + str(np.round(mse_test, 2)))
print('MSE on train: ' + str(np.round(mse_train, 2)))
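The script above uses the TensorFlow 1.x graph API (tf.placeholder / tf.Session). To make the gradient-descent step concrete, here is a minimal NumPy-only sketch of the same fit; it is not from the original post, and the names A, lr and grad are introduced here, but it applies the same update that GradientDescentOptimizer performs on the MSE loss:

import numpy as np

# Same setup as above: x ~ N(1, 0.1), all targets equal to 10.0
x_vals = np.random.normal(1, 0.1, 100)
y_vals = np.repeat(10.0, 100)

A = np.random.randn()   # scalar weight, playing the role of the 1x1 variable A
lr = 0.02               # same learning rate as in the TF script

for step in range(100):
    idx = np.random.choice(len(x_vals), 25)     # mini-batch of 25, like batch_size
    x, y = x_vals[idx], y_vals[idx]
    pred = A * x
    grad = np.mean(2.0 * (pred - y) * x)        # d/dA of mean((A*x - y)^2)
    A -= lr * grad                              # gradient-descent update
    if step % 25 == 0:
        print('step %d  A = %.3f  loss = %.3f' % (step, A, np.mean((pred - y) ** 2)))

print('final A (should be close to 10):', A)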
II. Classification algorithm
# import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
# sklearn (scikit-learn) is a machine learning toolkit that ships with several datasets.
# Install with: pip install -U scikit-learn
# sklearn depends on python >= 2.7, numpy (array-oriented math library) and scipy (algorithms and data tools)
from sklearn import datasets

# ------------------- 1. Dataset, variables, placeholders ------------------- #
iris = datasets.load_iris()
print('sample feature: feature_names: ' + str(iris.feature_names) + " data length: " + str(len(iris.data)))
print('sample target: target_names: ' + str(iris.target_names) + " target length: " + str(len(iris.target)))
# Sample data: a 150x4 two-dimensional array
# print(iris.data)
# Sample labels: a one-dimensional array of length 150
# print(iris.target)

# Extracted labels: we only care about the first species; 1 if the sample is the first species, 0 otherwise
temp = []
for x in iris.target:
    temp.append(1 if x == 0 else 0)
iris_target = np.array(temp)  # list to array; the lines above can also be written as:
# iris_target = np.array([1 if x == 0 else 0 for x in iris.target])
print('iris_target: ')
print(iris_target)

# Extracted inputs: use only two features, petal length and petal width
iris_2d = np.array([[x[2], x[3]] for x in iris.data])
print('iris_2d: ')
print(iris_2d)

# Split the samples into a training set and a test set.
# Training-set indices: choose 120 of the sample indices 0..149 without replacement
train_indices = np.random.choice(len(iris_2d), round(len(iris_2d) * 0.8), replace=False)
# Test-set indices: the 30 indices left over after removing train_indices
test_indices = np.array(list(set(range(len(iris_2d))) - set(train_indices)))
# Training set & test set
x_vals_train = iris_2d[train_indices]
x_vals_test = iris_2d[test_indices]
y_vals_train = iris_target[train_indices]
y_vals_test = iris_target[test_indices]

# Batch size of 20
batch_size = 20
x1_data = tf.placeholder(shape=[None, 1], dtype=tf.float32)
x2_data = tf.placeholder(shape=[None, 1], dtype=tf.float32)
y_target = tf.placeholder(shape=[None, 1], dtype=tf.float32)
A = tf.Variable(tf.random_normal(shape=[1, 1]))
b = tf.Variable(tf.random_normal(shape=[1, 1]))

# ----------------- 2. Model, loss function, optimizer ----------------- #
# Define the model: my_output = x1 - (A * x2 + b)
my_mult = tf.matmul(x2_data, A)
my_add = tf.add(my_mult, b)
my_output = tf.subtract(x1_data, my_add)

# Loss function: sigmoid cross-entropy, where my_output is the logit and y_target holds the 0/1 labels
xentropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=my_output, labels=y_target)
my_opt = tf.train.GradientDescentOptimizer(0.05)
train_step = my_opt.minimize(xentropy)

# Initialize the variables
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# ----------------- 3. Training loop ----------------- #
# Iterate and update the model, i.e. learn A and b
for i in range(1000):
    # Draw a batch of 20 indices uniformly at random from np.arange(len(x_vals_train)),
    # e.g. [ 66 42 96 115 45 127 31 70 148 57 60 127 56 96 7 63 75 127 110 144]
    rand_index = np.random.choice(len(x_vals_train), size=batch_size)
    # print('rand_index ' + str(rand_index))
    # rand_x is a 20x2 array, something like [[4.5 1.5] ... [1.3 0.2]]
    rand_x = x_vals_train[rand_index]
    # print(' rand_x ' + str(rand_x))
    # print(' rand_x shape: ' + str(rand_x.shape))
    rand_x1 = np.array([[x[0]] for x in rand_x])
    rand_x2 = np.array([[x[1]] for x in rand_x])
    # print(' rand_x1 ' + str(rand_x1))
    # print(' rand_x2 ' + str(rand_x2))
    # Using y_vals_train[rand_index] directly would give a 1-D array like
    # [0 0 0 0 0 0 1 0 0 0 0 1 1 1 0 1 1 0 0 0] with shape (20,), but the placeholder expects
    # shape (20, 1), so wrap each value: [[y] for y in y_vals_train[rand_index]] gives
    # [[0], [0], ..., [0]], which converts to an array of shape (20, 1).
    rand_y = np.array([[y] for y in y_vals_train[rand_index]])
    # print('rand_y shape ' + str(rand_y.shape))
    # print('rand_y ' + str(rand_y))
    sess.run(train_step, feed_dict={x1_data: rand_x1, x2_data: rand_x2, y_target: rand_y})
    if (i + 1) % 200 == 0:
        print('step: ' + str(i) + ' A = ' + str(sess.run(A)) + ' b = ' + str(sess.run(b)))
# ----------------- 4. Evaluate the model ----------------- #
# Use the learned variables A and b to compute my_output and turn it into a 0/1 prediction;
# both y_prediction and y_target keep shape [None, 1] so the comparison is element-wise
y_prediction = tf.round(tf.nn.sigmoid(my_output))
correct_prediction = tf.equal(y_prediction, y_target)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Test-set data, reshaped into two-dimensional column matrices
x_vals_test_x1 = np.array([[x[0]] for x in x_vals_test])
x_vals_test_x2 = np.array([[x[1]] for x in x_vals_test])
y_vals_test_predict = np.array([[y] for y in y_vals_test])
# Training-set data, reshaped into two-dimensional column matrices
x_vals_train_x1 = np.array([[x[0]] for x in x_vals_train])
x_vals_train_x2 = np.array([[x[1]] for x in x_vals_train])
y_vals_train_predict = np.array([[y] for y in y_vals_train])

print('x_vals_test ')
print(x_vals_test)
print('x_vals_test_x1 ')
print(x_vals_test_x1)
print('x_vals_test_x2 ')
print(x_vals_test_x2)
print('y_vals_test: ')
print(y_vals_test)
print('y_vals_test_predict: ')
print(y_vals_test_predict)

acc_value_test = sess.run(accuracy, feed_dict={x1_data: x_vals_test_x1, x2_data: x_vals_test_x2, y_target: y_vals_test_predict})
acc_value_train = sess.run(accuracy, feed_dict={x1_data: x_vals_train_x1, x2_data: x_vals_train_x2, y_target: y_vals_train_predict})
print('Accuracy on test set ' + str(acc_value_test))
print('Accuracy on train set ' + str(acc_value_train))
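Because the model is just the line x1 = A * x2 + b in the petal-length/petal-width plane (sigmoid(my_output) rounds to 1 exactly when x1 - (A * x2 + b) > 0), the accuracy above can be cross-checked without any TensorFlow ops. A minimal sketch, assuming the trained session and the x_vals_test / y_vals_test arrays from the script are still in scope; A_val, b_val, logits and preds are names introduced here, not from the original post:

# Pull the learned scalars out of the 1x1 variables
A_val = sess.run(A)[0][0]
b_val = sess.run(b)[0][0]

# Decision rule implied by the model: predict 1 (setosa) when x1 - (A*x2 + b) > 0
logits = x_vals_test[:, 0] - (A_val * x_vals_test[:, 1] + b_val)
preds = (logits > 0).astype(np.int64)

print('NumPy accuracy on test set: ' + str(np.mean(preds == y_vals_test)))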
This is an original article by cnblogs blogger "yuanjunWu". Please include a link to the original post and this notice when reposting.
Original post: https://www.cnblogs.com/AmeliaWu2019/p/11918619.html