《Tensorflow 实战 Google 深度学习框架》 MNIST 笔记

大家好，经过一个礼拜，终于把《Tensorflow 实战 Google 深度学习框架》第一版里的代码调通了。本人的主要工作是更换了部分函数，但是对于函数的结构没有完全了解。特别是 placeholder 之后，怎么用 Session() 开启优化算子并使得其起作用，对我来说都是很难理解的事情。我测试了 5 个轮次，发现准确率没往上涨，应该是平铺着选取若干个 Batch 进行模型的。关于这一点，慢点我会上传部分替代方案，就是简单的前传两层神经网络，因为是手动写的，还能自己改动部分。

1.结果，输出 15 个手写数字，准确率 12%。思前想后，为了赶紧交差，现在还是这个准确率，就先记录着。

这是书上的代码，在该换掉部分函数名之后，目前的情况。笔者的学习进度，暂时还理解不了迭代轮数如何对参数进行更新，并且使得模型更加靠近最优值。所以大家可以看到每一轮的准确率是一样的。

函数部分

# (1) BATCH
def next_batch( ind_in, batch_size_in, x_in, y_in ):
    start_pos = ind_in * batch_size_in
    end_pos = (ind_in+1) * batch_size_in
    
    #print( start_pos, end_pos )
    xs = x_in[start_pos: end_pos]
    ys = y_in[start_pos: end_pos]
    return xs, ys



# (2) Function
def inference( input_tensor, avg_class, weights1, biases1, weights2, biases2 ):
    print("Inner: ", input_tensor.shape )
    if avg_class == None:
        layer1 = tf.nn.relu( tf.matmul( input_tensor, weights1 ) + biases1 )  # activate
        tmp = tf.matmul( layer1, weights2 ) + biases2
        print( "inner-11: ", tmp.shape )
        return tmp
    else:
        layer1 = tf.nn.relu( tf.matmul( input_tensor, avg_class.average( weights1 ) ) + avg_class.average( biases1 ) )
        tmp = tf.matmul( layer1, avg_class.average( weights2 ) + avg_class.average( biases2 ) )
        print( "inner-12: ", tmp.shape )
        return tmp

参数设置

# --- 函数主体

tf.compat.v1.disable_eager_execution()  

# (1) params
input_node = 784
output_node = 10

layer1_node = 500
batch_size = 2000

learning_rate_base = 0.8
learning_rate_decay = 0.99
regularization_rate = 0.0001
training_steps = 5
moving_average_decay = 0.9

n_print_sample=15  # 自己设置

训练函数主体

注意，这部分的 Accuracy 暂时无法验证，因为 average_y 不知怎么输出。这就是目前的成绩！

# (3) 训练主函数  
def train(  x_1, y_1, x_2, y_2, n_sample  ):
    # (1) mnist_in - 4 train-num; 5 test-num 
    # (2) y_ placeholder, y inference

    n_batch = n_sample / batch_size
    print("Num of sample sets: ", n_batch)
    
    x  = tf.compat.v1.placeholder( tf.float32, [None, input_node],  name='x-input' )
    y_ = tf.compat.v1.placeholder( tf.float32, [None, output_node], name='y-input' )
    
    weights1 = tf.Variable( tf.random.truncated_normal( [input_node, layer1_node], stddev=0.1 ) )
    biases1  = tf.Variable( tf.constant( 0.1, shape=[layer1_node] ) )
    weights2 = tf.Variable( tf.random.truncated_normal( [layer1_node, output_node], stddev=0.1 ) )
    biases2  = tf.Variable( tf.constant( 0.1, shape=[output_node] ) )
    
    # (1) INFERENCE
    y = inference( x, None, weights1, biases1, weights2, biases2 )   # 输出 10-dim, 自己写的
    global_step = tf.Variable( 0, trainable=False )
         
    # (2) 指数平滑 + Linear Inference
    variable_averages = tf.train.ExponentialMovingAverage( moving_average_decay, global_step )
    variables_averages_op   = variable_averages.apply( tf.compat.v1.trainable_variables() )   # tf.trainable_variables() 
    
    # INFERENCE
    average_y = inference( x, variable_averages, weights1, biases1, weights2, biases2 )
    
    # (3) 交叉熵： tf.arfmax(y, 1) 第二个参数 1 按照行来算，返回列号
    cross_entropy      = tf.nn.sparse_softmax_cross_entropy_with_logits( tf.argmax(y,1), y )
    cross_entropy_mean = tf.reduce_mean( cross_entropy )
    
    # (4) 正则化： loss = 交叉熵 + 正则化
    regularizer    = tf.keras.regularizers.l2( regularization_rate )     # tf.contrib.layer2.l2_regularizer( regularization_rate )
    regularization = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularization
    
    # (5) 学习率： 指数平滑
    learning_rate = tf.compat.v1.train.exponential_decay( learning_rate_base,  global_step, 
                                                    n_sample / batch_size,     learning_rate_decay )
    # (6) GBDT 优化
    train_step = tf.compat.v1.train.GradientDescentOptimizer(learning_rate).minimize( loss, global_step = global_step )

    # (7) 计算预判准确率
    correct_prediction = tf.equal( tf.argmax( average_y, 1 ), tf.argmax( y_, 1 ) )
    accuracy = tf.reduce_mean( tf.cast( correct_prediction, tf.float32 ) )
    y_pred   = tf.argmax( average_y, axis=1 ) 
    y_pred   = tf.cast( y_pred, tf.int32 )
    
    # (8) batch 相关，但是没理解是什么
    with tf.control_dependencies( [] ):
        train_op = tf.no_op( name='train' )
     
    # (10) 会话
    with tf.compat.v1.Session() as sess:
        sess.run(  tf.compat.v1.global_variables_initializer()  )
        batch_ind = 0   
        for i in range( training_steps ):
            # (1) batch computing
            xs, ys = next_batch( i, batch_size, x_1, y_1 )    
            a=sess.run( train_step, feed_dict={x: xs, y_: ys } )
            # (2) watching
            if i%1 == 0:
                tmp_avg_y = sess.run( average_y, feed_dict={x: xs, y_: ys } )
                tmp_avg_y_1 = sess.run( tf.argmax( tmp_avg_y, axis=1 ) )
                tmp_avg_y_1 = tf.reshape( tmp_avg_y_1, [batch_size, 1] )
                print("y-pred: ", tmp_avg_y_1.shape )                
                validate_acc = sess.run( accuracy, feed_dict={x: xs, y_: ys } )
                print( 'After %d train-step(s), validation accuracy of avg-model: %g' % ( i, validate_acc ))
                            
            sess.run( train_op, feed_dict={x:xs, y_:ys} )
            # END FOR
            
        # 最后一轮
        test_acc = sess.run( accuracy, feed_dict={x: x_2, y_: y_2 } )
        print( "After %d train-step(s), test accuracy of avg-model: %g " % (training_steps, test_acc) )

感兴趣的伙伴，可以点击购买 https://product.dangdang.com/11945507033.html 。

2.笔者重新测试神经网络的映射。即 z1=w1x1+b1; a1=f(z1); z2 = w2a1+b2; a2=f(z2); y-pred=a2。

以下结果这么理解，每轮次的实际预测结果，5轮总的准确率是 30% 左右！所以单一前传，没有 loss + regul 就是这样的预测准确率，没有研究出来 package 怎么迭代，怎么优化！！

欢迎关注 ShoelessCai.com 我们尽力赋能各行业！

posted on 2025-06-15 10:44 Mira_2019 阅读(7) 评论(0) 收藏举报