FCN Notes

FCN.py

  • TensorFlow command-line flags
FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_integer("batch_size", "2", "batch size for training")
tf.flags.DEFINE_string("logs_dir", "logs/", "path to logs directory")
tf.flags.DEFINE_string("data_dir", "Data_zoo/MIT_SceneParsing/", "path to dataset")
tf.flags.DEFINE_float("learning_rate", "1e-5", "Learning rate for Adam Optimizer")
tf.flags.DEFINE_string("model_dir", "Model_zoo/", "Path to vgg model mat")
tf.flags.DEFINE_bool('debug', "False", "Debug mode: True/ False")
tf.flags.DEFINE_string('mode', "train", "Mode train/ test/ visualize")

Deep neural networks have many hyperparameters to tune: the optimizer, the learning rate, kernel sizes, and so on all need repeated adjustment, so taking them from the command line is very convenient. There are two common ways to do this: TensorFlow's built-in app.flags (the one FCN uses), and Python's argparse package.

1. Using the tf.app.flags module

tf.app.flags appears frequently in TensorFlow example projects. It is the same module as tf.flags (the extra "app" makes no difference); it lets an application accept parameters from the command line, for example to specify a cluster configuration. Here is an example.

import tensorflow as tf
# call DEFINE_string inside flags to register a parsing rule
tf.flags.DEFINE_string("para_name_1", "default_val", "description")
tf.flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)")
tf.flags.DEFINE_integer("num_epochs", 10, "Number of training epochs (default: 10)")
# FLAGS is an object that holds the parsed command-line arguments
FLAGS = tf.flags.FLAGS
FLAGS._parse_flags()  # force parsing; after this, FLAGS.__flags holds the flags as a dict
print(FLAGS.batch_size)  # prints 64
print(FLAGS.__flags)  # prints the dict of all registered flags and their values

Key point: the flags can be overridden from the command line.

python FCN.py --mode=visualize
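For reference, a minimal sketch of the usual pattern that makes this work (the main function below is illustrative, not the one in FCN.py): flags are defined at module level, and tf.app.run() parses sys.argv before invoking main.

import tensorflow as tf

tf.flags.DEFINE_string("mode", "train", "Mode train/ test/ visualize")
FLAGS = tf.flags.FLAGS

def main(argv=None):
    # by the time main runs, FLAGS has already been populated from the command line
    print("running in %s mode" % FLAGS.mode)

if __name__ == "__main__":
    tf.app.run()  # parses sys.argv, then calls main()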

2. Using Python's argparse package

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--echo", type=str,help="echo the string you use here")
parser.add_argument("--square", type=int, help="display a square of a given number")
args = parser.parse_args()
print(args.echo)
print(args.square**2)

The first argument simply echoes the given string back to the console (argparse only stores the value; nothing invokes the system echo command), and the second squares the given number. Run it as:

python argparse_example.py --echo 'hello!' --square 4
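Assuming the snippet above is saved as argparse_example.py, this run should print roughly:

hello!
16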


  • Calling VGG
def vgg_net(weights, image):
    ## the first five stages of FCN are just the VGG network
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',

        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',

        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',

        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',

        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )

    net = {}
    current = image
    for i, name in enumerate(layers):
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)), name=name + "_w")
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
            current = utils.conv2d_basic(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            current = utils.avg_pool_2x2(current)
        net[name] = current

    return net

VGGNet network configurations at the different depths (see the VGG configuration table); FCN uses VGG-19.

1. kernels, bias = weights[i][0][0][0][0]

weights is the collection of per-layer weights of the VGG network, stored in .mat format: MODEL_URL = ''

kernels is obtained as weights[i][0][0][0][0][0] and has shape [width, height, in_channels, out_channels]; bias is obtained as weights[i][0][0][0][0][1] and has shape [1, out_channels]. Every convolution in VGG-19 uses 3x3 filters, so width = 3 and height = 3. Note that the layer index i counts the finest-grained layers, covering conv, relu, pool, and fc operations: i = 0 is a conv kernel, i = 1 a relu, i = 2 a conv kernel, i = 3 a relu, i = 4 a pool, i = 5 a conv kernel, ..., i = 37 the first fully-connected layer, and so on. VGG-19 itself pools with 2x2 max-pooling (the vgg_net above substitutes utils.avg_pool_2x2).
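To check this layout, the .mat file can be loaded and inspected directly. A minimal sketch, assuming the file has already been downloaded into Model_zoo/ (the scipy.io.loadmat call and the np.squeeze step are the usual way to read MatConvNet models, not necessarily FCN.py verbatim):

import numpy as np
import scipy.io

model_data = scipy.io.loadmat("Model_zoo/imagenet-vgg-verydeep-19.mat")
weights = np.squeeze(model_data['layers'])  # one entry per fine-grained layer (conv/relu/pool/fc)

kernels, bias = weights[0][0][0][0][0]      # i = 0 -> conv1_1
print(kernels.shape)                        # expected: (3, 3, 3, 64), i.e. [width, height, in, out]
print(bias.shape)                           # expected: (1, 64)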

For a convolutional layer, e.g. conv1_1:

print(weights[0][0][0][0][0]) 

this prints the weight matrix and the bias:

[ array([[[[ 0.39416704, -0.08419707, -0.03631314, ..., -0.10720515,
          -0.03804016,  0.04690642],
         [ 0.46418372,  0.03355668,  0.10245045, ..., -0.06945956,
          -0.04020201,  0.04048637],
         [ 0.34119523,  0.09563112,  0.0177449 , ..., -0.11436455,
          -0.05099866, -0.00299793]],

        [[ 0.37740308, -0.07876257, -0.04775979, ..., -0.11827433,
          -0.19008617, -0.01889699],
         [ 0.41810837,  0.05260524,  0.09755926, ..., -0.09385028,
          -0.20492788, -0.0573062 ],
         [ 0.33999205,  0.13363543,  0.02129423, ..., -0.13025227,
          -0.16508926, -0.06969624]],

        [[-0.04594866, -0.11583115, -0.14462094, ..., -0.12290562,
          -0.35782176, -0.27979308],
         [-0.04806903, -0.00658076, -0.02234544, ..., -0.0878844 ,
          -0.3915486 , -0.34632796],
         [-0.04484424,  0.06471398, -0.07631404, ..., -0.12629718,
          -0.29905206, -0.28253639]]],

       [[[ 0.2671299 , -0.07969447,  0.05988706, ..., -0.09225675,
           0.31764674,  0.42209673],
         [ 0.30511212,  0.05677647,  0.21688674, ..., -0.06828708,
           0.3440761 ,  0.44033417],
         [ 0.23215917,  0.13365699,  0.12134422, ..., -0.1063385 ,
           0.28406844,  0.35949969]],

        [[ 0.09986369, -0.06240906,  0.07442063, ..., -0.02214639,
           0.25912452,  0.42349899],
         [ 0.10385381,  0.08851637,  0.2392226 , ..., -0.01210995,
           0.27064082,  0.40848857],
         [ 0.08978214,  0.18505956,  0.15264879, ..., -0.04266965,
           0.25779948,  0.35873157]],

        [[-0.34100872, -0.13399366, -0.11510294, ..., -0.11911335,
          -0.23109646, -0.19202407],
         [-0.37314063, -0.00698938,  0.02153259, ..., -0.09827439,
          -0.2535741 , -0.25541356],
         [-0.30331427,  0.08002605, -0.03926321, ..., -0.12958746,
          -0.19778992, -0.21510386]]],


       [[[-0.07573577, -0.07806503, -0.03540679, ..., -0.1208065 ,
           0.20088433,  0.09790061],
         [-0.07646758,  0.03879711,  0.09974211, ..., -0.08732687,
           0.2247974 ,  0.10158388],
         [-0.07260918,  0.10084777,  0.01313597, ..., -0.12594968,
           0.14647409,  0.05009392]],

        [[-0.28034249, -0.07094654, -0.0387974 , ..., -0.08843154,
           0.18996507,  0.07766484],
         [-0.31070709,  0.06031388,  0.10412455, ..., -0.06832542,
           0.20279962,  0.05222717],
         [-0.246675  ,  0.1414054 ,  0.02605635, ..., -0.10128672,
           0.16340195,  0.02832468]],

        [[-0.41602272, -0.11491341, -0.14672887, ..., -0.13079506,
          -0.1379628 , -0.26588449],
         [-0.46453714, -0.00576723, -0.02660675, ..., -0.10017379,
          -0.15603794, -0.32566148],
         [-0.33683276,  0.06601517, -0.08144748, ..., -0.13460518,
          -0.1342358 , -0.27096185]]]], dtype=float32)
 array([[ 0.73017758,  0.06493629,  0.03428847,  0.8260386 ,  0.2578029 ,
         0.54867655, -0.01243854,  0.34789944,  0.55108708,  0.06297145,
         0.60699058,  0.26703122,  0.649414  ,  0.17073655,  0.47723091,
         0.38250586,  0.46373144,  0.21496128,  0.46911287,  0.23825859,
         0.47519219,  0.70606434,  0.27007523,  0.68552732,  0.03216552,
         0.60252881,  0.35034859,  0.446798  ,  0.77326518,  0.58191687,
         0.39083108,  1.75193536,  0.66117406,  0.30213955,  0.53059655,
         0.67737472,  0.33273223,  0.49127793,  0.26548928,  0.18805602,
         0.07412001,  1.10810876,  0.28224325,  0.86755145,  0.19422948,
         0.810332  ,  0.36062282,  0.50720042,  0.42472315,  0.49632648,
         0.15117475,  0.79454446,  0.33494323,  0.47283995,  0.41552398,
         0.08496041,  0.37947032,  0.60067391,  0.47174454,  0.81309211,
         0.45521152,  1.08920074,  0.47757268,  0.4072122 ]], dtype=float32)] 

For an activation layer, e.g. relu1_1:

print(weights[1][0][0][0][0])

Output:

relu 

For a pooling layer, e.g. pool1:

print(weights[4][0][0][0][0]) 

Output:

pool1 

2. The weight matrices in imagenet-vgg-verydeep-19.mat store width and height in the opposite order from what we define, so the two axes have to be swapped:

matconvnet: weights are [width, height, in_channels, out_channels]

tensorflow: weights are [height, width, in_channels, out_channels]

np.transpose(kernels, (1, 0, 2, 3))
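A tiny illustration of what that transpose does to the shape (a dummy array, not real VGG weights):

import numpy as np

w = np.zeros((5, 3, 3, 64))                 # pretend shape: [width=5, height=3, in_channels=3, out_channels=64]
print(np.transpose(w, (1, 0, 2, 3)).shape)  # (3, 5, 3, 64): the first two axes are swapped, channels untouched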

3. bias.reshape(-1)

numpy.reshape(a, newshape, order='C'): what does the newshape parameter mean?

According to the NumPy documentation:

newshape : int or tuple of ints
The new shape should be compatible with the original shape. If an integer, then the result will be a 1-D array of that length. One shape dimension can be -1. In this case, **the value is inferred from the length of the array and remaining dimensions**.
In short, the new shape must be compatible with the original one; if one dimension is -1, NumPy infers that dimension from the length of the array and the remaining dimensions.

So reshape(-1) flattens the array into a single 1-D vector, and reshape(-1, 1) gives one column with however many rows are needed.
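A quick illustration (not from FCN.py):

import numpy as np

b = np.array([[1, 2, 3], [4, 5, 6]])
print(b.reshape(-1))     # [1 2 3 4 5 6]  -> flattened into a 1-D vector
print(b.reshape(-1, 1))  # shape (6, 1)   -> one column, number of rows inferred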


  • TensorFlow visualization
keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3], name="input_image")
annotation = tf.placeholder(tf.int32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1], name="annotation")

pred_annotation, logits = inference(image, keep_probability)
tf.summary.image("input_image", image, max_outputs=2)
tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2)
loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                      labels=tf.squeeze(annotation, squeeze_dims=[3]),
                                                                      name="entropy")))
loss_summary = tf.summary.scalar("entropy", loss)

trainable_var = tf.trainable_variables()
if FLAGS.debug:
    for var in trainable_var:
        utils.add_to_regularization_and_summary(var)
train_op = train(loss, trainable_var)

print("Setting up summary op...")
summary_op = tf.summary.merge_all()

print("Setting up image reader...")
train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
print(len(train_records))
print(len(valid_records))

print("Setting up dataset reader")
image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
if FLAGS.mode == 'train':
    train_dataset_reader = dataset.BatchDatset(train_records, image_options)
validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)

sess = tf.Session()

print("Setting up Saver...")
saver = tf.train.Saver()

# create two summary writers to show training loss and validation loss in the same graph
# need to create two folders 'train' and 'validation' inside FLAGS.logs_dir
train_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/train', sess.graph)
validation_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/validation')

sess.run(tf.global_variables_initializer())
ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, ckpt.model_checkpoint_path)
    print("Model restored...")

1. summary

Visualization in TensorFlow is done by summary ops and TensorBoard working together.

Basic usage

First, be clear about one thing: a summary is itself an op.

Writing out the graph structure

with tf.Session() as sess:
  writer = tf.summary.FileWriter(your_dir, sess.graph) 

Run tensorboard --logdir your_dir on the command line, then open 127.0.1.1:6006 in a browser. Note: from TensorFlow 1.1.0 onward, TensorBoard serves on 0.0.0.0:6006.
You can then see your network graph in TensorBoard.

Visualizing parameters

# ops
loss = ...
tf.summary.scalar("loss", loss)
merged_summary = tf.summary.merge_all()

init = tf.global_variables_initializer()
with tf.Session() as sess:
  writer = tf.summary.FileWriter(your_dir, sess.graph)
  sess.run(init)
  for i in xrange(100):
    _, summary = sess.run([train_op, merged_summary], feed_dict)
    writer.add_summary(summary, i)

Now open TensorBoard and the EVENTS tab will show loss changing with i. If nothing shows up, try calling writer.flush() at the end of the code: the FileWriter buffers events and may not have written them to disk yet.
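A two-line sketch of that workaround:

writer.flush()  # push any buffered events to disk without closing the writer
writer.close()  # flushes as well, and releases the event file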

Function reference

  • tf.summary.merge_all: merges all previously defined summary ops into a single op
  • tf.summary.FileWriter: creates a file writer that writes summary data to disk
  • tf.summary.scalar(summary_tags, Tensor/variable, collections=None): summary for a scalar value
  • tf.summary.image(tag, tensor, max_outputs=3, collections=None, name=None): tensor must be 4-D with shape [batch_size, height, width, channels]; max_outputs caps how many images per batch are written (default 3). This is handy for visualizing convolution kernels. Note that the image summary you see in TensorBoard is from the most recent global step. See the sketch after this list.
  • tf.summary.histogram(tag, values, collections=None, name=None): values can be a tensor of any shape; produces a histogram summary
  • tf.summary.audio(tag, tensor, sample_rate, max_outputs=3, collections=None, name=None)
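A minimal sketch tying the image and histogram summaries together (the tensor names here are illustrative, not from FCN.py):

import tensorflow as tf

images = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name="images")
conv1_w = tf.get_variable("conv1_w", shape=[3, 3, 3, 64])

tf.summary.image("input_images", images, max_outputs=3)  # write at most 3 images per step
tf.summary.histogram("conv1_w_hist", conv1_w)            # distribution of the kernel weights over training
merged = tf.summary.merge_all()
# summary_str = sess.run(merged, feed_dict=...); writer.add_summary(summary_str, step), as in the loop above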

  • Training, visualization, and testing (I added the test part myself)
if FLAGS.mode == "train":
for itr in xrange(MAX_ITERATION):
train_images, train_annotations = train_dataset_reader.next_batch(FLAGS.batch_size)
feed_dict = {image: train_images, annotation: train_annotations, keep_probability: 0.85}

sess.run(train_op, feed_dict=feed_dict)

if itr % 10 == 0:
train_loss, summary_str = sess.run([loss, loss_summary], feed_dict=feed_dict)
print("Step: %d, Train_loss:%g" % (itr, train_loss))
train_writer.add_summary(summary_str, itr)

if itr % 500 == 0:
valid_images, valid_annotations = validation_dataset_reader.next_batch(FLAGS.batch_size)
valid_loss, summary_sva = sess.run([loss, loss_summary], feed_dict={image: valid_images, annotation: valid_annotations,
keep_probability: 1.0})
print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss))

# add validation loss to TensorBoard
validation_writer.add_summary(summary_sva, itr)
saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)

elif FLAGS.mode == "visualize":
valid_images, valid_annotations = validation_dataset_reader.get_random_batch(FLAGS.batch_size)
pred = sess.run(pred_annotation, feed_dict={image: valid_images, annotation: valid_annotations,
keep_probability: 1.0})
valid_annotations = np.squeeze(valid_annotations, axis=3)
pred = np.squeeze(pred, axis=3)

for itr in range(FLAGS.batch_size):
utils.save_image(valid_images[itr].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(5+itr))
utils.save_image(valid_annotations[itr].astype(np.uint8), FLAGS.logs_dir, name="gt_" + str(5+itr))
utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir, name="pred_" + str(5+itr))
print("Saved image: %d" % itr)

This part is straightforward.
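The heading above also mentions a test mode I added; it is not shown in the snippet, but a minimal sketch of such a branch might look like this (it reuses the validation reader and the loss op defined earlier; the exact behaviour of the added test mode is an assumption):

elif FLAGS.mode == "test":
    # hypothetical sketch: report the mean loss over the validation records
    n_batches = len(valid_records) // FLAGS.batch_size
    total_loss = 0.0
    for _ in xrange(n_batches):
        test_images, test_annotations = validation_dataset_reader.next_batch(FLAGS.batch_size)
        total_loss += sess.run(loss, feed_dict={image: test_images, annotation: test_annotations,
                                                keep_probability: 1.0})
    print("Mean test loss over %d batches: %g" % (n_batches, total_loss / n_batches))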
