[Caffe]: 关于concat layer

http://caffe.berkeleyvision.org/tutorial/layers/concat.html

http://blog.csdn.net/cham_3/article/details/58586263

今天,我们看一下caffe的拼接层,即将两个或多个layer进行拼接。 
首先,看一下caffe官方文档。 
concat


同其他layer一样,分为setup、reshape、Forward_cpu、Backward_cpu。

// ConcatLayer joins two or more input blobs into one output blob
// (many bottoms, one top). Two common layouts, as noted by the author:
//   concat_dim = 0 (num axis):     (n1+n2+...+nk) x c x h x w
//   concat_dim = 1 (channel axis): n x (c1+c2+...+ck) x h x w

// The concatenation axis is selected through either `axis` or `concat_dim`.
//
// LayerSetUp only validates the layer parameters: the CHECK fails when both
// has_axis() and has_concat_dim() report true. `bottom` and `top` are not
// used in this function.
template <typename Dtype>
void ConcatLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const ConcatParameter& concat_param = this->layer_param_.concat_param();
  CHECK(!(concat_param.has_axis() && concat_param.has_concat_dim()))
      << "Either axis or concat_dim should be specified; not both.";
}

// Resolves the concatenation axis, verifies that all bottoms are compatible,
// and reshapes top[0] to the concatenated shape. Also sets concat_axis_,
// num_concats_ and concat_input_size_, which Forward_cpu/Backward_cpu read.
template <typename Dtype>
void ConcatLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Read the axis setting to decide which dimension is concatenated.
  const int num_axes = bottom[0]->num_axes();
  const ConcatParameter& concat_param = this->layer_param_.concat_param();
  // The if/else below obtains and validates the axis index.
  if (concat_param.has_concat_dim()) {
    concat_axis_ = static_cast<int>(concat_param.concat_dim());
    // Don't allow negative indexing for concat_dim, a uint32 -- almost
    // certainly unintended.
    CHECK_GE(concat_axis_, 0) << "casting concat_dim from uint32 to int32 "
        << "produced negative result; concat_dim must satisfy "
        << "0 <= concat_dim < " << kMaxBlobAxes;
    CHECK_LT(concat_axis_, num_axes) << "concat_dim out of range.";
  } else {
    concat_axis_ = bottom[0]->CanonicalAxisIndex(concat_param.axis());
  }
  // Initialize with the first blob.
  // `bottom` is a vector of Blob pointers, one per input (e.g. bottom[0] and
  // bottom[1] when two layers are concatenated). Seeding the output shape
  // from bottom[0] is enough because the loop below checks that every other
  // input matches it on all axes except concat_axis_.
  vector<int> top_shape = bottom[0]->shape();
  // Product of the dimensions before concat_axis_:
  //   concat_axis_ = 0 -> 1 (empty product); concat_axis_ = 1 -> num.
  num_concats_ = bottom[0]->count(0, concat_axis_);
  // Product of the dimensions after concat_axis_:
  //   concat_axis_ = 0 -> channel x height x width;
  //   concat_axis_ = 1 -> height x width.
  concat_input_size_ = bottom[0]->count(concat_axis_ + 1);

  int bottom_count_sum = bottom[0]->count();
  // Every remaining input must have the same number of axes and the same
  // extent on every axis other than concat_axis_.
  for (int i = 1; i < bottom.size(); ++i) {
    CHECK_EQ(num_axes, bottom[i]->num_axes())
        << "All inputs must have the same #axes.";
    for (int j = 0; j < num_axes; ++j) {
      if (j == concat_axis_) { continue; }
      CHECK_EQ(top_shape[j], bottom[i]->shape(j))
          << "All inputs must have the same shape, except at concat_axis.";
    }
    // Accumulate the total element count and grow the output along the
    // concatenation axis by this input's extent.
    bottom_count_sum += bottom[i]->count();
    top_shape[concat_axis_] += bottom[i]->shape(concat_axis_);
  }
  top[0]->Reshape(top_shape);
  // Sanity check: the output holds exactly as many elements as all inputs.
  CHECK_EQ(bottom_count_sum, top[0]->count());
}

 

1、这里有一点需要解释,可以看到,bottom类型为 vector blob,这里只需要使用bottom[0]给shape赋值就好,其实bottom本身就是一个Blob的vector。 
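2、CHECK_**,这里给小白们解释一下:它们是 glog 提供的断言宏,用来判断两个值的关系是否成立,不成立时打印错误信息并终止程序。 
CHECK_EQ(a, b) 要求 a == b,CHECK_NE(a, b) 要求 a != b,CHECK_LT / CHECK_LE 要求 a < b / a <= b,CHECK_GT / CHECK_GE 要求 a > b / a >= b。 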
3、 count,这看到有好多的count函数,这些函数在blob层实现,解释如下:

// Returns the value of the count_ member.
inline int count() const {
  return count_;
}

  /**
   * @brief Compute the volume of a slice; i.e., the product of dimensions
   *        among a range of axes.
   *
   * @param start_axis The first axis to include in the slice.
   *
   * @param end_axis The first axis to exclude from the slice.
   */
  inline int count(int start_axis, int end_axis) const {
    // Validate the half-open axis range [start_axis, end_axis) before use.
    CHECK_LE(start_axis, end_axis);
    CHECK_GE(start_axis, 0);
    CHECK_GE(end_axis, 0);
    CHECK_LE(start_axis, num_axes());
    CHECK_LE(end_axis, num_axes());
    // Multiply the extents of every axis in the range; an empty range
    // (start_axis == end_axis) yields 1.
    int volume = 1;
    int axis = start_axis;
    while (axis < end_axis) {
      volume *= shape(axis);
      ++axis;
    }
    return volume;
  }
  /**
   * @brief Compute the volume of a slice spanning from a particular first
   *        axis to the final axis.
   *
   * @param start_axis The first axis to include in the slice.
   */
  inline int count(int start_axis) const {
    return count(start_axis, num_axes());
  }

前向传播就是layer的拼接

// Forward pass: copies every bottom blob's data into its slot of top[0].
//
// Each blob is treated as num_concats_ outer slices. Within one outer slice
// of the output, the inputs are laid out back to back along concat_axis_,
// each occupying (its extent on concat_axis_) * concat_input_size_ elements.
template <typename Dtype>
void ConcatLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  Dtype* const dst_base = top[0]->mutable_cpu_data();
  // Number of concat-axis entries already filled by earlier inputs.
  int axis_cursor = 0;
  const int dst_axis_len = top[0]->shape(concat_axis_);
  // Visit every input blob in order.
  for (int b = 0; b < bottom.size(); ++b) {
    const Dtype* const src_base = bottom[b]->cpu_data();
    const int src_axis_len = bottom[b]->shape(concat_axis_);
    // Elements this input contributes to one outer slice.
    const int chunk = src_axis_len * concat_input_size_;
    for (int outer = 0; outer < num_concats_; ++outer) {
      // Source: the outer-th contiguous chunk of this input.
      const Dtype* const src = src_base + outer * chunk;
      // Destination: the outer-th output slice, shifted past earlier inputs.
      Dtype* const dst = dst_base +
          (outer * dst_axis_len + axis_cursor) * concat_input_size_;
      caffe_copy(chunk, src, dst);
    }
    axis_cursor += src_axis_len;
  }
}

反向传播与前向传播对称:把 top 的 diff 按拼接时的位置切分,再拷贝回各个 bottom 的 diff

// Backward pass: the mirror image of the forward copy. The slice of
// top[0]'s diff that corresponds to each bottom is copied back into that
// bottom's diff.
//
// Bottoms whose propagate_down flag is false receive no gradient, but they
// still occupy space along concat_axis_ in the top blob, so the running
// offset must advance past them as well. Skipping the offset update would
// make every later bottom read the wrong slice of top_diff.
template <typename Dtype>
void ConcatLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* top_diff = top[0]->cpu_diff();
  // Number of concat-axis entries consumed by the bottoms visited so far.
  int offset_concat_axis = 0;
  const int top_concat_axis = top[0]->shape(concat_axis_);
  for (int i = 0; i < bottom.size(); ++i) {
    const int bottom_concat_axis = bottom[i]->shape(concat_axis_);
    if (propagate_down[i]) {
      Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
      for (int n = 0; n < num_concats_; ++n) {
        // Copy this bottom's chunk out of the n-th outer slice of top_diff.
        caffe_copy(bottom_concat_axis * concat_input_size_, top_diff +
            (n * top_concat_axis + offset_concat_axis) * concat_input_size_,
            bottom_diff + n * bottom_concat_axis * concat_input_size_);
      }
    }
    // Always advance, even when this bottom was skipped.
    offset_concat_axis += bottom_concat_axis;
  }
}

Concat layer

在Deep Neural Network中,最主要的两种提高模型性能的优化方向就是使模型wider or deeper。 
在使模型变宽时,常需要把多个分支合并起来作为后续层的输入。它就是今天要介绍的concat layer。

按照惯例,我们先来看下concat layer的参数。

message ConcatParameter {
  // The axis along which to concatenate -- may be negative to index from the
  // end (e.g., -1 for the last axis).  Other axes must have the
  // same dimension for all the bottom blobs.
  // By default, ConcatLayer concatenates blobs along the "channels" axis (1).
  optional int32 axis = 2 [default = 1]; // Caffe blobs are usually laid out as NxCxHxW, so by default the concatenation happens along the channel axis.

  // DEPRECATED: alias for "axis" -- does not support negative indexing.
  optional uint32 concat_dim = 1 [default = 1]; 
}

 

concat作为连接多个输入的工具层,其参数很少,只有一个用来指定沿哪个维度(例如N维度或C维度)进行连接的参数(axis,或已弃用的concat_dim)。该层通常有两个或更多输入;从下面的实现可以看到,当bottom的size为1时,输出直接共享输入的数据,不做拷贝。多个输入拼接后的形状如下所示: 

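$$
\begin{aligned}
x_1 &:= N \times C \times H \times W \\
x_2 &:= N \times C \times H \times W \\
&\;\;\vdots \\
x_k &:= N \times C \times H \times W \\
\text{output} &:= kN \times C \times H \times W \quad (\text{axis} = 0) \\
\text{or}\quad \text{output} &:= N \times kC \times H \times W \quad (\text{axis} = 1)
\end{aligned}
$$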


至此,我们大致了解了concat层怎么用。接下来,我们介绍一下它的实现。

 

向前传播时,实现比较简单。

// Forward pass: copies every bottom blob's data into its slot of top[0],
// placing the inputs back to back along concat_axis_.
template <typename Dtype>
void ConcatLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // With a single input there is nothing to copy here; Reshape makes top[0]
  // share bottom[0]'s data in that case.
  if (bottom.size() == 1) { return; }
  Dtype* top_data = top[0]->mutable_cpu_data();
  // Number of concat-axis entries already filled by earlier inputs.
  int offset_concat_axis = 0;
  const int top_concat_axis = top[0]->shape(concat_axis_);
  for (int i = 0; i < bottom.size(); ++i) {
    // Read pointer for the i-th input.
    const Dtype* bottom_data = bottom[i]->cpu_data();
    const int bottom_concat_axis = bottom[i]->shape(concat_axis_);
    for (int n = 0; n < num_concats_; ++n) {
      // Copy the n-th chunk of this input into the n-th outer slice of the
      // output, shifted past the inputs that were already written.
      caffe_copy(bottom_concat_axis * concat_input_size_,
          bottom_data + n * bottom_concat_axis * concat_input_size_,
          top_data + (n * top_concat_axis + offset_concat_axis)
              * concat_input_size_);
    }
    offset_concat_axis += bottom_concat_axis;
  }
}

单看主要函数显然有些不清不楚,接下来我们看看layersetup和reshape就能明白它具体是怎么做的了。

// Validates the layer parameters. `bottom` and `top` are not used here.
template <typename Dtype>
void ConcatLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Fetch the concat parameters; `axis` and `concat_dim` must not both be
  // set explicitly.
  const ConcatParameter& concat_param = this->layer_param_.concat_param();
  CHECK(!(concat_param.has_axis() && concat_param.has_concat_dim()))
      << "Either axis or concat_dim should be specified; not both.";
}

template <typename Dtype>
void ConcatLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int num_axes = bottom[0]->num_axes();                               \\获取输入维度数
  const ConcatParameter& concat_param = this->layer_param_.concat_param();
  if (concat_param.has_concat_dim()) {                                      \\如果指定concat_dim,判断是否非负
    concat_axis_ = static_cast<int>(concat_param.concat_dim());
    // Don't allow negative indexing for concat_dim, a uint32 -- almost
    // certainly unintended.
    CHECK_GE(concat_axis_, 0) << "casting concat_dim from uint32 to int32 "
        << "produced negative result; concat_dim must satisfy "
        << "0 <= concat_dim < " << kMaxBlobAxes;
    CHECK_LT(concat_axis_, num_axes) << "concat_dim out of range.";         \\concat_dim不能超过输入的维度数
  } else {
    concat_axis_ = bottom[0]->CanonicalAxisIndex(concat_param.axis());      \\指定了axis,转换成非负索引得到concat_axis
  }
  // Initialize with the first blob.
  vector<int> top_shape = bottom[0]->shape();                               \\初始化输出,shape与输入一致
  num_concats_ = bottom[0]->count(0, concat_axis_);                         \\需要concat的个数,
  concat_input_size_ = bottom[0]->count(concat_axis_ + 1);                  \\每个concat的数据量大小
  int bottom_count_sum = bottom[0]->count();                                \\输入总的特征值个数,初始时只有第一个输入的个数
  for (int i = 1; i < bottom.size(); ++i) {                                 \\
    CHECK_EQ(num_axes, bottom[i]->num_axes())                               \\判断每个输入维度是否一致
        << "All inputs must have the same #axes.";
    for (int j = 0; j < num_axes; ++j) {                                    \\除了进行concat的那个维度外,其他维度的大小是否保持一致
      if (j == concat_axis_) { continue; }
      CHECK_EQ(top_shape[j], bottom[i]->shape(j))
          << "All inputs must have the same shape, except at concat_axis.";
    }
    bottom_count_sum += bottom[i]->count();                                 \\累加第i个输入的个数
    top_shape[concat_axis_] += bottom[i]->shape(concat_axis_);              \\累加输出的指定axis的值
  }
  top[0]->Reshape(top_shape);                                               \\reshape输出blob
  CHECK_EQ(bottom_count_sum, top[0]->count());                              \\检查bottom_count_sum和top_count的数据量是否一致
  if (bottom.size() == 1) {
    top[0]->ShareData(*bottom[0]);                                          \\只有一个输入,直接复制成输出
    top[0]->ShareDiff(*bottom[0]);                                          \\梯度shape也和输入一致
  }
}

 

源码解析这里基本上就明白concat层的原理了,最后我们来看下它的后向传播。其原理十分简单,把输出求得的梯度直接复制给对应的输入即可。

// Backward pass: copies the slice of top[0]'s diff that belongs to each
// bottom back into that bottom's diff.
template <typename Dtype>
void ConcatLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  // With a single input there is nothing to copy here; Reshape makes top[0]
  // share bottom[0]'s diff in that case.
  if (bottom.size() == 1) { return; }
  const Dtype* top_diff = top[0]->cpu_diff();
  // Number of concat-axis entries consumed by the bottoms visited so far.
  int offset_concat_axis = 0;
  const int top_concat_axis = top[0]->shape(concat_axis_);
  for (int i = 0; i < bottom.size(); ++i) {
    const int bottom_concat_axis = bottom[i]->shape(concat_axis_);
    if (propagate_down[i]) {
      Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
      for (int n = 0; n < num_concats_; ++n) {
        // Copy the output gradient straight into the matching input slot.
        caffe_copy(bottom_concat_axis * concat_input_size_, top_diff +
            (n * top_concat_axis + offset_concat_axis) * concat_input_size_,
            bottom_diff + n * bottom_concat_axis * concat_input_size_);
      }
    }
    // Advance past this bottom even when no gradient was propagated to it,
    // so later bottoms read the correct slice of top_diff.
    offset_concat_axis += bottom_concat_axis;
  }
}

对于不熟悉blob类成员函数的读者,可以参考这里

posted on 2017-11-18 15:06  塔上的樹  阅读(2114)  评论(0)    收藏  举报