MobileNet DepthwiseConvolution、ShuffleNet shuffle channel、CenterLoss在Caffe下实现

最新推荐文章于 2024-09-24 15:22:35 发布

jsk_learner

最新推荐文章于 2024-09-24 15:22:35 发布

阅读量1.5k

点赞数 1

分类专栏： Caffe 文章标签： Caffe DepthwiseConvolution shuffle channel CenterLoss

本文链接：https://blog.csdn.net/jsk_learner/article/details/95661552

版权

针对Caffe下特殊操作实现

MobileNet-DepthwiseConvolution在Caffe下实现
ShuffleNet通道混洗操作（shuffle channel）在Caffe下的实现
损失函数CenterLoss在Caffe下的实现

本篇博客主要讲解Caffe下一些特殊操作的实现，主要涉及MobileNet深度可分离卷积操作的实现、ShuffleNet的通道混洗操作、CenterLoss损失函数的实现

系统：Linux-Ubuntu

MobileNet-DepthwiseConvolution在Caffe下实现

我用的是Github上shicai的源码，可在以下链接进行下载：Github上DepthwiseConvolution实现源码下载

深度可分离卷积（DepthwiseConvolution）操作的实现不需要对Caffe目录下的/src/caffe/proto/caffe.proto进行修改。

下载链接中的代码后，在目录caffe下有两个文件夹：include和src

在两个文件夹下分别有我们需要的源码：

include：depthwise_conv_layer.hpp
src：depthwise_conv_layer.cpp、depthwise_conv_layer.cu

文件名字	文件用途
depthwise_conv_layer.hpp	头文件
depthwise_conv_layer.cpp	DepthwiseConvolution的CPU实现
depthwise_conv_layer.cu	DepthwiseConvolution的GPU实现

实现步骤

我们需要做的操作就是：

将include下的depthwise_conv_layer.hpp放到/caffeMS/include/caffe/layers/目录下

将src下的depthwise_conv_layer.cpp和 depthwise_conv_layer.cu放到/caffeMS/src/caffe/layers/目录下。

然后重新编译Caffe即可。

    make all -j8
    make test -j8
    make runtest -j8

实际使用：
对dw层，即group参数大于1的层，将其type由"Convolution"改为"DepthwiseConvolution"

# Example depthwise layer: a grouped convolution whose type has been switched
# from "Convolution" to "DepthwiseConvolution".
layer {
   
  name: "conv2_1/dw"
  type: "DepthwiseConvolution"
  bottom: "conv1"
  top: "conv2_1/dw"
  # Multipliers applied to the learning rate and weight decay of this layer's
  # weight blob.
  param {
   
    lr_mult: 1.0
    decay_mult: 1.0
  }
  # The layer keeps using the regular convolution_param message.
  convolution_param {
   
    num_output: 32
    bias_term: false
    pad: 1
    kernel_size: 3
    # group equals num_output here (32), i.e. one filter per channel.
    group: 32
    stride: 1
    weight_filler {
   
      type: "msra"
    }
    engine: CAFFE
  }
}

不过，链接中下载的文件“transferTypeToDepthwiseConvolution.py”可以直接完成这个操作

python2 transferTypeToDepthwiseConvolution.py mobilenet_train.prototxt mobilenet_train_dw.prototxt

"""Rewrite a Caffe prototxt so grouped convolutions use DepthwiseConvolution.

Usage:
    python transferTypeToDepthwiseConvolution.py SOURCE_PROTOTXT TARGET_PROTOTXT

Every layer of type "Convolution" whose convolution_param.group differs from 1
is retyped to "DepthwiseConvolution"; everything else is written back as parsed.
"""
import caffe.proto.caffe_pb2 as caffe_pb2
from google.protobuf.text_format import Merge
import argparse
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('source_prototxt')
    parser.add_argument('target_prototxt')

    args = parser.parse_args()
    net = caffe_pb2.NetParameter()
    # Read through a context manager so the source file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(args.source_prototxt, 'r') as sf:
        Merge(sf.read(), net)
    for layer in net.layer:
        # NOTE(review): any group != 1 is retagged, including grouped
        # convolutions that are not strictly depthwise (group != num_output)
        # -- confirm this is intended for the networks being converted.
        if layer.type == "Convolution" and layer.convolution_param.group != 1:
            layer.type = "DepthwiseConvolution"
    with open(args.target_prototxt, 'w') as tf:
        tf.write(str(net))

源码：

depthwise_conv_layer.hpp

/*
 * depthwise_conv_layer.hpp
 *
 *  Created on: May 23, 2017
 *      Author: liuhao
 */

#ifndef CAFFE_DEPTHWISE_CONV_LAYER_HPP_
#define CAFFE_DEPTHWISE_CONV_LAYER_HPP_



#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/base_conv_layer.hpp"

namespace caffe {
   

/**
 * @brief Convolution layer registered under the type string
 *        "DepthwiseConvolution".
 *
 *   The class derives from BaseConvolutionLayer and only declares the
 *   forward/backward passes and the output-shape computation; their
 *   definitions live in depthwise_conv_layer.cpp (CPU) and
 *   depthwise_conv_layer.cu (GPU). The CPU passes delegate to the base
 *   class's *_cpu_gemm / *_cpu_bias helpers.
 */
template <typename Dtype>
class DepthwiseConvolutionLayer : public BaseConvolutionLayer<Dtype> {
 public:
  /**
   * @param param layer configuration, handed unchanged to
   *    BaseConvolutionLayer. The layer is configured through
   *    convolution_param with the usual convolution options:
   *  - num_output: the number of filters.
   *  - kernel_size / kernel_h / kernel_w: the filter dimensions (one value
   *    for square filters, or height and width separately).
   *  - stride / stride_h / stride_w (\b optional, default 1): the filter
   *    stride.
   *  - pad / pad_h / pad_w (\b optional, default 0): implicit zero-padding
   *    of the input.
   *  - dilation (\b optional, default 1): the filter dilation.
   *  - group (\b optional, default 1): the number of filter groups; input
   *    and output channel counts must be divisible by it. Each group takes
   *    1 / group of the input channels and produces 1 / group of the output
   *    channels.
   *  - bias_term (\b optional, default true): whether to add a bias.
   *  - engine: the convolution engine selector of convolution_param.
   */
  explicit DepthwiseConvolutionLayer(const LayerParameter& param)
      : BaseConvolutionLayer<Dtype>(param) {}

  /// Type string this layer reports.
  virtual inline const char* type() const { return "DepthwiseConvolution"; }

 protected:
  // CPU passes.
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  // GPU passes.
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

  /// Fills output_shape_ with the spatial output extents.
  virtual void compute_output_shape();

  /// Always false for this layer.
  virtual inline bool reverse_dimensions() { return false; }
};

}  // namespace caffe



#endif /* CAFFE_DEPTHWISE_CONV_LAYER_HPP_ */

depthwise_conv_layer.cpp

#include <vector>
#include "caffe/layers/depthwise_conv_layer.hpp"

namespace caffe {
   

/**
 * @brief Recomputes output_shape_ with one extent per spatial axis.
 *
 * For every spatial axis the extent is
 *   (input + 2 * pad - (dilation * (kernel - 1) + 1)) / stride + 1
 * using integer division.
 */
template <typename Dtype>
void DepthwiseConvolutionLayer<Dtype>::compute_output_shape() {
  const int* kernel = this->kernel_shape_.cpu_data();
  const int* stride = this->stride_.cpu_data();
  const int* pad = this->pad_.cpu_data();
  const int* dilation = this->dilation_.cpu_data();
  this->output_shape_.clear();
  for (int axis = 0; axis < this->num_spatial_axes_; ++axis) {
    // input_shape() is offset by one to skip the channel axis.
    const int in_extent = this->input_shape(axis + 1);
    // Footprint of the dilated kernel along this axis.
    const int dilated_kernel = dilation[axis] * (kernel[axis] - 1) + 1;
    const int padded_extent = in_extent + 2 * pad[axis];
    this->output_shape_.push_back(
        (padded_extent - dilated_kernel) / stride[axis] + 1);
  }
}

/**
 * @brief CPU forward pass.
 *
 * For every bottom/top pair and every one of the num_ items in it, runs the
 * base class's forward_cpu_gemm with the layer weights (blobs_[0]) and, when
 * bias_term_ is set, adds the bias (blobs_[1]) through forward_cpu_bias.
 *
 * @param bottom input blobs.
 * @param top    output blobs; top[i] receives the result for bottom[i].
 */
template <typename Dtype>
void DepthwiseConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const Dtype* weight = this->blobs_[0]->cpu_data();
  // The bias pointer does not change between items, so fetch it once instead
  // of once per item. It is only dereferenced when bias_term_ is set.
  const Dtype* bias = this->bias_term_ ? this->blobs_[1]->cpu_data() : NULL;
  // size_t index: vector::size() is unsigned, so an int index would mix
  // signed and unsigned operands in the comparison.
  for (size_t i = 0; i < bottom.size(); ++i) {
    const Dtype* bottom_data = bottom[i]->cpu_data();
    Dtype* top_data = top[i]->mutable_cpu_data();
    for (int n = 0; n < this->num_; ++n) {
      Dtype* top_item = top_data + n * this->top_dim_;
      this->forward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight,
          top_item);
      if (this->bias_term_) {
        this->forward_cpu_bias(top_item, bias);
      }
    }
  }
}

/**
 * @brief CPU backward pass.
 *
 * For every top/bottom pair this accumulates, through the base class helpers:
 *  - the bias gradient (backward_cpu_bias) when bias_term_ and
 *    param_propagate_down_[1] are set,
 *  - the weight gradient (weight_cpu_gemm) when param_propagate_down_[0] is
 *    set,
 *  - the gradient w.r.t. the bottom data (backward_cpu_gemm) when
 *    propagate_down[i] is set.
 *
 * @param top            output blobs whose diffs are read.
 * @param propagate_down per-bottom flag: whether to compute bottom[i]'s diff.
 * @param bottom         input blobs; data is read, diffs are written.
 */
template <typename Dtype>
void DepthwiseConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  const Dtype* weight = this->blobs_[0]->cpu_data();
  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
  // size_t index: vector::size() is unsigned, so an int index would mix
  // signed and unsigned operands in the comparison.
  for (size_t i = 0; i < top.size(); ++i) {
    const Dtype* top_diff = top[i]->cpu_diff();
    const Dtype* bottom_data = bottom[i]->cpu_data();
    Dtype* bottom_diff = bottom[i]->mutable_cpu_diff();
    // Bias gradient, if necessary.
    if (this->bias_term_ && this->param_propagate_down_[1]) {
      Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff();
      for (int n = 0; n < this->num_; ++n) {
        this->backward_cpu_bias(bias_diff, top_diff + n * this->top_dim_);
      }
    }
    // Nothing else to do for this pair when neither the weights nor the
    // bottom blob need a gradient.
    if (!this->param_propagate_down_[0] && !propagate_down[i]) {
      continue;
    }
    for (int n = 0; n < this->num_; ++n) {
      const Dtype* top_diff_item = top_diff + n * this->top_dim_;
      // Gradient w.r.t. weight. Note that diffs are accumulated.
      if (this->param_propagate_down_[0]) {
        this->weight_cpu_gemm(bottom_data + n * this->bottom_dim_,
            top_diff_item, weight_diff);
      }
      // Gradient w.r.t. bottom data, if necessary.
      if (propagate_down[i]) {
        this->backward_cpu_gemm(top_diff_item, weight,
            bottom_diff + n * this->bottom_dim_);
      }
    }
  }
}

// In CPU_ONLY builds the .cu file is not expected to be compiled, so STUB_GPU
// stands in for the GPU methods declared in the header.
// NOTE(review): exact stub behaviour is defined by Caffe's STUB_GPU macro,
// which is not visible here -- confirm.
#ifdef CPU_ONLY
STUB_GPU(DepthwiseConvolutionLayer);
#endif

// Caffe macros defined outside this file; presumably they instantiate the
// template for the supported Dtypes and register the layer under the type
// name "DepthwiseConvolution" used in prototxt files -- confirm against the
// macro definitions.
INSTANTIATE_CLASS(DepthwiseConvolutionLayer);
REGISTER_LAYER_CLASS(DepthwiseConvolution);
}  // namespace caffe

depthwise_conv_layer.cu

#include <vector>
#include <algorithm>
#include <cfloat>
#include "caffe/layers/depthwise_conv_layer.hpp"
#include "caffe/util/math_functions.hpp"


/*
 * The depthwise layer for mobilenet.   only for stride 1
 */

namespace caffe {
   

/**
 * @brief CUDA kernel: direct per-channel (depthwise) 2-D convolution.
 *
 * Each iteration of CUDA_KERNEL_LOOP produces one output element. The flat
 * output index is decomposed as ((n * channels + c) * conved_height + ph) *
 * conved_width + pw; the input is indexed as ((n * channels + c) * height + h)
 * * width + w, and the filter for channel c is read from
 * weight + c * kernel_h * kernel_w (one kernel_h x kernel_w filter per
 * channel). Taps that fall into the zero padding are skipped.
 *
 * The kernel takes no dilation argument, so dilation is not applied here.
 *
 * @param nthreads      number of output elements to compute.
 * @param bottom_data   input data.
 * @param num           not used by the kernel body (n is derived from index).
 * @param channels      number of channels.
 * @param height,width  input spatial size.
 * @param conved_height,conved_width output spatial size.
 * @param kernel_h,kernel_w filter size.
 * @param stride_h,stride_w stride.
 * @param pad_h,pad_w   implicit zero padding.
 * @param top_data      output data, written at [index].
 * @param weight        filter data.
 * @param bias          per-channel bias; only read when bias_term_ is true.
 * @param bias_term_    whether to add bias[c] to every output element.
 */
template <typename Dtype>
__global__ void ConvForward(const int nthreads,
    const Dtype* const bottom_data, const int num, const int channels,
    const int height, const int width, const int conved_height,
    const int conved_width, const int kernel_h, const int kernel_w,
    const int stride_h, const int stride_w, const int pad_h, const int pad_w,
    Dtype* const top_data, const Dtype* const weight, const Dtype* const bias,
    const bool bias_term_) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    // Decompose the flat output index into (n, c, ph, pw), pw fastest.
    const int pw = index % conved_width;
    const int ph = (index / conved_width) % conved_height;
    const int c = (index / conved_width / conved_height) % channels;
    const int n = index / conved_width / conved_height / channels;

    // Top-left corner of the filter window in input coordinates. It is
    // negative when the window starts inside the padding.
    const int h_origin = ph * stride_h - pad_h;
    const int w_origin = pw * stride_w - pad_w;

    // Part of the window that overlaps real input samples.
    const int hstart = max(h_origin, 0);
    const int wstart = max(w_origin, 0);
    const int hend = min(h_origin + kernel_h, height);
    const int wend = min(w_origin + kernel_w, width);

    const Dtype* const bottom_slice =
        bottom_data + (n * channels + c) * height * width;
    const Dtype* const weight_slice = weight + c * kernel_h * kernel_w;

    Dtype acc = 0;
    for (int h = hstart; h < hend; ++h) {
      // Index the filter relative to the *unclipped* window origin. Deriving
      // the offset from the clipped end (kernel_h - hend) picks the wrong
      // filter rows/columns when the window is also clipped at the
      // bottom/right, i.e. when the input is smaller than the kernel.
      const Dtype* const bottom_row = bottom_slice + h * width;
      const Dtype* const weight_row = weight_slice + (h - h_origin) * kernel_w;
      for (int w = wstart; w < wend; ++w) {
        acc += bottom_row[w] * weight_row[w - w_origin];
      }
    }
    if (bias_term_) {
      acc += bias[c];
    }
    top_data[index] = acc;
  }
}

template<typename Dtype>
void DepthwiseConvolutionLayer<Dtype>::Forward_gpu(
		const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
   
//	std::cout << "fp" << std::endl;
	const Dtype* weight = this

最低0.47元/天解锁文章