针对Caffe下特殊操作实现
本篇博客主要讲解Caffe下一些特殊操作的实现,主要涉及MobileNet深度可分离卷积操作的实现、ShuffleNet的通道混洗操作、CenterLoss损失函数的实现
系统:Linux-Ubuntu
MobileNet-DepthwiseConvolution在Caffe下实现
我用的是Github上shicai的源码,可在以下链接进行下载:Github上DepthwiseConvolution实现源码下载
深度可分离卷积(DepthwiseConvolution)操作的实现不需要对Caffe目录下的/src/caffe/proto/caffe.proto进行修改。
下载链接中的代码后,在目录caffe下有两个文件夹:include和src
在两个文件夹下分别有我们需要的源码:
- include:depthwise_conv_layer.hpp
- src:depthwise_conv_layer.cpp、depthwise_conv_layer.cu
文件名字 | 文件用途 |
---|---|
depthwise_conv_layer.hpp | 头文件 |
depthwise_conv_layer.cpp | DepthwiseConvolution的CPU实现 |
depthwise_conv_layer.cu | DepthwiseConvolution的GPU实现 |
实现步骤
-
我们需要做的操作就是:
将include下的depthwise_conv_layer.hpp放到/caffeMS/include/caffe/layers/目录下
将src下的depthwise_conv_layer.cpp和 depthwise_conv_layer.cu放到/caffeMS/src/caffe/layers/目录下。
然后重新编译Caffe即可。
make all -j8
make test -j8
make runtest -j8
实际使用:
对dw层,即group参数大于1的层,将其type由"Convolution"改为"DepthwiseConvolution"
# Example layer definition using the DepthwiseConvolution type.
layer {
name: "conv2_1/dw"
# The only change relative to a stock grouped convolution: the type string.
type: "DepthwiseConvolution"
bottom: "conv1"
top: "conv2_1/dw"
# Learning-rate and weight-decay multipliers for the layer's parameter blob.
param {
lr_mult: 1.0
decay_mult: 1.0
}
convolution_param {
num_output: 32
# No bias blob for this layer.
bias_term: false
pad: 1
kernel_size: 3
# group equals num_output here (32).
group: 32
stride: 1
weight_filler {
type: "msra"
}
engine: CAFFE
}
}
- 不过,链接中下载的文件“transferTypeToDepthwiseConvolution.py”可以直接完成这个操作
python2 transferTypeToDepthwiseConvolution.py mobilenet_train.prototxt mobilenet_train_dw.prototxt
import caffe.proto.caffe_pb2 as caffe_pb2
from google.protobuf.text_format import Merge
import argparse
if __name__ == '__main__':
    # Usage: transferTypeToDepthwiseConvolution.py <source_prototxt> <target_prototxt>
    parser = argparse.ArgumentParser()
    parser.add_argument('source_prototxt')
    parser.add_argument('target_prototxt')
    args = parser.parse_args()

    # Parse the text-format network definition into a NetParameter message.
    # The file is read inside a `with` block so the handle is always closed.
    net = caffe_pb2.NetParameter()
    with open(args.source_prototxt, 'r') as sf:
        Merge(sf.read(), net)

    # Retag every Convolution layer whose group is not 1.
    # NOTE(review): this also matches grouped convolutions that are not
    # depthwise (group != number of channels) - confirm the target layer
    # supports them before converting such networks.
    for layer in net.layer:
        if layer.type == "Convolution" and layer.convolution_param.group != 1:
            layer.type = "DepthwiseConvolution"

    # Write the modified network back out in protobuf text format.
    with open(args.target_prototxt, 'w') as tf:
        tf.write(str(net))
源码:
depthwise_conv_layer.hpp
/*
* depthwise_conv_layer.hpp
*
* Created on: May 23, 2017
* Author: liuhao
*/
#ifndef CAFFE_DEPTHWISE_CONV_LAYER_HPP_
#define CAFFE_DEPTHWISE_CONV_LAYER_HPP_
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/layers/base_conv_layer.hpp"
namespace caffe {
/**
 * @brief Convolution layer exposed under the type name
 *        "DepthwiseConvolution", built on top of BaseConvolutionLayer.
 *
 * The class only declares the forward/backward entry points (CPU and GPU)
 * and the output-shape computation; their definitions live in the
 * accompanying source files. Like the stock convolution layer it convolves
 * the input with a bank of learned filters and (optionally) adds biases.
 */
template <typename Dtype>
class DepthwiseConvolutionLayer : public BaseConvolutionLayer<Dtype> {
 public:
  /**
   * @param param provides ConvolutionParameter convolution_param, with the
   *     same options as ConvolutionLayer:
   *  - num_output. The number of filters.
   *  - kernel_size / kernel_h / kernel_w. The filter dimensions: kernel_size
   *    for square filters, kernel_h and kernel_w for rectangular ones.
   *  - stride / stride_h / stride_w (\b optional, default 1). The filter
   *    stride: stride for equal dimensions, stride_h and stride_w otherwise.
   *  - pad / pad_h / pad_w (\b optional, default 0). Zero-padding: pad for
   *    equal dimensions, pad_h and pad_w otherwise. Padding is applied
   *    implicitly rather than by materialising a padded input.
   *  - dilation (\b optional, default 1). The filter dilation.
   *  - group (\b optional, default 1). The number of filter groups. Input
   *    and output channel counts must be divisible by it; each group takes
   *    1 / group of the input channels and produces 1 / group of the output
   *    channels. Example: 4 input channels, 8 output channels and 2 groups
   *    put input channels 1-2 with output channels 1-4 in the first group
   *    and input channels 3-4 with output channels 5-8 in the second.
   *  - bias_term (\b optional, default true). Whether to have a bias.
   *  - engine: convolution has CAFFE (matrix multiplication) and CUDNN
   *    (library kernels + stream parallelism) engines.
   */
  explicit DepthwiseConvolutionLayer(const LayerParameter& param)
      : BaseConvolutionLayer<Dtype>(param) {}

  /// Layer type name.
  virtual inline const char* type() const { return "DepthwiseConvolution"; }

 protected:
  // CPU implementation.
  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom);

  // GPU implementation.
  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top);
  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
      const vector<bool>& propagate_down,
      const vector<Blob<Dtype>*>& bottom);

  /// Fills in the spatial output shape.
  virtual void compute_output_shape();

  /// Always false for this layer.
  virtual inline bool reverse_dimensions() { return false; }
};
} // namespace caffe
#endif /* INCLUDE_CAFFE_LAYERS_DEPTHWISE_CONV_LAYER_HPP_ */
depthwise_conv_layer.cpp
#include <vector>
#include "caffe/layers/depthwise_conv_layer.hpp"
namespace caffe {
/**
 * @brief Rebuilds output_shape_ with one entry per spatial axis.
 *
 * Each extent is computed with integer arithmetic as
 *   (input + 2 * pad - (dilation * (kernel - 1) + 1)) / stride + 1.
 */
template <typename Dtype>
void DepthwiseConvolutionLayer<Dtype>::compute_output_shape() {
  const int* const kernel = this->kernel_shape_.cpu_data();
  const int* const stride = this->stride_.cpu_data();
  const int* const pad = this->pad_.cpu_data();
  const int* const dilation = this->dilation_.cpu_data();

  this->output_shape_.clear();
  for (int axis = 0; axis < this->num_spatial_axes_; ++axis) {
    // input_shape() is queried at axis + 1 to skip the channel axis.
    const int in_extent = this->input_shape(axis + 1);
    // Footprint of the kernel once dilation is taken into account.
    const int dilated_kernel = dilation[axis] * (kernel[axis] - 1) + 1;
    const int padded_extent = in_extent + 2 * pad[axis];
    this->output_shape_.push_back(
        (padded_extent - dilated_kernel) / stride[axis] + 1);
  }
}
/**
 * @brief CPU forward pass.
 *
 * For every bottom/top pair and every item n in [0, num_), runs
 * forward_cpu_gemm on that item's slice and, when bias_term_ is set,
 * forward_cpu_bias on the resulting output slice.
 *
 * @param bottom input blobs
 * @param top    output blobs, paired with bottom by index
 */
template <typename Dtype>
void DepthwiseConvolutionLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  const Dtype* const filters = this->blobs_[0]->cpu_data();
  for (int blob_id = 0; blob_id < bottom.size(); ++blob_id) {
    const Dtype* const in = bottom[blob_id]->cpu_data();
    Dtype* const out = top[blob_id]->mutable_cpu_data();
    for (int n = 0; n < this->num_; ++n) {
      // Slice of the output belonging to item n.
      Dtype* const out_n = out + n * this->top_dim_;
      this->forward_cpu_gemm(in + n * this->bottom_dim_, filters, out_n);
      if (!this->bias_term_) {
        continue;
      }
      this->forward_cpu_bias(out_n, this->blobs_[1]->cpu_data());
    }
  }
}
/**
 * @brief CPU backward pass.
 *
 * For every top/bottom pair this accumulates, item by item:
 *  - the bias gradient, when bias_term_ and param_propagate_down_[1] are set;
 *  - the weight gradient, when param_propagate_down_[0] is set;
 *  - the gradient w.r.t. the bottom data, when propagate_down[i] is set.
 *
 * @param top            output blobs whose diffs are read
 * @param propagate_down per-bottom flag: compute the bottom gradient or not
 * @param bottom         input blobs; their diffs receive the bottom gradient
 */
template <typename Dtype>
void DepthwiseConvolutionLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  const Dtype* const filters = this->blobs_[0]->cpu_data();
  Dtype* const filters_grad = this->blobs_[0]->mutable_cpu_diff();

  for (int blob_id = 0; blob_id < top.size(); ++blob_id) {
    const Dtype* const out_grad = top[blob_id]->cpu_diff();
    const Dtype* const in = bottom[blob_id]->cpu_data();
    Dtype* const in_grad = bottom[blob_id]->mutable_cpu_diff();

    // Bias gradient, if necessary.
    if (this->bias_term_ && this->param_propagate_down_[1]) {
      Dtype* const bias_grad = this->blobs_[1]->mutable_cpu_diff();
      for (int n = 0; n < this->num_; ++n) {
        this->backward_cpu_bias(bias_grad, out_grad + n * this->top_dim_);
      }
    }

    const bool need_weight_grad = this->param_propagate_down_[0];
    const bool need_input_grad = propagate_down[blob_id];
    if (!need_weight_grad && !need_input_grad) {
      continue;
    }

    for (int n = 0; n < this->num_; ++n) {
      const Dtype* const out_grad_n = out_grad + n * this->top_dim_;
      // Gradient w.r.t. weight; diffs are accumulated across items.
      if (need_weight_grad) {
        this->weight_cpu_gemm(in + n * this->bottom_dim_, out_grad_n,
            filters_grad);
      }
      // Gradient w.r.t. bottom data.
      if (need_input_grad) {
        this->backward_cpu_gemm(out_grad_n, filters,
            in_grad + n * this->bottom_dim_);
      }
    }
  }
}
// In CPU_ONLY builds, expand STUB_GPU for this layer (macro defined
// elsewhere; presumably supplies placeholder GPU methods - confirm).
#ifdef CPU_ONLY
STUB_GPU(DepthwiseConvolutionLayer);
#endif
// Instantiation and registration macros (defined elsewhere). The name
// passed to REGISTER_LAYER_CLASS matches the "DepthwiseConvolution" string
// returned by the layer's type().
INSTANTIATE_CLASS(DepthwiseConvolutionLayer);
REGISTER_LAYER_CLASS(DepthwiseConvolution);
} // namespace caffe
depthwise_conv_layer.cu
#include <vector>
#include <algorithm>
#include <cfloat>
#include "caffe/layers/depthwise_conv_layer.hpp"
#include "caffe/util/math_functions.hpp"
/*
* The depthwise layer for mobilenet. only for stride 1
*/
namespace caffe {
/**
 * @brief Forward kernel of the depthwise convolution: every output element
 *        is the weighted sum of a kernel_h x kernel_w window taken from the
 *        same channel of the input, plus an optional per-channel bias.
 *
 * @param nthreads      number of output elements to compute
 * @param bottom_data   input, read at ((n * channels + c) * height + h) * width + w
 * @param num           not read by the kernel
 * @param channels      channel count used to decompose the output index
 * @param height        input height
 * @param width         input width
 * @param conved_height output height
 * @param conved_width  output width
 * @param kernel_h      filter height
 * @param kernel_w      filter width
 * @param stride_h      vertical stride
 * @param stride_w      horizontal stride
 * @param pad_h         vertical zero padding (assumed non-negative)
 * @param pad_w         horizontal zero padding (assumed non-negative)
 * @param top_data      output, written at the flat output index
 * @param weight        filters, one kernel_h x kernel_w slice per channel c
 * @param bias          per-channel bias, read only when bias_term_ is true
 * @param bias_term_    whether to add bias[c]
 *
 * Dilation is not a parameter of this kernel; the window is always dense.
 */
template <typename Dtype>
__global__ void ConvForward(const int nthreads,
    const Dtype* const bottom_data, const int num, const int channels,
    const int height, const int width, const int conved_height,
    const int conved_width, const int kernel_h, const int kernel_w,
    const int stride_h, const int stride_w, const int pad_h, const int pad_w,
    Dtype* const top_data, const Dtype* const weight,
    const Dtype* const bias, const bool bias_term_) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    // Decompose the flat output index into (n, c, ph, pw).
    const int pw = index % conved_width;
    const int ph = (index / conved_width) % conved_height;
    const int c = (index / conved_width / conved_height) % channels;
    const int n = index / conved_width / conved_height / channels;

    // Top-left corner of the window in input coordinates; negative values
    // fall into the zero padding.
    const int h_origin = ph * stride_h - pad_h;
    const int w_origin = pw * stride_w - pad_w;

    // Part of the window that overlaps the real (unpadded) input.
    const int hstart = max(h_origin, 0);
    const int wstart = max(w_origin, 0);
    const int hend = min(h_origin + kernel_h, height);
    const int wend = min(w_origin + kernel_w, width);

    const Dtype* const bottom_slice =
        bottom_data + (n * channels + c) * height * width;
    const Dtype* const weight_slice =
        weight + c * kernel_h * kernel_w;

    Dtype acc = 0;
    for (int h = hstart; h < hend; ++h) {
      for (int w = wstart; w < wend; ++w) {
        // The filter tap is addressed relative to the unclamped window
        // origin, so it stays correct when the window is clipped on both
        // sides (input extent smaller than the kernel extent). Deriving the
        // offset from the clamped window end picked the wrong tap there.
        acc += bottom_slice[h * width + w] *
            weight_slice[(h - h_origin) * kernel_w + (w - w_origin)];
      }
    }
    if (bias_term_) {
      acc += bias[c];
    }
    top_data[index] = acc;
  }
}
template<typename Dtype>
void DepthwiseConvolutionLayer<Dtype>::Forward_gpu(
const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
// std::cout << "fp" << std::endl;
const Dtype* weight = this