Formula
SmoothL1LossLayer computes the loss for one image. It corresponds to the regression term of the Faster R-CNN multi-task loss, i.e. the part to the right of the plus sign in

L({p_i}, {t_i}) = (1/N_cls) * Σ_i L_cls(p_i, p_i*) + λ * (1/N_reg) * Σ_i p_i* · L_reg(t_i, t_i*)

where:
- i is the index of an anchor in the mini-batch
- p_i is the predicted probability that anchor i contains an object
- p_i* is the ground-truth label: 1 when the anchor is positive (contains an object), 0 otherwise
- t_i is a vector of predicted bounding-box coordinates
- t_i* is the vector of ground-truth box coordinates for the corresponding positive anchor
L_reg is the smooth L1 function applied element-wise to x = t_i - t_i*:

smooth_L1(x) = 0.5 * x^2      if |x| < 1
             = |x| - 0.5      otherwise
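To make the piecewise definition concrete, here is a minimal standalone sketch of smooth L1 and its derivative (plain C++, independent of Caffe; the names smooth_l1 and smooth_l1_grad are just for illustration):

#include <cmath>
#include <cstdio>

// smooth_L1(x): quadratic near zero, linear once |x| >= 1.
double smooth_l1(double x) {
  double ax = std::fabs(x);
  return ax < 1.0 ? 0.5 * x * x : ax - 0.5;
}

// Its derivative: x inside (-1, 1), sign(x) outside.
double smooth_l1_grad(double x) {
  if (std::fabs(x) < 1.0) return x;
  return x > 0 ? 1.0 : -1.0;
}

int main() {
  std::printf("%.3f %.3f\n", smooth_l1(0.5), smooth_l1(2.0));            // 0.125 1.500
  std::printf("%.3f %.3f\n", smooth_l1_grad(0.5), smooth_l1_grad(2.0));  // 0.500 1.000
  return 0;
}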
Related functions
- the caffe_add / caffe_sub / caffe_mul / caffe_div functions
- the caffe_cpu_asum function
- the caffe_cpu_axpby function
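For reference, a rough sketch of what these helpers compute, written as plain loops (illustrative only; the real implementations live in caffe/util/math_functions.hpp and typically dispatch to BLAS/MKL routines):

#include <cmath>

// caffe_sub: y = a - b, element-wise (caffe_add / caffe_mul / caffe_div are analogous).
template <typename Dtype>
void sub_sketch(int n, const Dtype* a, const Dtype* b, Dtype* y) {
  for (int i = 0; i < n; ++i) y[i] = a[i] - b[i];
}

// caffe_cpu_asum: sum of absolute values.
template <typename Dtype>
Dtype asum_sketch(int n, const Dtype* x) {
  Dtype s = 0;
  for (int i = 0; i < n; ++i) s += std::fabs(x[i]);
  return s;
}

// caffe_cpu_axpby: y = alpha * x + beta * y.
template <typename Dtype>
void axpby_sketch(int n, Dtype alpha, const Dtype* x, Dtype beta, Dtype* y) {
  for (int i = 0; i < n; ++i) y[i] = alpha * x[i] + beta * y[i];
}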
What the code does
Forward
Smooth L1 is the distance loss introduced with Fast R-CNN and reused in Faster R-CNN; the paper notes that it is less sensitive to outliers than the L2 loss. Some versions of this layer (e.g. the one in py-faster-rcnn) take four bottom blobs: predict, target, inside_weight and outside_weight. The CPU code analyzed below is a simpler variant that takes predict, target, and an optional third weight blob. It does not match the paper exactly; the code implements a slightly more general version, whose forward computation is:

loss = (1 / num) * Σ_i smooth_L1( w_i * (predict_i - target_i) )

where w_i is the optional per-element weight (p_i* above) and num is the mini-batch size.
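As a quick sanity check of this formula, a standalone toy example (hypothetical numbers, one coordinate per anchor) with one positive and one background anchor:

#include <cmath>
#include <cstdio>

double smooth_l1(double x) {
  double ax = std::fabs(x);
  return ax < 1.0 ? 0.5 * x * x : ax - 0.5;
}

int main() {
  double predict[2] = {0.8, 3.0};
  double target[2]  = {0.5, 0.0};
  double weight[2]  = {1.0, 0.0};  // p_i*: the second anchor is background, so it contributes nothing
  int num = 2;

  double loss = 0.0;
  for (int i = 0; i < 2; ++i) {
    loss += smooth_l1(weight[i] * (predict[i] - target[i]));
  }
  loss /= num;
  std::printf("loss = %.4f\n", loss);  // smooth_l1(0.3) / 2 = 0.045 / 2 = 0.0225
  return 0;
}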
Backward
The derivative of smooth_L1 is x when |x| < 1 and sign(x) otherwise. Backward evaluates this on the cached diff_, scales it by the upstream loss weight divided by num, and writes the result into the diff of bottom[0] with sign +1 and of bottom[1] with sign -1.
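Continuing the toy example above (still hypothetical numbers), the per-element gradients w.r.t. the prediction and the target differ only in sign:

#include <cmath>
#include <cstdio>

// d smooth_L1(x) / dx: x inside (-1, 1), sign(x) outside.
double smooth_l1_grad(double x) {
  if (std::fabs(x) < 1.0) return x;
  return x > 0 ? 1.0 : -1.0;
}

int main() {
  double predict[2] = {0.8, 3.0};
  double target[2]  = {0.5, 0.0};
  double weight[2]  = {1.0, 0.0};
  int num = 2;
  double loss_weight = 1.0;  // plays the role of top[0]->cpu_diff()[0]

  for (int i = 0; i < 2; ++i) {
    double x = weight[i] * (predict[i] - target[i]);
    // As in the layer, the weight is not applied a second time in backward;
    // for 0/1 weights this makes no difference.
    double g_pred   = +1 * loss_weight * smooth_l1_grad(x) / num;  // sign +1 for bottom[0]
    double g_target = -1 * loss_weight * smooth_l1_grad(x) / num;  // sign -1 for bottom[1]
    std::printf("i=%d  dL/dpredict=%.3f  dL/dtarget=%.3f\n", i, g_pred, g_target);
  }
  return 0;
}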
Source code analysis
// ------------------------------------------------------------------
// Fast R-CNN
// copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Ross Girshick
// Modified by Wei Liu
// ------------------------------------------------------------------
#include <vector>
#include "caffe/layers/smooth_L1_loss_layer.hpp"
#include "caffe/util/math_functions.hpp"
namespace caffe {
// Initialization of the whole layer.
template <typename Dtype>
void SmoothL1LossLayer<Dtype>::LayerSetUp(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::LayerSetUp(bottom, top);
  // bottom[0] holds the predicted coordinates, i.e. ti
  // bottom[1] holds the ground-truth coordinates, i.e. ti*
  // bottom[2] holds pi*: 1 where there is an object, 0 otherwise
  // So when bottom.size() is 3, bottom[2] carries the element-wise weights.
  has_weights_ = (bottom.size() == 3);
}
template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  LossLayer<Dtype>::Reshape(bottom, top);
  CHECK_EQ(bottom[0]->channels(), bottom[1]->channels());
  CHECK_EQ(bottom[0]->height(), bottom[1]->height());
  CHECK_EQ(bottom[0]->width(), bottom[1]->width());
  if (has_weights_) {
    CHECK_EQ(bottom[0]->channels(), bottom[2]->channels());
    CHECK_EQ(bottom[0]->height(), bottom[2]->height());
    CHECK_EQ(bottom[0]->width(), bottom[2]->width());
  }
  diff_.Reshape(bottom[0]->num(), bottom[0]->channels(),
      bottom[0]->height(), bottom[0]->width());
  errors_.Reshape(bottom[0]->num(), bottom[0]->channels(),
      bottom[0]->height(), bottom[0]->width());
}
template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  int count = bottom[0]->count();
  // Blob exposes two kinds of accessors: cpu_data() is const and read-only,
  // mutable_cpu_data() allows the data to be modified (the same applies to diff_).
  caffe_sub(
      count,
      bottom[0]->cpu_data(),        // ti
      bottom[1]->cpu_data(),        // ti*
      diff_.mutable_cpu_data());    // d := ti - ti*
  if (has_weights_) {
    // Multiply by the weights, i.e. pi* in the formula above (1 for positive anchors).
    caffe_mul(
        count,
        bottom[2]->cpu_data(),      // pi*
        diff_.cpu_data(),
        diff_.mutable_cpu_data());  // d := w_in * (ti - ti*)
  }
  const Dtype* diff_data = diff_.cpu_data();
  Dtype* error_data = errors_.mutable_cpu_data();
  // Element-wise smooth L1.
  for (int i = 0; i < count; ++i) {
    Dtype val = diff_data[i];
    Dtype abs_val = fabs(val);
    if (abs_val < 1.) {
      error_data[i] = 0.5 * val * val;
    } else {
      error_data[i] = abs_val - 0.5;
    }
  }
  top[0]->mutable_cpu_data()[0] =
      caffe_cpu_asum(count, errors_.cpu_data()) / bottom[0]->num();
}
template <typename Dtype>
void SmoothL1LossLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
  int count = diff_.count();
  Dtype* diff_data = diff_.mutable_cpu_data();
  for (int i = 0; i < count; ++i) {
    Dtype val = diff_data[i];
    // f'(x) = x        if |x| < 1
    //       = sign(x)  otherwise
    if (fabs(val) < 1.) {
      diff_data[i] = val;
    } else {
      // Branch-free sign: (0 < val) - (val < 0) yields +1, -1, or 0.
      diff_data[i] = (Dtype(0) < val) - (val < Dtype(0));
    }
  }
  for (int i = 0; i < 2; ++i) {
    if (propagate_down[i]) {
      // +1 for the prediction blob, -1 for the target blob; scale by the
      // upstream loss weight and normalize by the batch size.
      const Dtype sign = (i == 0) ? 1 : -1;
      const Dtype alpha = sign * top[0]->cpu_diff()[0] / bottom[i]->num();
      caffe_cpu_axpby(                     // b := alpha * a + beta * b
          bottom[i]->count(),              // count
          alpha,                           // alpha
          diff_.cpu_data(),                // a
          Dtype(0),                        // beta
          bottom[i]->mutable_cpu_diff());  // b
    }
  }
}
#ifdef CPU_ONLY
STUB_GPU(SmoothL1LossLayer);
#endif
INSTANTIATE_CLASS(SmoothL1LossLayer);
REGISTER_LAYER_CLASS(SmoothL1Loss);
} // namespace caffe
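As an aside on the sign trick in Backward_cpu: (Dtype(0) < val) - (val < Dtype(0)) is a common branch-free way to compute sign(val), since each comparison converts to 0 or 1. A quick standalone check (illustrative only):

#include <cstdio>

template <typename T>
int sign(T x) {
  return (T(0) < x) - (x < T(0));  // +1, -1, or 0
}

int main() {
  std::printf("%d %d %d\n", sign(3.7), sign(-0.2), sign(0.0));  // 1 -1 0
  return 0;
}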