Skip to content

Commit f6e582a

Browse files
committed
Make the backward pass work when global stats are active for BatchNormLayer,
including minor code cleanup
1 parent 0ad1d8a commit f6e582a

2 files changed

Lines changed: 12 additions & 8 deletions

File tree

src/caffe/layers/batch_norm_layer.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,7 @@ void BatchNormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
8484
}
8585

8686
if (use_global_stats_) {
87-
// use the stored mean/variance estimates. TODO(cdoersch): allow an option
88-
// to use an unbiased variance estimate, like the paper does.
87+
// use the stored mean/variance estimates.
8988
const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ?
9089
0 : 1 / this->blobs_[2]->cpu_data()[0];
9190
caffe_cpu_scale(variance_.count(), scale_factor,
@@ -158,16 +157,19 @@ template <typename Dtype>
158157
void BatchNormLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
159158
const vector<bool>& propagate_down,
160159
const vector<Blob<Dtype>*>& bottom) {
161-
CHECK(!use_global_stats_);
162160
const Dtype* top_diff;
163161
if (bottom[0] != top[0]) {
164162
top_diff = top[0]->cpu_diff();
165163
} else {
166164
caffe_copy(x_norm_.count(), top[0]->cpu_diff(), x_norm_.mutable_cpu_diff());
167165
top_diff = x_norm_.cpu_diff();
168166
}
169-
const Dtype* top_data = x_norm_.cpu_data();
170167
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
168+
if (use_global_stats_) {
169+
caffe_div(temp_.count(), top_diff, temp_.cpu_data(), bottom_diff);
170+
return;
171+
}
172+
const Dtype* top_data = x_norm_.cpu_data();
171173
int num = bottom[0]->shape()[0];
172174
int spatial_dim = bottom[0]->count()/(bottom[0]->shape(0)*channels_);
173175
// if Y = (X-mean(X))/(sqrt(var(X)+eps)), then

src/caffe/layers/batch_norm_layer.cu

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@ void BatchNormLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
2020

2121

2222
if (use_global_stats_) {
23-
// use the stored mean/variance estimates. TODO(cdoersch): allow an option
24-
// to use an unbiased variance estimate, like the paper does.
23+
// use the stored mean/variance estimates.
2524
const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ?
2625
0 : 1 / this->blobs_[2]->cpu_data()[0];
2726
caffe_gpu_scale(variance_.count(), scale_factor,
@@ -94,16 +93,19 @@ template <typename Dtype>
9493
void BatchNormLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
9594
const vector<bool>& propagate_down,
9695
const vector<Blob<Dtype>*>& bottom) {
97-
CHECK(!use_global_stats_);
9896
const Dtype* top_diff;
9997
if (bottom[0] != top[0]) {
10098
top_diff = top[0]->gpu_diff();
10199
} else {
102100
caffe_copy(x_norm_.count(), top[0]->gpu_diff(), x_norm_.mutable_gpu_diff());
103101
top_diff = x_norm_.gpu_diff();
104102
}
105-
const Dtype* top_data = x_norm_.gpu_data();
106103
Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
104+
if (use_global_stats_) {
105+
caffe_gpu_div(temp_.count(), top_diff, temp_.gpu_data(), bottom_diff);
106+
return;
107+
}
108+
const Dtype* top_data = x_norm_.gpu_data();
107109
int num = bottom[0]->shape()[0];
108110
int spatial_dim = bottom[0]->count()/(channels_*bottom[0]->shape(0));
109111
// if Y = (X-mean(X))/(sqrt(var(X)+eps)), then

0 commit comments

Comments
 (0)