Make the backward pass work when global stats are active for BatchNormLayer

kkhoot · kkhoot · commit f6e582a38dee · 2015-11-13T02:20:02.000+09:00
Also includes minor code cleanup (removes a TODO comment from the global-stats forward path).
diff --git a/src/caffe/layers/batch_norm_layer.cpp b/src/caffe/layers/batch_norm_layer.cpp
@@ -84,8 +84,7 @@ void BatchNormLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
   }
 
   if (use_global_stats_) {
-    // use the stored mean/variance estimates.  TODO(cdoersch): allow an option
-    // to use an unbiased variance estimate, like the paper does.
+    // use the stored mean/variance estimates.
     const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ?
         0 : 1 / this->blobs_[2]->cpu_data()[0];
     caffe_cpu_scale(variance_.count(), scale_factor,
@@ -158,16 +157,19 @@ template <typename Dtype>
 void BatchNormLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
     const vector<bool>& propagate_down,
     const vector<Blob<Dtype>*>& bottom) {
-  CHECK(!use_global_stats_);
   const Dtype* top_diff;
   if (bottom[0] != top[0]) {
     top_diff = top[0]->cpu_diff();
   } else {
     caffe_copy(x_norm_.count(), top[0]->cpu_diff(), x_norm_.mutable_cpu_diff());
     top_diff = x_norm_.cpu_diff();
   }
-  const Dtype* top_data = x_norm_.cpu_data();
   Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
+  if (use_global_stats_) {
+    caffe_div(temp_.count(), top_diff, temp_.cpu_data(), bottom_diff);
+    return;
+  }
+  const Dtype* top_data = x_norm_.cpu_data();
   int num = bottom[0]->shape()[0];
   int spatial_dim = bottom[0]->count()/(bottom[0]->shape(0)*channels_);
   // if Y = (X-mean(X))/(sqrt(var(X)+eps)), then
diff --git a/src/caffe/layers/batch_norm_layer.cu b/src/caffe/layers/batch_norm_layer.cu
@@ -20,8 +20,7 @@ void BatchNormLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
 
 
   if (use_global_stats_) {
-    // use the stored mean/variance estimates.  TODO(cdoersch): allow an option
-    // to use an unbiased variance estimate, like the paper does.
+    // use the stored mean/variance estimates.
     const Dtype scale_factor = this->blobs_[2]->cpu_data()[0] == 0 ?
         0 : 1 / this->blobs_[2]->cpu_data()[0];
     caffe_gpu_scale(variance_.count(), scale_factor,
@@ -94,16 +93,19 @@ template <typename Dtype>
 void BatchNormLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
     const vector<bool>& propagate_down,
     const vector<Blob<Dtype>*>& bottom) {
-  CHECK(!use_global_stats_);
   const Dtype* top_diff;
   if (bottom[0] != top[0]) {
     top_diff = top[0]->gpu_diff();
   } else {
     caffe_copy(x_norm_.count(), top[0]->gpu_diff(), x_norm_.mutable_gpu_diff());
     top_diff = x_norm_.gpu_diff();
   }
-  const Dtype* top_data = x_norm_.gpu_data();
   Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+  if (use_global_stats_) {
+    caffe_gpu_div(temp_.count(), top_diff, temp_.gpu_data(), bottom_diff);
+    return;
+  }
+  const Dtype* top_data = x_norm_.gpu_data();
   int num = bottom[0]->shape()[0];
   int spatial_dim = bottom[0]->count()/(channels_*bottom[0]->shape(0));
   // if Y = (X-mean(X))/(sqrt(var(X)+eps)), then