Skip to content

Commit 1339297

Browse files
rbgirshick authored and shelhamer committed
support for tightest square mode while finetuning
1 parent 5cb7c23 commit 1339297

5 files changed

Lines changed: 21 additions & 2 deletions

File tree

models/pascal_finetune.prototxt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ layers {
88
batchsize: 128
99
cropsize: 227
1010
context_pad: 16
11+
crop_mode: "warp"
1112
mirror: true
1213
det_fg_threshold: 0.5
1314
det_bg_threshold: 0.5

models/pascal_finetune_solver.prototxt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,4 +11,4 @@ max_iter: 100000
1111
momentum: 0.9
1212
weight_decay: 0.0005
1313
snapshot: 10000
14-
snapshot_prefix: "./snapshots/pascal_context16_finetune_train"
14+
snapshot_prefix: "./snapshots/pascal_warp_context16_finetune_train"

models/pascal_finetune_val.prototxt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ layers {
88
batchsize: 128
99
cropsize: 227
1010
context_pad: 16
11+
crop_mode: "warp"
1112
mirror: true
1213
det_fg_threshold: 0.5
1314
det_bg_threshold: 0.5

src/caffe/layers/window_data_layer.cpp

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,9 @@ void* WindowDataLayerPrefetch(void* layer_pointer) {
4747
const int mean_width = layer->data_mean_.width();
4848
const int mean_height = layer->data_mean_.height();
4949
cv::Size cv_crop_size(cropsize, cropsize);
50+
const string& crop_mode = layer->layer_param_.crop_mode();
51+
52+
bool use_square = (crop_mode == "square") ? true : false;
5053

5154
// zero out batch
5255
memset(top_data, 0, sizeof(Dtype)*layer->prefetch_data_->count());
@@ -93,7 +96,7 @@ void* WindowDataLayerPrefetch(void* layer_pointer) {
9396

9497
int pad_w = 0;
9598
int pad_h = 0;
96-
if (context_pad > 0) {
99+
if (context_pad > 0 || use_square) {
97100
// scale factor by which to expand the original region
98101
// such that after warping the expanded region to cropsize x cropsize
99102
// there's exactly context_pad amount of padding on each side
@@ -105,6 +108,13 @@ void* WindowDataLayerPrefetch(void* layer_pointer) {
105108
Dtype half_width = static_cast<Dtype>(x2-x1+1)/2.0;
106109
Dtype center_x = static_cast<Dtype>(x1) + half_width;
107110
Dtype center_y = static_cast<Dtype>(y1) + half_height;
111+
if (use_square) {
112+
if (half_height > half_width) {
113+
half_width = half_height;
114+
} else {
115+
half_height = half_width;
116+
}
117+
}
108118
x1 = static_cast<int>(round(center_x - half_width*context_scale));
109119
x2 = static_cast<int>(round(center_x + half_width*context_scale));
110120
y1 = static_cast<int>(round(center_y - half_height*context_scale));
@@ -339,6 +349,8 @@ void WindowDataLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
339349
LOG(INFO) << "Amount of context padding: "
340350
<< this->layer_param_.context_pad();
341351

352+
LOG(INFO) << "Crop mode: " << this->layer_param_.crop_mode();
353+
342354
// image
343355
int cropsize = this->layer_param_.cropsize();
344356
CHECK_GT(cropsize, 0);

src/caffe/proto/caffe.proto

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,11 @@ message LayerParameter {
108108
// (used only by the window_data_layer)
109109
optional uint32 context_pad = 58 [default = 0];
110110

111+
// Mode for cropping out a detection window
112+
// warp: cropped window is warped to a fixed size and aspect ratio
113+
// square: the tightest square around the window is cropped
114+
optional string crop_mode = 59 [default = "warp"];
115+
111116
// For ReshapeLayer, one needs to specify the new dimensions.
112117
optional int32 new_num = 60 [default = 0];
113118
optional int32 new_channels = 61 [default = 0];

0 commit comments

Comments
 (0)