There is a bug in the reading of BatchNorm weights that causes some models to produce incorrect results. For example, these models produce incorrect results: https://github.com/cvjena/cnn-models/tree/master/ResNet_preact
Solution: This issue has been fixed in NVIDIA/caffe, in the lines starting here: https://github.com/NVIDIA/caffe/blob/caffe-0.17/src/caffe/net.cpp#L1161 I have placed the fix below. In this repo, open the file src/caffe/net.cpp, find the function CopyTrainedLayersFrom(), and replace it with the following code.
void Net::CopyTrainedLayersFrom(const NetParameter& param) {
  int num_source_layers = param.layer_size();
  for (int i = 0; i < num_source_layers; ++i) {
    const LayerParameter& source_layer = param.layer(i);
    const string& source_layer_name = source_layer.name();
    const string& source_layer_type = source_layer.type();
    const bool ignore_shape_mismatch = (solver_ == NULL) ||
        solver_->param().ignore_shape_mismatch();
    int target_layer_id = 0;
    while (target_layer_id != layer_names_.size() &&
           layer_names_[target_layer_id] != source_layer_name) {
      ++target_layer_id;
    }
    if (target_layer_id == layer_names_.size()) {
      LOG(INFO) << "Ignoring source layer " << source_layer_name;
      continue;
    }
    DLOG(INFO) << "Copying source layer " << source_layer_name;
    vector<shared_ptr<Blob>>& target_blobs =
        layers_[target_layer_id]->blobs();
    CHECK_EQ(target_blobs.size(), source_layer.blobs_size())
        << "Incompatible number of blobs for layer " << source_layer_name;
    LOG(INFO) << "Copying source layer " << source_layer_name << " Type:"
        << source_layer_type << " #blobs=" << source_layer.blobs_size();
    // Check whether BN is in the legacy DIGITS format.
    if (source_layer_type == "BatchNorm") {
      for (int j = 0; j < target_blobs.size(); ++j) {
        const bool kReshape = true;
        target_blobs[j]->FromProto(source_layer.blobs(j), kReshape);
      }
      if (source_layer.blobs_size() == 5 && target_blobs[4]->count() == 1) {
        // old format: 0 - scale, 1 - bias, 2 - mean, 3 - var, 4 - reserved
        // new format: 0 - mean, 1 - var, 2 - reserved, 3 - scale, 4 - bias
        LOG(INFO) << "BN legacy DIGITS format detected ... ";
        std::swap(target_blobs[0], target_blobs[2]);
        std::swap(target_blobs[1], target_blobs[3]);
        // ==> 0 - mean, 1 - var, 2 - scale, 3 - bias, 4 - reserved
        std::swap(target_blobs[2], target_blobs[4]);
        std::swap(target_blobs[3], target_blobs[4]);
        LOG(INFO) << "BN Transforming to new format completed.";
      }
      if (source_layer.blobs_size() == 3) {
        const float scale_factor = target_blobs[2]->cpu_data()[0] == 0.F ?
            0.F : 1.F / target_blobs[2]->cpu_data()[0];
        caffe_cpu_scale(target_blobs[0]->count(), scale_factor,
            target_blobs[0]->cpu_data(),
            target_blobs[0]->mutable_cpu_data());
        caffe_cpu_scale(target_blobs[1]->count(), scale_factor,
            target_blobs[1]->cpu_data(),
            target_blobs[1]->mutable_cpu_data());
        target_blobs[2]->mutable_cpu_data()[0] = 1.F;
      }
    } else {
      for (int j = 0; j < target_blobs.size(); ++j) {
        if (!target_blobs[j]->ShapeEquals(source_layer.blobs(j))) {
          shared_ptr<Blob> source_blob = Blob::create(
              target_blobs[j]->data_type(), target_blobs[j]->diff_type());
          const bool kReshape = true;
          source_blob->FromProto(source_layer.blobs(j), kReshape);
          if (ignore_shape_mismatch) {
            LOG(WARNING) << "Cannot copy param " << j << " weights from layer '"
                << source_layer_name << "'; shape mismatch. Source param shape is "
                << source_blob->shape_string() << "; target param shape is "
                << target_blobs[j]->shape_string() << ". "
                << "To learn this layer's parameters from scratch rather than "
                << "copying from a saved net, rename the layer.";
          } else {
            LOG(FATAL) << "Cannot copy param " << j << " weights from layer '"
                << source_layer_name << "'; shape mismatch. Source param shape is "
                << source_blob->shape_string() << "; target param shape is "
                << target_blobs[j]->shape_string() << ". "
                << "To learn this layer's parameters from scratch rather than "
                << "copying from a saved net, rename the layer.";
          }
        }
        const bool kReshape = false;
        target_blobs[j]->FromProto(source_layer.blobs(j), kReshape);
      }
    }
  }
  CopyQuantizationRangeInLayers();
}
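The 5-blob branch fixes the legacy DIGITS layout purely by permuting the blob pointers. To make the reordering easier to follow, here is a minimal stand-alone sketch that applies the same three swaps to plain strings (stand-ins for the Caffe blobs, just to trace the permutation):

#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Legacy DIGITS order: 0 - scale, 1 - bias, 2 - mean, 3 - var, 4 - reserved
  std::vector<std::string> blobs = {"scale", "bias", "mean", "var", "reserved"};
  std::swap(blobs[0], blobs[2]);  // mean, bias, scale, var, reserved
  std::swap(blobs[1], blobs[3]);  // mean, var, scale, bias, reserved
  std::swap(blobs[2], blobs[4]);  // mean, var, reserved, bias, scale
  std::swap(blobs[3], blobs[4]);  // mean, var, reserved, scale, bias
  for (const std::string& b : blobs) std::cout << b << ' ';
  std::cout << '\n';  // prints: mean var reserved scale bias
  return 0;
}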
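The 3-blob branch handles weights saved in the BVLC-style BatchNorm layout, where blobs 0 and 1 hold the running mean and variance multiplied by an accumulated moving-average factor stored in blob 2. Dividing both statistics by that factor, and resetting it to 1 so it is not applied again at inference, recovers the true running statistics. A minimal numeric sketch of the same normalization, using plain floats and hypothetical values rather than Caffe blobs:

#include <iostream>
#include <vector>

int main() {
  // Hypothetical accumulated statistics, as a BVLC BatchNorm layer stores them.
  std::vector<float> mean = {0.5f, -1.0f};  // blob 0: mean * factor
  std::vector<float> var  = {2.0f,  4.0f};  // blob 1: var * factor
  float factor = 2.0f;                      // blob 2: moving-average factor

  // Same guard as in the fix: a zero factor yields a zero scale, not a division.
  const float scale = factor == 0.f ? 0.f : 1.f / factor;
  for (float& m : mean) m *= scale;
  for (float& v : var)  v *= scale;
  factor = 1.f;  // reset so the normalization is applied only once

  std::cout << mean[0] << ' ' << var[0] << ' ' << factor << '\n';
  // prints: 0.25 1 1
  return 0;
}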