There is a bug in the reading of BatchNorm weights that causes some models to produce incorrect results. For example, these models produce incorrect results: https://github.com/cvjena/cnn-models/tree/master/ResNet_preact
Solution: This issue has been fixed in NVIDIA/caffe, in the lines starting here: https://github.com/NVIDIA/caffe/blob/caffe-0.17/src/caffe/net.cpp#L1161 I have placed the fix below. In this repo, open the file src/caffe/net.cpp, find the function CopyTrainedLayersFrom(), and replace it with the following code.
void Net::CopyTrainedLayersFrom(const NetParameter& param) {
  int num_source_layers = param.layer_size();
  for (int i = 0; i < num_source_layers; ++i) {
    const LayerParameter& source_layer = param.layer(i);
    const string& source_layer_name = source_layer.name();
    const string& source_layer_type = source_layer.type();
    const bool ignore_shape_mismatch = (solver_ == NULL) ||
        solver_->param().ignore_shape_mismatch();
    int target_layer_id = 0;
    while (target_layer_id != layer_names_.size() &&
           layer_names_[target_layer_id] != source_layer_name) {
      ++target_layer_id;
    }
    if (target_layer_id == layer_names_.size()) {
      LOG(INFO) << "Ignoring source layer " << source_layer_name;
      continue;
    }
    DLOG(INFO) << "Copying source layer " << source_layer_name;
    vector<shared_ptr<Blob>>& target_blobs =
        layers_[target_layer_id]->blobs();
    CHECK_EQ(target_blobs.size(), source_layer.blobs_size())
        << "Incompatible number of blobs for layer " << source_layer_name;
    LOG(INFO) << "Copying source layer " << source_layer_name << " Type:"
        << source_layer_type << " #blobs=" << source_layer.blobs_size();
    // Check whether BN is in the legacy DIGITS format.
    if (source_layer_type == "BatchNorm") {
      for (int j = 0; j < target_blobs.size(); ++j) {
        const bool kReshape = true;
        target_blobs[j]->FromProto(source_layer.blobs(j), kReshape);
      }
      if (source_layer.blobs_size() == 5 && target_blobs[4]->count() == 1) {
        // old format: 0 - scale, 1 - bias, 2 - mean, 3 - var, 4 - reserved
        // new format: 0 - mean, 1 - var, 2 - reserved, 3 - scale, 4 - bias
        LOG(INFO) << "BN legacy DIGITS format detected ... ";
        std::swap(target_blobs[0], target_blobs[2]);
        std::swap(target_blobs[1], target_blobs[3]);
        // ==> 0 - mean, 1 - var, 2 - scale, 3 - bias, 4 - reserved
        std::swap(target_blobs[2], target_blobs[4]);
        std::swap(target_blobs[3], target_blobs[4]);
        LOG(INFO) << "BN Transforming to new format completed.";
      }
      if (source_layer.blobs_size() == 3) {
        const float scale_factor = target_blobs[2]->cpu_data()[0] == 0.F ?
            0.F : 1.F / target_blobs[2]->cpu_data()[0];
        caffe_cpu_scale(target_blobs[0]->count(), scale_factor,
            target_blobs[0]->cpu_data(),
            target_blobs[0]->mutable_cpu_data());
        caffe_cpu_scale(target_blobs[1]->count(), scale_factor,
            target_blobs[1]->cpu_data(),
            target_blobs[1]->mutable_cpu_data());
        target_blobs[2]->mutable_cpu_data()[0] = 1.F;
      }
    } else {
      for (int j = 0; j < target_blobs.size(); ++j) {
        if (!target_blobs[j]->ShapeEquals(source_layer.blobs(j))) {
          shared_ptr<Blob> source_blob = Blob::create(
              target_blobs[j]->data_type(), target_blobs[j]->diff_type());
          const bool kReshape = true;
          source_blob->FromProto(source_layer.blobs(j), kReshape);
          if (ignore_shape_mismatch) {
            LOG(WARNING) << "Cannot copy param " << j << " weights from layer '"
                << source_layer_name << "'; shape mismatch. Source param shape is "
                << source_blob->shape_string() << "; target param shape is "
                << target_blobs[j]->shape_string() << ". "
                << "To learn this layer's parameters from scratch rather than "
                << "copying from a saved net, rename the layer.";
          } else {
            LOG(FATAL) << "Cannot copy param " << j << " weights from layer '"
                << source_layer_name << "'; shape mismatch. Source param shape is "
                << source_blob->shape_string() << "; target param shape is "
                << target_blobs[j]->shape_string() << ". "
                << "To learn this layer's parameters from scratch rather than "
                << "copying from a saved net, rename the layer.";
          }
        }
        const bool kReshape = false;
        target_blobs[j]->FromProto(source_layer.blobs(j), kReshape);
      }
    }
  }
  CopyQuantizationRangeInLayers();
}
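The 5-blob branch fixes the legacy DIGITS layout purely by permuting the blob pointers. To make the reordering easier to follow, here is a minimal stand-alone sketch that applies the same three swaps to plain strings (stand-ins for the Caffe blobs, just to trace the permutation):

#include <iostream>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Legacy DIGITS order: 0 - scale, 1 - bias, 2 - mean, 3 - var, 4 - reserved
  std::vector<std::string> blobs = {"scale", "bias", "mean", "var", "reserved"};
  std::swap(blobs[0], blobs[2]);  // mean, bias, scale, var, reserved
  std::swap(blobs[1], blobs[3]);  // mean, var, scale, bias, reserved
  std::swap(blobs[2], blobs[4]);  // mean, var, reserved, bias, scale
  std::swap(blobs[3], blobs[4]);  // mean, var, reserved, scale, bias
  for (const std::string& b : blobs) std::cout << b << ' ';
  std::cout << '\n';  // prints: mean var reserved scale bias
  return 0;
}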
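The 3-blob branch handles weights saved in the BVLC-style BatchNorm layout, where blobs 0 and 1 hold the running mean and variance multiplied by an accumulated moving-average factor stored in blob 2. Dividing both statistics by that factor, and resetting it to 1 so it is not applied again at inference, recovers the true running statistics. A minimal numeric sketch of the same normalization, using plain floats and hypothetical values rather than Caffe blobs:

#include <iostream>
#include <vector>

int main() {
  // Hypothetical accumulated statistics, as a BVLC BatchNorm layer stores them.
  std::vector<float> mean = {0.5f, -1.0f};  // blob 0: mean * factor
  std::vector<float> var  = {2.0f,  4.0f};  // blob 1: var * factor
  float factor = 2.0f;                      // blob 2: moving-average factor

  // Same guard as in the fix: a zero factor yields a zero scale, not a division.
  const float scale = factor == 0.f ? 0.f : 1.f / factor;
  for (float& m : mean) m *= scale;
  for (float& v : var)  v *= scale;
  factor = 1.f;  // reset so the normalization is applied only once

  std::cout << mean[0] << ' ' << var[0] << ' ' << factor << '\n';
  // prints: 0.25 1 1
  return 0;
}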