Sirius083 opened this issue 5 years ago (status: Open)
Any update? Thanks a lot @Lyken17
My implementation of SparseNet-BC on CIFAR-100 (d=100, k=32,64,128) reaches a validation error of 20.24%, which is about 2% higher than the reported 18.22%. I think the gap comes from the layer_per_stage setting, which the paper does not give (my implementation has 17.34M parameters, while the GitHub results report 16.7M; I tried different layer_per_stage values but could not reach 16.7M). Since I want to cite your paper's results alongside my model, I first need to reimplement your network and reproduce similar results.
Looking forward to your reply @Lyken17
For other network settings I get results similar to the paper. For example, model-BC d_100_k_24 is 22.71% (yours) vs. 22.97% (mine). @Lyken17
@Lyken17
I asked the author by e-mail, and he said the layer_per_stage parameter was not enabled.
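For reference, when layer_per_stage is left at its default, the number of blocks per stage would presumably be derived from the total depth via the standard DenseNet convention. A minimal sketch of that arithmetic (blocks_per_stage is a hypothetical helper for illustration, not a parameter from the repo):

```python
# Minimal sketch: blocks per stage implied by total depth for a 3-stage
# network, assuming the usual DenseNet convention (each bottleneck block
# counts as 2 conv layers; the initial conv, two transitions, and the
# final classifier make up the remaining 4 layers).
def blocks_per_stage(depth, bottleneck=True):
    convs_per_block = 2 if bottleneck else 1
    return (depth - 4) // (3 * convs_per_block)

print(blocks_per_stage(100, bottleneck=True))   # 16 bottleneck blocks per stage for a depth-100 BC model
print(blocks_per_stage(100, bottleneck=False))  # 32 plain layers per stage
```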
Hi, I have added the DenseNet-bottleneck TensorFlow version:
```python
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
import numpy as np
import tensorflow as tf
import argparse
import os

from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
from tensorpack.tfutils.summary import *
# import math

"""
CIFAR10 DenseNet example. See: http://arxiv.org/abs/1608.06993
Code is developed based on Yuxin Wu's ResNet implementation:
https://github.com/ppwwyyxx/tensorpack/tree/master/examples/ResNet
Results using DenseNet (L=40, K=12) on Cifar10 with data augmentation: ~5.77% test error.
Running time:
On one TITAN X GPU (CUDA 7.5 and cudnn 5.1), the code should run at ~5 iters/s with a batch size of 64.
Reimplementation notes:
"""

BATCH_SIZE = 64


class Model(ModelDesc):
    def __init__(self, depth, growth_rate, fetch,
                 bottleneck, compression=1, dropout=0,
                 num_classes=10):
        super(Model, self).__init__()
        self.N = int((depth - 4) / 3)
        self.growthRate = growth_rate
        self.fetch = fetch
        self.bottleneck = bottleneck
        self.compression = compression
        self.num_classes = num_classes
        # print('*** self.fetch', self.fetch)

    def _get_inputs(self):
        return [InputDesc(tf.float32, [None, 32, 32, 3], 'input'),
                InputDesc(tf.int32, [None], 'label')]

    def _build_graph(self, input_vars):
        image, label = input_vars
        image = image / 128.0 - 1  # convert range to -1 ~ 1

        from .utils import conv3x3, conv1x1, add_layer, add_layer_without_concat, \
            add_transition, add_bottleneck_transition, add_bottleneck_without_concat

        def dense_net(name, num_classes=10):
            l = conv3x3('conv0', image, 16, 1)

            from .utils import linearFetch, expFetch
            if self.fetch == "dense":
                fetch = linearFetch
            else:
                fetch = expFetch
            # print('***fetch', fetch)

            # stage 1
            with tf.variable_scope('block1') as scope:
                saved_activations = [l]
                for i in range(self.N):
                    if not self.bottleneck:
                        l = add_layer_without_concat('densen_layer.{}'.format(i), l, self.growthRate[0])
                        saved_activations.append(l)
                        l = tf.concat(fetch(saved_activations), 3)
                    else:
                        l = add_bottleneck_without_concat('densen_layer.{}'.format(i), l, self.growthRate[0])
                        saved_activations.append(l)
                        l = tf.concat(fetch(saved_activations), 3)
                if not self.bottleneck:
                    l = add_transition('transition1', l)
                else:
                    l = add_bottleneck_transition('transition1', l)
            # print('*** after stage1, l', l)  # (?, 16, 16, 96)

            # stage 2
            with tf.variable_scope('block2') as scope:
                saved_activations = [l]
                for i in range(self.N):
                    if not self.bottleneck:
                        l = add_layer_without_concat('densen_layer.{}'.format(i), l, self.growthRate[1])
                        saved_activations.append(l)
                        l = tf.concat(fetch(saved_activations), 3)
                    else:
                        l = add_bottleneck_without_concat('densen_layer.{}'.format(i), l, self.growthRate[1])
                        saved_activations.append(l)
                        l = tf.concat(fetch(saved_activations), 3)
                if not self.bottleneck:
                    l = add_transition('transition2', l)
                else:
                    l = add_bottleneck_transition('transition2', l)

            # stage 3
            with tf.variable_scope('block3') as scope:
                saved_activations = [l]
                for i in range(self.N):
                    if not self.bottleneck:
                        l = add_layer_without_concat('densen_layer.{}'.format(i), l, self.growthRate[2])
                        saved_activations.append(l)
                        l = tf.concat(fetch(saved_activations), 3)
                    else:
                        l = add_bottleneck_without_concat('densen_layer.{}'.format(i), l, self.growthRate[2])
                        saved_activations.append(l)
                        l = tf.concat(fetch(saved_activations), 3)

            l = BatchNorm('bn_last', l)
            l = tf.nn.relu(l)
            l = GlobalAvgPooling('gap', l)
            logits = FullyConnected('linear', l, out_dim=self.num_classes, nl=tf.identity)
            return logits

        logits = dense_net("dense_net")
        prob = tf.nn.softmax(logits, name='output')

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        # wrong = prediction_incorrect(logits, label)
        wrong = tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, 1)), tf.float32, name='wrong_vector')
        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        # weight decay on all W
        wd_cost = tf.multiply(1e-4, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
        add_moving_summary(cost, wd_cost)

        add_param_summary(('.*/W', ['histogram']))  # monitor W
        self.cost = tf.add_n([cost, wd_cost], name='cost')

    def _get_optimizer(self):
        lr = tf.get_variable('learning_rate', initializer=0.1, trainable=False)
        tf.summary.scalar('learning_rate', lr)
        return tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
```
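Note that the script imports linearFetch, expFetch, and the layer helpers from a local utils module that is not shown here. As a rough sketch of what the two fetch policies might look like, assuming the paper's idea of aggregating only exponentially spaced predecessors (the exact offsets in the official repo may differ):

```python
# Sketch of the two fetch policies (assumption, not the repo's utils code):
# - linearFetch: dense aggregation, concatenate every previous output
# - expFetch: sparse aggregation, keep only outputs at power-of-two
#   offsets from the current layer (1, 2, 4, 8, ... steps back)
def linearFetch(saved_activations):
    return saved_activations

def expFetch(saved_activations):
    n = len(saved_activations)
    fetched, offset = [], 1
    while offset <= n:
        fetched.append(saved_activations[n - offset])
        offset *= 2
    return fetched
```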
However, the total parameter count of this network is 4.087M on CIFAR-100 and 4.052M on CIFAR-10. Could you have a look at this program? Looking forward to your reply.
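To cross-check the reported parameter counts, one option in a TF1/tensorpack setup is to sum the shapes of all trainable variables after the graph has been built; a generic snippet, not specific to this repo:

```python
import numpy as np
import tensorflow as tf

# Count trainable parameters of the graph currently built in the
# default graph; call this after model construction.
def count_params():
    return int(sum(np.prod(v.get_shape().as_list()) for v in tf.trainable_variables()))

# e.g. print('total params (M): %.3f' % (count_params() / 1e6))
```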
Hi, I notice there is a "layer_per_stage" parameter in the CIFAR-100 PyTorch implementation: https://github.com/Lyken17/SparseNet/blob/master/src/pytorch/denselink.py#L154. For model1-BC (depth=100, k1=16, k2=32, k3=64) and model2-BC (depth=100, k1=32, k2=64, k3=128), what exact value does this parameter take? Thanks a lot.