niecongchong / DANet-keras

keras-Dual Attention Network for Scene Segmentation

Are the convolution layer parameters in the PAM layer trainable? #4

Open AlanLu0808 opened 4 years ago

AlanLu0808 commented 4 years ago

[image] I inserted the DA module into my own network, but it doesn't seem to improve the results.

However, I found that after inserting just the PAM layer into my network, the number of trainable parameters only increased by 1, and that 1 should be gamma. The parameters of the convolution layers inside PAM appear not to be trainable. This may be a problem.
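
One way to check this (a minimal sketch; base_model and model_with_pam are just placeholders for your own network without and with the PAM layer) is to count the trainable weights directly:

import numpy as np
import keras.backend as K

def count_trainable_params(model):
    # Sum the element counts of every trainable weight tensor in a Keras model.
    return int(np.sum([K.count_params(w) for w in model.trainable_weights]))

# base_model / model_with_pam are hypothetical: your network without and with PAM inserted.
# If PAM's convolutions were trainable, the difference would be far larger than 1.
print(count_trainable_params(model_with_pam) - count_trainable_params(base_model))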

zsk-tech commented 4 years ago

Indeed. I also think the parameters of that convolution layer cannot be trained. Have you solved this problem?

wujiayi commented 4 years ago

I rewrote it like this and it works:

from keras.layers import Activation, Conv2D, Layer
import keras.backend as K
import tensorflow as tf

class PAM(Layer):
    """Position Attention Module with trainable projection kernels."""

    def __init__(self,
                 beta_initializer=tf.zeros_initializer(),
                 beta_regularizer=None,
                 beta_constraint=None,
                 kernal_initializer='he_normal',
                 kernal_regularizer=None,
                 kernal_constraint=None,
                 **kwargs):
        super(PAM, self).__init__(**kwargs)

        self.beta_initializer = beta_initializer
        self.beta_regularizer = beta_regularizer
        self.beta_constraint = beta_constraint

        self.kernal_initializer = kernal_initializer
        self.kernal_regularizer = kernal_regularizer
        self.kernal_constraint = kernal_constraint

    def build(self, input_shape):
        _, h, w, filters = input_shape

        # Learnable scale for the attention branch, initialised to zero.
        self.beta = self.add_weight(shape=(1,),
                                    initializer=self.beta_initializer,
                                    name='beta',
                                    regularizer=self.beta_regularizer,
                                    constraint=self.beta_constraint,
                                    trainable=True)

        # 1x1 projections (query, key, value) registered via add_weight so they are trainable.
        self.kernel_b = self.add_weight(shape=(filters, filters // 8),
                                        initializer=self.kernal_initializer,
                                        name='kernel_b',
                                        regularizer=self.kernal_regularizer,
                                        constraint=self.kernal_constraint,
                                        trainable=True)

        self.kernel_c = self.add_weight(shape=(filters, filters // 8),
                                        initializer=self.kernal_initializer,
                                        name='kernel_c',
                                        regularizer=self.kernal_regularizer,
                                        constraint=self.kernal_constraint,
                                        trainable=True)

        self.kernel_d = self.add_weight(shape=(filters, filters),
                                        initializer=self.kernal_initializer,
                                        name='kernel_d',
                                        regularizer=self.kernal_regularizer,
                                        constraint=self.kernal_constraint,
                                        trainable=True)

        self.built = True

    def compute_output_shape(self, input_shape):
        return input_shape

    def call(self, inputs):
        _, h, w, filters = inputs.get_shape().as_list()

        # 1x1 convolutions expressed as channel-wise matrix multiplications.
        b = K.dot(inputs, self.kernel_b)
        c = K.dot(inputs, self.kernel_c)
        d = K.dot(inputs, self.kernel_d)

        # Spatial attention map: (h*w) x (h*w) affinities between positions.
        vec_b = K.reshape(b, (-1, h * w, filters // 8))
        vec_cT = K.permute_dimensions(K.reshape(c, (-1, h * w, filters // 8)), (0, 2, 1))
        bcT = K.batch_dot(vec_b, vec_cT)
        softmax_bcT = Activation('softmax')(bcT)

        vec_d = K.reshape(d, (-1, h * w, filters))
        bcTd = K.batch_dot(softmax_bcT, vec_d)
        bcTd = K.reshape(bcTd, (-1, h, w, filters))

        # Residual connection scaled by the learnable beta.
        return self.beta * bcTd + inputs

class CAM(Layer):
    """Channel Attention Module."""

    def __init__(self,
                 gamma_initializer=tf.zeros_initializer(),
                 gamma_regularizer=None,
                 gamma_constraint=None,
                 **kwargs):
        super(CAM, self).__init__(**kwargs)
        self.gamma_initializer = gamma_initializer
        self.gamma_regularizer = gamma_regularizer
        self.gamma_constraint = gamma_constraint

    def build(self, input_shape):
        # Learnable scale for the attention branch, initialised to zero.
        self.gamma = self.add_weight(shape=(1,),
                                     initializer=self.gamma_initializer,
                                     name='gamma',
                                     regularizer=self.gamma_regularizer,
                                     constraint=self.gamma_constraint,
                                     trainable=True)
        self.built = True

    def compute_output_shape(self, input_shape):
        return input_shape

    def call(self, inputs):
        _, h, w, filters = inputs.get_shape().as_list()

        # Channel attention map: filters x filters affinities between channels.
        vec_a = K.reshape(inputs, (-1, h * w, filters))
        vec_aT = K.permute_dimensions(vec_a, (0, 2, 1))
        aTa = K.batch_dot(vec_aT, vec_a)
        softmax_aTa = Activation('softmax')(aTa)

        aaTa = K.batch_dot(vec_a, softmax_aTa)
        aaTa = K.reshape(aaTa, (-1, h, w, filters))

        # Residual connection scaled by the learnable gamma.
        return self.gamma * aaTa + inputs
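
To confirm the weights are registered, here is a minimal sketch of wiring the rewritten PAM and CAM into a toy model (the input size and channel count are arbitrary):

from keras.layers import Input, Conv2D, Add
from keras.models import Model

# Toy feature map: 32x32 spatial resolution, 64 channels.
inp = Input(shape=(32, 32, 64))
feat = Conv2D(64, 3, padding='same', activation='relu')(inp)

# Apply both attention heads to the same feature map and fuse them by summation.
pam_out = PAM()(feat)
cam_out = CAM()(feat)
out = Add()([pam_out, cam_out])

model = Model(inp, out)
model.summary()  # PAM should now list beta plus kernel_b/c/d as trainable; CAM lists gamma
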
AlanLu0808 commented 4 years ago

Looks great! Have you tested it? Is there a performance improvement?

I noticed this problem in my earlier tests but never solved it, so I set it aside.

wujiayi commented 4 years ago

Looks great! Have you tested it? Is there a performance improvement? I noticed this problem in my earlier tests but never solved it, so I set it aside.

I tried it; it does train, and I checked that the parameter count is correct. But the performance improvement is not very noticeable. I'm not sure whether there is some other problem.
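
For reference, the parameter count you would expect the rewritten layers to add can be worked out from the weight shapes (a minimal sketch, assuming C input channels; plug in your own feature-map depth):

C = 64                                      # example channel count (assumption)
pam_params = 2 * C * (C // 8) + C * C + 1   # kernel_b + kernel_c + kernel_d + beta
print(pam_params)                           # 5121 for C = 64; CAM adds only gamma (1 parameter)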

karryor commented 4 years ago

Hi, did you get an improvement after making this change?