yifanjiang19 / sppnet-pytorch

A simple Spatial Pyramid Pooling layer that can be added to a CNN
Apache License 2.0
329 stars 133 forks source link

I want to know how to use your code. #6

Open Funny-dot opened 5 years ago

Funny-dot commented 5 years ago

# I want to know how to use your code. This is my code `
import math
import os
import random

import numpy as np
import tensorflow as tf
import torch
import torch.nn as nn
from PIL import Image
from torch.nn import init


class CNN(object):
    """Captcha-recognition network definition built on TensorFlow 1.x graph ops."""

    def __init__(self, image_height, image_width, max_captcha, char_set, model_save_dir):
        """Store the configuration and create the graph placeholders.

        :param image_height: height of the input images, in pixels
        :param image_width: width of the input images, in pixels
        :param max_captcha: maximum number of characters in one label
        :param char_set: indexable collection of the characters a label may contain
        :param model_save_dir: model path
        """
        self.image_height = image_height
        self.image_width = image_width
        self.max_captcha = max_captcha
        self.char_set = char_set
        self.char_set_len = len(char_set)
        self.model_save_dir = model_save_dir  # model path
        with tf.name_scope('parameters'):
            self.w_alpha = 0.01
            self.b_alpha = 0.1

        # Initialise the TF placeholders.
        with tf.name_scope('data'):
            self.X = tf.placeholder(tf.float32, [None, self.image_height * self.image_width])  # feature vector
            self.Y = tf.placeholder(tf.float32, [None, self.max_captcha * self.char_set_len])  # labels
            self.keep_prob = tf.placeholder(tf.float32)  # dropout value

    # The pasted code spelled the constructor ``init`` (the double underscores
    # were presumably eaten by markdown); keep that name callable for any
    # code that invoked it explicitly.
    init = __init__

@staticmethod
def convert2gray(img):
    """Collapse a colour image array to a single grey channel.

    An array whose shape has at most two axes is handed back untouched
    (the very same object).  Otherwise planes 0, 1 and 2 of the third axis
    are combined with the weights 0.2989, 0.5870 and 0.1140 respectively.

    :param img: array-like exposing ``shape`` and supporting ``img[:, :, k]``
    :return: the weighted sum of the three planes, or ``img`` itself
    """
    # Guard clause: nothing to convert when there is no channel axis.
    if len(img.shape) <= 2:
        return img

    red = img[:, :, 0]
    green = img[:, :, 1]
    blue = img[:, :, 2]
    return 0.2989 * red + 0.5870 * green + 0.1140 * blue

def text2vec(self, text):
    """
    Encode a label string as a flat one-hot vector.

    The result has ``max_captcha * char_set_len`` entries; character ``k`` of
    ``text`` sets the entry at ``k * char_set_len + char_set.index(char)``.

    :param text: str
    :return: numpy.array
    :raises ValueError: if ``text`` is longer than ``max_captcha`` (a
        character missing from ``char_set`` also surfaces as the
        ``ValueError`` raised by ``char_set.index``)
    """
    if len(text) > self.max_captcha:
        raise ValueError('验证码最长{}个字符'.format(self.max_captcha))

    slot_width = self.char_set_len
    one_hot = np.zeros(self.max_captcha * slot_width)
    for position, char in enumerate(text):
        one_hot[position * slot_width + self.char_set.index(char)] = 1
    return one_hot

def spatial_pyramid_pool(self, previous_conv, num_sample, previous_conv_size, out_pool_size):
    """Max-pool a feature map at several grid sizes and concatenate the results.

    For each level ``n`` in ``out_pool_size`` the window (and stride) is
    ``ceil(size / n)`` along each axis, and the map is padded so that the
    windows cover it.  Each pooled map is flattened per sample and the
    levels are concatenated along dimension 1, in the order given.

    previous_conv: torch tensor from the previous convolution layer; it is
        fed straight to ``nn.MaxPool2d`` (presumably laid out as
        (N, C, H, W) -- confirm against the caller)
    num_sample: an int, number of images in the batch (first dimension of
        the result)
    previous_conv_size: an int vector [height, width] of the feature map
    out_pool_size: a sequence of ints, the pooling grid size of each level

    returns: a tensor of shape [num_sample x n] holding the concatenation of
        all pyramid levels

    raises: ValueError if ``out_pool_size`` is empty

    NOTE: ``nn.MaxPool2d`` rejects padding larger than half the window, so
    some size/level combinations (e.g. a 5-wide map with a level of 4) are
    still refused by PyTorch itself.
    """
    levels = list(out_pool_size)
    if not levels:
        raise ValueError('out_pool_size must contain at least one level')

    height, width = previous_conv_size[0], previous_conv_size[1]
    pooled = []
    for level in levels:
        h_wid = int(math.ceil(height / level))
        w_wid = int(math.ceil(width / level))
        # Floor division: MaxPool2d only accepts integer padding, and the
        # original ``/ 2`` produces a float on Python 3.
        h_pad = (h_wid * level - height + 1) // 2
        w_pad = (w_wid * level - width + 1) // 2
        maxpool = nn.MaxPool2d((h_wid, w_wid), stride=(h_wid, w_wid), padding=(h_pad, w_pad))
        pooled.append(maxpool(previous_conv).view(num_sample, -1))

    # One concatenation instead of re-allocating the result at every level.
    return torch.cat(pooled, 1)
def model(self):
    """Build the prediction graph and return its output tensor.

    Reshapes ``self.X`` to ``[-1, image_height, image_width, 1]`` and passes
    it through three convolution stages (3x3 kernels with 32, 64 and 128
    output channels).  Each stage is: stride-1 'SAME' convolution, bias add,
    ReLU, 2x2 max-pool with stride 2, then dropout driven by
    ``self.keep_prob``.  The result is flattened into a 1024-unit fully
    connected layer (ReLU + dropout) followed by an output layer of
    ``max_captcha * char_set_len`` units with no activation applied.

    Side effects: prints the input tensor and the shape of the third
    convolution stage while the graph is being built.

    :return: the ``y_predict`` tensor produced by the output layer
    """
    x = tf.reshape(self.X, shape=[-1, self.image_height, self.image_width, 1])
    print(">>> input x: {}".format(x))

    # Convolution layer1
    wc1 = tf.get_variable(name='wc1', shape=[3, 3, 1, 32], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    bc1 = tf.Variable(self.b_alpha * tf.random_normal([32]))
    conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, wc1, strides=[1, 1, 1, 1], padding='SAME'), bc1))
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv1 = tf.nn.dropout(conv1, self.keep_prob)

    # Convolution layer 2
    wc2 = tf.get_variable(name='wc2', shape=[3, 3, 32, 64], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    bc2 = tf.Variable(self.b_alpha * tf.random_normal([64]))
    conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv1, wc2, strides=[1, 1, 1, 1], padding='SAME'), bc2))
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv2 = tf.nn.dropout(conv2, self.keep_prob)

    # Convolution layer 3
    wc3 = tf.get_variable(name='wc3', shape=[3, 3, 64, 128], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    bc3 = tf.Variable(self.b_alpha * tf.random_normal([128]))
    conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(conv2, wc3, strides=[1, 1, 1, 1], padding='SAME'), bc3))
    conv3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    conv3 = tf.nn.dropout(conv3, self.keep_prob)
    print(">>> convolution 3: ", conv3.shape)
    # Number of features per sample once conv3 is flattened (H * W * C).
    next_shape = conv3.shape[1] * conv3.shape[2] * conv3.shape[3]

    # Open question from the issue author: how to use the SPP code here.
    # NOTE(review): presumably this is where spatial_pyramid_pool was meant to
    # be inserted; that helper uses PyTorch ops while conv3 is a TensorFlow
    # tensor -- confirm the intended integration.

    #
    # Fully connected layer 1
    wd1 = tf.get_variable(name='wd1', shape=[next_shape, 1024], dtype=tf.float32,
                          initializer=tf.contrib.layers.xavier_initializer())
    bd1 = tf.Variable(self.b_alpha * tf.random_normal([1024]))
    # Flatten conv3 so its width matches the first dimension of wd1.
    dense = tf.reshape(conv3, [-1, wd1.get_shape().as_list()[0]])
    dense = tf.nn.relu(tf.add(tf.matmul(dense, wd1), bd1))
    dense = tf.nn.dropout(dense, self.keep_prob)

    # Fully connected layer 2
    # NOTE(review): this variable is registered under the literal name 'name'
    # (not 'wout'); renaming it would change the names stored in existing
    # checkpoints, so it is left as is -- confirm whether that is intended.
    wout = tf.get_variable('name', shape=[1024, self.max_captcha * self.char_set_len], dtype=tf.float32,
                           initializer=tf.contrib.layers.xavier_initializer())
    bout = tf.Variable(self.b_alpha * tf.random_normal([self.max_captcha * self.char_set_len]))

    with tf.name_scope('y_prediction'):
        y_predict = tf.add(tf.matmul(dense, wout), bout)

    return y_predict

`

Funny-dot commented 5 years ago

thank you.