Open · Rouchestand opened this issue 1 year ago
Thanks for your positive comment. We follow the dataset splits used in MSCap and MemCap to train a TextCNN classifier. Details of the splits and the TextCNN code are as follows:
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class textCNN(nn.Module):
    def __init__(self, kernel_num, vocab_size, kernel_size, embed_dim, dropout, class_num):
        super(textCNN, self).__init__()
        ci = 1  # input channel size
        self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=1)
        self.conv11 = nn.Conv2d(ci, kernel_num, (kernel_size[0], embed_dim))
        self.conv12 = nn.Conv2d(ci, kernel_num, (kernel_size[1], embed_dim))
        self.conv13 = nn.Conv2d(ci, kernel_num, (kernel_size[2], embed_dim))
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(len(kernel_size) * kernel_num, class_num)

    def init_embed(self, embed_matrix):
        self.embed.weight = nn.Parameter(torch.Tensor(embed_matrix))

    @staticmethod
    def conv_and_pool(x, conv):
        # x: (batch, 1, sentence_length, embed_dim)
        x = conv(x)
        # x: (batch, kernel_num, H_out, 1)
        x = F.relu(x.squeeze(3))
        # x: (batch, kernel_num, H_out)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        # x: (batch, kernel_num)
        return x

    def forward(self, x):
        # x: (batch, sentence_length)
        x = self.embed(x)
        # x: (batch, sentence_length, embed_dim)
        # TODO: init the embedding matrix with pre-trained embeddings (see init_embed)
        x = x.unsqueeze(1)
        # x: (batch, 1, sentence_length, embed_dim)
        x1 = self.conv_and_pool(x, self.conv11)  # (batch, kernel_num)
        x2 = self.conv_and_pool(x, self.conv12)  # (batch, kernel_num)
        x3 = self.conv_and_pool(x, self.conv13)  # (batch, kernel_num)
        x = torch.cat((x1, x2, x3), 1)  # (batch, 3 * kernel_num)
        x = self.dropout(x)
        logit = self.fc1(x)
        return logit

    def get_batch_captions_style_scores(self, captions, tokenizer, device):
        input_ids = tokenizer.batch_encode_plus(captions, padding=True)['input_ids']
        input_ids_ = torch.tensor(input_ids).to(device)
        logits = self.forward(input_ids_)
        probs = F.softmax(logits, dim=-1)
        predicts = logits.argmax(-1)
        return probs, predicts

    def init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()
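A minimal sketch of how the classifier above could be used to score captions and estimate style accuracy (Acc) is shown below. This is not the authors' evaluation script: the GPT-2 tokenizer, the hyper-parameters (kernel_num=100, kernel_size=[3, 4, 5], embed_dim=300, class_num=2), the checkpoint name, and the target_label index are all illustrative assumptions. Acc is taken here as the fraction of generated captions whose predicted class matches the target style.

# Usage sketch (assumptions: binary positive/negative classes, a GPT-2 tokenizer,
# and illustrative hyper-parameters; replace with your own trained settings).
import torch
from transformers import GPT2Tokenizer

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token by default

model = textCNN(kernel_num=100, vocab_size=len(tokenizer), kernel_size=[3, 4, 5],
                embed_dim=300, dropout=0.5, class_num=2).to(device)
# In practice, load your trained classifier weights here, e.g.:
# model.load_state_dict(torch.load('textcnn_senticap.pt', map_location=device))
model.eval()

captions = ['a happy dog plays with his favourite ball',
            'a broken bike lies abandoned in the cold rain']
target_label = 0  # index of the target style class in this sketch (e.g. "positive")

with torch.no_grad():
    probs, predicts = model.get_batch_captions_style_scores(captions, tokenizer, device)

# Acc = fraction of captions classified as the target style
acc = (predicts == target_label).float().mean().item()
print(probs, predicts, acc)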
Your reply has been a great help to me. Thank you very much.
Dear author, I am reproducing your experiment from Table 4. I set the parameters sentiment type, candidate k, num iterations, α, β, γ, and sentence length to positive, 200, 15, 0.02, 2, 5, and 10, respectively. My test results are BLEU-3 1.45, METEOR 7.65, and CLIP-S 0.99, which differ somewhat from the results in Table 4. In addition, I trained a TextCNN sentiment classifier to calculate Acc, but I was unable to match the Acc in the table. Could you tell me more about the experimental details of Table 4 and provide the complete code for calculating Acc?
Dear author, I have the same question about the results in Table 4. Could you provide the complete code for training the TextCNN classifier and calculating Acc?
Can you tell me where the factual captions came from? Did you choose them randomly from the COCO test set or from somewhere else?
Dear author, your paper is very creative and I am very interested in it. I am preparing to reproduce your experiments, but the code you provided does not seem to explain how to use the SentiCap and FlickrStyle10k datasets. Could you give me some guidance? I would appreciate it if you could take the time to reply.