imgaojun / SWCC4Event

Code for our ACL2022 paper "Improving Event Representation via Simultaneous Weakly Supervised Contrastive Learning and Clustering".

How to Reproduce the Transfer Task (MCNC) Result? #2

Closed · YuboFeng2023 closed this issue 1 year ago

YuboFeng2023 commented 1 year ago

Hi,

How can I reproduce the result on the transfer task, Multiple Choice Narrative Cloze (MCNC)?

Following the instructions in the readme.md and test.sh, I obtained the same results as the paper on the Event Similarity Tasks.

But there is no description, manual, or dataset for the MCNC task. Could you please release some information about it?

Thank you!

imgaojun commented 1 year ago

Hi, for the MCNC task we used the same test set and evaluation metric as the previous work (https://arxiv.org/pdf/1805.05081.pdf); the code and data are available at https://github.com/eecrazy/ConstructingNEEG_IJCAI_2018.
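
For reference, here is a minimal sketch of loading and inspecting that test set. It assumes the pickled corpus_index_test file and the (context_events, candidate_events, gold_index) example layout used in the evaluation script posted later in this thread:

# Minimal sketch: inspect the MCNC test pickle (assumed file name and layout, see note above).
import pickle

with open("corpus_index_test", "rb") as f:
    test_data = pickle.load(f)

print(len(test_data), "test examples")
context_events, candidate_events, gold_index = test_data[0]
print(len(context_events), "context events,", len(candidate_events), "candidates")
print("gold candidate index:", gold_index)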

YuboFeng2023 commented 1 year ago

Thanks for your reply!

But it still confuses me...

I hope the evaluation code can be released in this repository.

imgaojun commented 1 year ago

You can refer to the following code:

import argparse
import csv
import importlib
import pickle
from typing import Any

import numpy as np
import torch
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

import texar.torch as tx
from texar.torch.run import *  # provides make_deterministic used below

import data_utils
from model import SWCC

parser = argparse.ArgumentParser()
parser.add_argument(
    '--config-model', type=str, default="config_model",
    help="The model config.")
parser.add_argument(
    '--config-data', type=str, default="config_data",
    help="The dataset config.")
parser.add_argument(
    "--do-train", action="store_true", help="Whether to run training.")
parser.add_argument(
    "--do-eval", action="store_true",
    help="Whether to run eval on the dev set.")
parser.add_argument(
    "--output-dir", type=str, default="./outputs1/",
    help="Path to save the trained model and logs.")
parser.add_argument(
    "--log-file", type=str, default="exp.log",
    help="Name of the log file.")

parser.add_argument(
    '--checkpoint', type=str, default=None,
    help="Model checkpoint to load model weights from.")
args = parser.parse_args()

config_model: Any = importlib.import_module(args.config_model)
config_data: Any = importlib.import_module(args.config_data)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fix random seeds for reproducibility.
make_deterministic(config_model.random_seed)

train_data = data_utils.TrainData(config_data.train_hparams, device=device)
model = SWCC(config_model=config_model, config_data=config_data)

if args.checkpoint:
    print(f"loading checkpoint {args.checkpoint}...")
    model.load_state_dict(torch.load(args.checkpoint)['model'])
model.to(device)

tokenizer = tx.data.BERTTokenizer(pretrained_model_name="bert-base-uncased")
# MCNC test set: a pickled list of (context_events, candidate_events, gold_index) examples.
test_data = pickle.load(open("corpus_index_test", "rb"))

def get_events(raw_list):
    """Extract a fixed subset of fields from each raw event tuple."""
    all_events = []
    selected_id = [3, 0, 4, 5]  # indices of the event fields to keep, in this order
    for ex in raw_list:
        event = []
        for i in selected_id:
            if ex[i] is not None:
                # Words within a field are joined by '+' in the raw data.
                event.append(ex[i].replace('+', ' '))
        all_events.append(event)
    return all_events

def convert_events_to_embeddings(events):
    """Tokenize a batch of events and encode them with the query event encoder."""
    events = [data_utils.map_evt_to_tokens_for_text(evt) for evt in events]
    evt_ids = [tokenizer.map_text_to_id(evt) for evt in events]
    # Pad the token-id sequences into a single batch.
    evt_ids, evt_lengths = tx.data.padded_batch(
        evt_ids, pad_value=tokenizer.map_token_to_id(tokenizer.pad_token))

    evt_ids = torch.from_numpy(evt_ids).to(device)
    evt_lengths = torch.tensor(evt_lengths).to(device)
    with torch.no_grad():  # evaluation only; no gradients needed
        evt_emb = model.encoder_q(evt_ids, evt_lengths)
    return evt_emb

model.eval()
# results[k] collects a 1/0 per example for accuracy@k, k = 1..5.
results = {1: [], 2: [], 3: [], 4: [], 5: []}

with open('outputs.csv', 'w', newline='') as csvfile:
    fieldnames = ['context', 'choices', 'answer', 'label']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    for example in tqdm(test_data):
        context = get_events(example[0])     # chain of context events
        candidates = get_events(example[1])  # candidate next events
        label = example[2]                   # index of the correct candidate

        context_embs = convert_events_to_embeddings(context)
        cand_embs = convert_events_to_embeddings(candidates)
        # Represent the context as the sum of its event embeddings.
        context_emb = context_embs.sum(dim=0, keepdim=True)
        # Score each candidate by cosine similarity to the context representation.
        sim_scores = cosine_similarity(context_emb.tolist(), cand_embs.tolist())

        # Candidates ranked by decreasing similarity; the top one is the prediction.
        topk_preds = np.argsort(-sim_scores[0])
        pred = topk_preds[0]
        writer.writerow({
            'context': '\n'.join(' '.join(ex) for ex in context),
            'choices': '\n'.join(' '.join(ex) for ex in candidates),
            'answer': ' '.join(candidates[label]),
            'label': 1 if pred == label else 0,
        })
        for k in range(1, 6):
            results[k].append(1 if label in topk_preds[:k] else 0)

    # accuracy@k for k = 1..5; accuracy@1 is the standard MCNC accuracy.
    for k in range(1, 6):
        print(f"accuracy@{k}: {sum(results[k]) / len(results[k]):.4f}")
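
Assuming the snippet above is saved as, say, evaluate_mcnc.py in the repo root (next to config_model.py and config_data.py, with corpus_index_test in the working directory), it can be run roughly as follows; the file and checkpoint names here are illustrative:

python evaluate_mcnc.py --config-model config_model --config-data config_data --checkpoint ./outputs/checkpoint_best.pt
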
YuboFeng2023 commented 1 year ago

Really appreciate it!

Let me give it a try!

YuboFeng2023 commented 1 year ago

Hi!

I appreciate the code and data you provided!

According to the paper, I set the hyper-parameters as follows (see the config sketch after the list):

batch size = 256
learning rate (event representation) = 2e-7
learning rate (prototype memory) = 2e-5
temperature = 0.3
number of prototypes = 10
epochs = 2
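
For what it is worth, here is a hypothetical sketch of how these values might be written down in a config such as config_model.py; the field names are illustrative and may not match the actual SWCC4Event configs:

# Hypothetical config sketch; field names are illustrative, not necessarily the repo's.
random_seed = 1234            # read by make_deterministic in the evaluation script
temperature = 0.3             # contrastive temperature
num_prototypes = 10           # number of prototypes for the clustering objective
lr_event_encoder = 2e-7       # learning rate for the event representation (BERT encoder)
lr_prototype_memory = 2e-5    # learning rate for the prototype memory
num_epochs = 2
batch_size = 256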

With these settings, the model matches the paper's performance on the Event Similarity Task, but it cannot reach the paper's performance on the MCNC task.

Could you please release detailed instructions for reproducing the MCNC results?

imgaojun commented 1 year ago

Could you report the results obtained by the original pretrained BERT?

imgaojun commented 1 year ago

I checked our paper and I am very sorry that I provided you with incorrect data.

We used the data provided by the paper (Multi-Relational Script Learning for Discourse Relations), which is available at https://github.com/doug919/multi_relational_script_learning.

I apologize for any inconvenience or frustration that my mistake has caused.

YuboFeng2023 commented 1 year ago

Thanks for your reply~

Which part of the data from this paper did you use: the training set, dev set, or test set?

Best wishes!

imgaojun commented 1 year ago

Just the test set. Note that we did not fine-tune our model on the MCNC training set. For the MCNC task, we train our model on the NYT corpus and test it directly on the MCNC test set.

YuboFeng2023 commented 1 year ago

Hi,

The evaluation script you released corresponds to the data format of Constructing Narrative Event Evolutionary Graph for Script Event Prediction.

However, you have since pointed to different MCNC test data, and the two data formats are different.

So could you please release an updated evaluation script adapted to the data format of Multi-Relational Script Learning for Discourse Relations?

Thank you very much!

imgaojun commented 1 year ago

The experimental results and data are on my previous company's server, which I currently do not have access to, so I am not sure which script I used. Can you tell me the results you get on the MCNC data (https://github.com/eecrazy/ConstructingNEEG_IJCAI_2018) with this code, so I can help you find the problem?

You can refer to the code I posted above.