microsoft / promptbench

A unified evaluation framework for large language models
http://aka.ms/promptbench
MIT License

To read a local copy of the GLUE dataset, I modified the GLUE code so that the attack evaluation can run, but the output score is always 0. I want to know why. #82

Open zl-comment opened 2 months ago

zl-comment commented 2 months ago

class GLUE(Dataset):
    """
    GLUE class is a dataset class for the General Language Understanding Evaluation benchmark,
    supporting multiple natural language understanding tasks.

    Examples:
    [{'content': "it 's a charming and often affecting journey . ", 'label': 1}, {'content': 'unflinchingly bleak and desperate ', 'label': 0}, ...]
    """
# def __init__(self, task):
#     self.data = []
#     self.supported_tasks = ["sst2", "cola", "qqp", "mnli",
#                             "mnli_matched", "mnli_mismatched", "qnli", "wnli", "rte", "mrpc"]
#     assert task in self.supported_tasks
#
#     self.task = task
#
#     if self.task == "mnli":
#         from datasets import concatenate_datasets
#         matched = load_dataset('glue', 'mnli')["validation_matched"]
#         mismatched = load_dataset("glue", "mnli")["validation_mismatched"]
#         data = concatenate_datasets([matched, mismatched])
#     else:
#         data = load_dataset("glue", task)["validation"]
#
#     for d in data:
#         if task == "sst2" or task == "cola":
#             content = d['sentence']
#         elif task == 'qqp':
#             content = 'Question 1: ' + \
#                 d['question1'] + ' Question 2: ' + \
#                 d['question2']
#         elif task == 'mnli' or task == 'mnli_matched' or task == 'mnli_mismatched':
#             content = 'Premise: ' + \
#                 d['premise'] + ' Hypothesis: ' + \
#                 d['hypothesis']
#         elif task == 'qnli':
#             content = 'Question: ' + \
#                 d['question'] + ' Context: ' + \
#                 d['sentence']
#         elif task == 'rte' or task == 'mrpc' or task == "wnli":
#             content = 'Sentence 1: ' + \
#                 d['sentence1'] + ' Sentence 2: ' + \
#                 d['sentence2']
#         else:
#             raise NotImplementedError
#
#         self.data.append({"content": content, "label": d['label']})

def __init__(self, task, local_path=None):
    self.data = []
    self.supported_tasks = ["sst2", "cola", "qqp", "mnli",
                            "mnli_matched", "mnli_mismatched", "qnli", "wnli", "rte", "mrpc"]
    assert task in self.supported_tasks

    self.task = task

    if local_path:
        # Requires module-level imports: import pandas as pd, import datasets,
        # and from datasets import load_dataset.
        if self.task == "mnli":
            matched_df = pd.read_parquet(f"{local_path}/mnli_matched")
            mismatched_df = pd.read_parquet(f"{local_path}/mnli_mismatched")
            # Convert the pandas DataFrames to datasets.Dataset objects
            matched = datasets.Dataset.from_pandas(matched_df)
            mismatched = datasets.Dataset.from_pandas(mismatched_df)

            from datasets import concatenate_datasets
            data = concatenate_datasets([matched, mismatched])
        else:
            # Read the local data for the other tasks
            df = pd.read_parquet(f"{local_path}/{task}")
            data = datasets.Dataset.from_pandas(df)
    else:
        # Load from the Hugging Face Hub when no local data is given
        if self.task == "mnli":
            from datasets import concatenate_datasets
            matched = load_dataset('glue', 'mnli')["validation_matched"]
            mismatched = load_dataset("glue", "mnli")["validation_mismatched"]
            data = concatenate_datasets([matched, mismatched])
        else:
            data = load_dataset("glue", task)["validation"]

    for d in data:
        if task == "sst2" or task == "cola":
            content = d['sentence']
        elif task == 'qqp':
            content = 'Question 1: ' + \
                d['question1'] + ' Question 2: ' + \
                d['question2']
        elif task == 'mnli' or task == 'mnli_matched' or task == 'mnli_mismatched':
            content = 'Premise: ' + \
                d['premise'] + ' Hypothesis: ' + \
                d['hypothesis']
        elif task == 'qnli':
            content = 'Question: ' + \
                d['question'] + ' Context: ' + \
                d['sentence']
        elif task == 'rte' or task == 'mrpc' or task == "wnli":
            content = 'Sentence 1: ' + \
                d['sentence1'] + ' Sentence 2: ' + \
                d['sentence2']
        else:
            raise NotImplementedError

        self.data.append({"content": content, "label": d['label']})
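For reference, a minimal usage sketch of the modified class; the directory layout ./glue_local/<task> is an assumption for illustration, and the module-level imports listed are the ones the method body relies on:

import pandas as pd
import datasets
from datasets import load_dataset

# Hypothetical local layout: ./glue_local/sst2, ./glue_local/mnli_matched, ./glue_local/mnli_mismatched, ...
dataset = GLUE("sst2", local_path="./glue_local")
print(len(dataset.data))   # number of loaded examples
print(dataset.data[0])     # {'content': "...", 'label': 0 or 1}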
zl-comment commented 2 months ago

Solved the problem. The for loop above that builds content for each task is accurate as written, but in practice the label appended by the final self.data.append({"content": content, "label": d['label']}) is not the correct 0 or 1 but -1, so two explicit label checks need to be added.
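A minimal diagnostic sketch for this situation (the path glue_local/sst2 below is a hypothetical example, not taken from the thread): inspecting the label column of the local parquet file shows whether it contains -1 values, which in the Hugging Face glue dataset mark the unlabeled test split and would drive the evaluation score to 0.

import pandas as pd

# Inspect the label values in a local GLUE parquet file
# ("glue_local/sst2" is a hypothetical path used only for illustration).
df = pd.read_parquet("glue_local/sst2")
print(df["label"].value_counts())
# A large count of -1 means the file holds unlabeled (test-split) rows
# rather than the labeled validation split.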

Immortalise commented 2 months ago

Thank you very much for the contribution! We will look into this.

zl-comment commented 1 month ago

class GLUE(Dataset):

def __init__(self, task, local_path=None):
    self.data = []
    self.supported_tasks = ["sst2", "cola", "qqp", "mnli",
                            "mnli_matched", "mnli_mismatched", "qnli", "wnli", "rte", "mrpc"]
    assert task in self.supported_tasks

    self.task = task

    if local_path:
        if self.task == "mnli":
            matched_df = pd.read_parquet(f"{local_path}/mnli_matched")
            mismatched_df = pd.read_parquet(f"{local_path}/mnli_mismatched")
            # Convert the pandas DataFrames to datasets.Dataset objects
            matched = datasets.Dataset.from_pandas(matched_df)
            mismatched = datasets.Dataset.from_pandas(mismatched_df)

            from datasets import concatenate_datasets
            data = concatenate_datasets([matched, mismatched])
        else:
            # Read the local data for the other tasks
            df = pd.read_parquet(f"{local_path}/{task}")
            data = datasets.Dataset.from_pandas(df)
    else:
        # Load from the Hugging Face Hub when no local data is given
        if self.task == "mnli":
            from datasets import concatenate_datasets
            matched = load_dataset('glue', 'mnli')["validation_matched"]
            mismatched = load_dataset("glue", "mnli")["validation_mismatched"]
            data = concatenate_datasets([matched, mismatched])
        else:
            data = load_dataset("glue", task)["validation"]

    for d in data:
        if task == "sst2" or task == "cola":
            content = d['sentence']

        elif task == 'qqp':
            content = 'Question 1: ' + \
                d['question1'] + ' Question 2: ' + \
                d['question2']
        elif task == 'mnli' or task == 'mnli_matched' or task == 'mnli_mismatched':
            content = 'Premise: ' + \
                d['premise'] + ' Hypothesis: ' + \
                d['hypothesis']
        elif task == 'qnli':
            content = 'Question: ' + \
                d['question'] + ' Context: ' + \
                d['sentence']
        elif task == 'rte' or task == 'mrpc' or task == "wnli":
            content = 'Sentence 1: ' + \
                d['sentence1'] + ' Sentence 2: ' + \
                d['sentence2']
        else:
            raise NotImplementedError

        if d['label'] == 1:
            self.data.append({"content": content, "label": 1})
        elif d['label'] == 0:
            self.data.append({"content": content, "label": 0})
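A more general variant of this check, sketched under the assumption that only the unlabeled -1 rows should be dropped (note that mnli uses the labels 0, 1 and 2, which an exact 0/1 check would partly discard):

        # Keep any labeled example; only the unlabeled -1 rows are skipped.
        # (mnli has three labels: 0 = entailment, 1 = neutral, 2 = contradiction.)
        if d['label'] >= 0:
            self.data.append({"content": content, "label": int(d['label'])})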