Open zl-comment opened 2 months ago
解决了问题 for d in data: if task == "sst2" or task == "cola": content = d['sentence'] elif task == 'qqp': content = 'Question 1: ' + \ d['question1'] + ' Question 2: ' + \ d['question2'] elif task == 'mnli' or task == 'mnli_matched' or task == 'mnli_mismatched': content = 'Premise: ' + \ d['premise'] + ' Hypothesis: ' + \ d['hypothesis'] elif task == 'qnli': content = 'Question: ' + \ d['question'] + ' Context: ' + \ d['sentence'] elif task == 'rte' or task == 'mrpc' or task == "wnli": content = 'Sentence 1: ' + \ d['sentence1']+' Sentence 2: ' + \ d['sentence2'] else: raise NotImplementedError
self.data.append({"content": content, "label": d['label']})
这段代码的表述很准确，但在实际操作中，最后一句 self.data.append({"content": content, "label": d['label']}) 添加的标签并不是正确的 0 或 1，而是 -1，因此需要加两个强制判断语句。
Thank you very much for the contribution! We will look into this.
class GLUE(Dataset):
    """One GLUE task flattened into ``{"content": str, "label": int}`` records.

    Each example's text fields are joined into a single prompt-ready string
    (for example ``"Premise: ... Hypothesis: ..."``) and stored, together with
    its integer label, in ``self.data``.
    """

    # Tasks whose text lives in a single ``sentence`` column.
    _SINGLE_SENTENCE_TASKS = ("sst2", "cola")

    # Two-field tasks: task -> (prefix 1, column 1, prefix 2, column 2).
    _PAIR_TEMPLATES = {
        "qqp": ("Question 1: ", "question1", " Question 2: ", "question2"),
        "mnli": ("Premise: ", "premise", " Hypothesis: ", "hypothesis"),
        "mnli_matched": ("Premise: ", "premise", " Hypothesis: ", "hypothesis"),
        "mnli_mismatched": ("Premise: ", "premise", " Hypothesis: ", "hypothesis"),
        "qnli": ("Question: ", "question", " Context: ", "sentence"),
        "rte": ("Sentence 1: ", "sentence1", " Sentence 2: ", "sentence2"),
        "mrpc": ("Sentence 1: ", "sentence1", " Sentence 2: ", "sentence2"),
        "wnli": ("Sentence 1: ", "sentence1", " Sentence 2: ", "sentence2"),
    }

    def __init__(self, task, local_path=None):
        """Load ``task`` and fill ``self.data``.

        Args:
            task: One of ``self.supported_tasks``. ``"mnli"`` loads the
                matched and mismatched sets and concatenates them.
            local_path: Optional directory containing parquet data named
                after the task (``f"{local_path}/{task}"``; for ``"mnli"``
                the ``mnli_matched`` and ``mnli_mismatched`` entries). When
                falsy, the validation split is fetched with
                ``load_dataset("glue", ...)``.

        Raises:
            AssertionError: If ``task`` is not in ``self.supported_tasks``.
        """
        self.data = []
        self.supported_tasks = ["sst2", "cola", "qqp",
                                "mnli", "mnli_matched", "mnli_mismatched",
                                "qnli", "wnli", "rte", "mrpc"]
        assert task in self.supported_tasks
        self.task = task

        if local_path:
            if self.task == "mnli":
                from datasets import concatenate_datasets

                matched_df = pd.read_parquet(f"{local_path}/mnli_matched")
                mismatched_df = pd.read_parquet(f"{local_path}/mnli_mismatched")
                # Convert the pandas DataFrames into datasets.Dataset objects.
                matched = datasets.Dataset.from_pandas(matched_df)
                mismatched = datasets.Dataset.from_pandas(mismatched_df)
                data = concatenate_datasets([matched, mismatched])
            else:
                # Every other task is stored under its own name.
                df = pd.read_parquet(f"{local_path}/{task}")
                data = datasets.Dataset.from_pandas(df)
        else:
            # No local copy given: load from the Hugging Face hub.
            if self.task == "mnli":
                from datasets import concatenate_datasets

                mnli = load_dataset("glue", "mnli")
                data = concatenate_datasets(
                    [mnli["validation_matched"], mnli["validation_mismatched"]]
                )
            else:
                data = load_dataset("glue", task)["validation"]

        for d in data:
            content = self._build_content(task, d)
            label = d['label']
            # Skip unlabeled rows (reported as label -1) instead of storing a
            # bogus label. Every valid class id is kept, including MNLI's
            # third class (2), which a plain 0/1 filter would silently drop.
            if label is None or label < 0:
                continue
            self.data.append({"content": content, "label": int(label)})

    @classmethod
    def _build_content(cls, task, d):
        """Return the single text string for example ``d`` of ``task``.

        Raises:
            NotImplementedError: If no text template exists for ``task``.
        """
        if task in cls._SINGLE_SENTENCE_TASKS:
            return d['sentence']
        try:
            prefix_a, column_a, prefix_b, column_b = cls._PAIR_TEMPLATES[task]
        except KeyError:
            raise NotImplementedError from None
        return prefix_a + d[column_a] + prefix_b + d[column_b]
class GLUE(Dataset): """ GLUE class is a dataset class for the General Language Understanding Evaluation benchmark, supporting multiple natural language understanding tasks.