Please provide the REQUIRED information. Otherwise, it is almost impossible to locate the problem. DO NOT CHANGE THE FORM.

### PyABSA Version (Required)
See the console output for PyABSA, Torch, Transformers Version
Python Version: 3.8.10
PyABSA Version: 2.3.3 (Requirement already satisfied: pyabsa in ./miniconda3/lib/python3.8/site-packages)
Torch Version: 2.0.0+cu118+cuda11.8
Transformers Version: 4.29.0
Other:
### ABSADataset Version (Required if you use integrated datasets)
See the console output for ABSADataset Version
2023-09-22 18:02:43,259 INFO: Local dataset version: 2023.03.14
### Code To Reproduce (Required)
Paste buggy code here, **text-only, no screenshots here**
```python
import os
import pickle

import tqdm

# Note: dependency_adj_matrix() is presumably defined elsewhere in the same
# PyABSA module (not included in this snippet); it builds the dependency
# adjacency matrix for a sentence.

def prepare_dependency_graph(dataset_list, graph_path, max_seq_len):
    if 'train' in dataset_list[0].lower():
        append_name = 'trainset{}x{}.graph'.format(max_seq_len, max_seq_len)
    elif 'test' in dataset_list[0].lower():
        append_name = 'testset{}x{}.graph'.format(max_seq_len, max_seq_len)
    elif 'val' in dataset_list[0].lower():
        append_name = 'valset{}x{}.graph'.format(max_seq_len, max_seq_len)
    else:
        append_name = 'unrecognizedset{}x{}.graph'.format(max_seq_len, max_seq_len)
    graph_path = os.path.join(graph_path, append_name)

    # If the graph file already exists, reuse it instead of rebuilding.
    if os.path.isfile(graph_path):
        return graph_path

    idx2graph = {}
    if os.path.isdir(graph_path):
        fout = open(os.path.join(graph_path, append_name), 'wb')
        graph_path = os.path.join(graph_path, append_name)
    elif os.path.isfile(graph_path):
        return graph_path
    else:
        fout = open(graph_path, 'wb')

    # Dataset files store one example every three lines:
    # sentence with a "$T$" placeholder, aspect term, polarity label.
    for filename in dataset_list:
        try:
            print('parsing dependency matrix:', filename)
            fin = open(filename, 'r', encoding='utf-8', newline='\n', errors='ignore')
            lines = fin.readlines()
            fin.close()
            for i in tqdm.tqdm(range(0, len(lines), 3), postfix='Construct graph for {}'.format(filename)):
                text_left, _, text_right = [s.strip() for s in lines[i].partition("$T$")]
                aspect = lines[i + 1].strip()
                adj_matrix = dependency_adj_matrix(text_left + ' ' + aspect + ' ' + text_right)
                text = text_left + ' ' + aspect + ' ' + text_right
                idx2graph[text.lower()] = adj_matrix
        except Exception as e:
            print(e)
            print('unprocessed:', filename)
    pickle.dump(idx2graph, fout)
    fout.close()
    return graph_path
```
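Note that this function swallows per-file exceptions and still dumps `idx2graph` at the end, so a `.graph` file can exist on disk yet contain an empty dict, and the early `os.path.isfile(graph_path)` check will then keep returning that stale file on later runs. For reference, a minimal sketch of how I check whether the pickle was actually populated (the dataset path and output directory below are placeholders, not my real paths):

```python
import pickle

# Hypothetical invocation: 'datasets/train.dat' and 'graphs' are placeholders.
graph_file = prepare_dependency_graph(['datasets/train.dat'], 'graphs', max_seq_len=80)

with open(graph_file, 'rb') as fin:
    idx2graph = pickle.load(fin)
print(len(idx2graph), 'dependency graphs in', graph_file)  # 0 means nothing was generated
```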
### Full Console Output (Required)
```
KeyError                                  Traceback (most recent call last)
Cell In[36], line 3
      1 config.num_epoch = 1
      2 config.model = APCModelList.DLCF_GCN
----> 3 trainer = APCTrainer(
      4     config=config,
      5     dataset=dataset,
      6     from_checkpoint="english",
      7     # if you want to resume training from our pretrained checkpoints, you can pass the checkpoint name here
      8     auto_device=DeviceTypeOption.AUTO,
      9     path_to_save=None,  # set a path to save checkpoints, if it is None, save checkpoints at 'checkpoints' folder
     10     checkpoint_save_mode=ModelSaveOption.SAVE_MODEL_STATE_DICT,
     11     load_aug=False,
     12     # there are some augmentation dataset for integrated datasets, you use them by setting load_aug=True to improve performance
     13 )

Cell In[35], line 46, in APCTrainer.__init__(self, config, dataset, from_checkpoint, checkpoint_save_mode, auto_device, path_to_save, load_aug)
     41 self.config.task_code = TaskCodeOption.Aspect_Polarity_Classification
     42 self.config.task_name = TaskNameOption().get(
     43     TaskCodeOption.Aspect_Polarity_Classification
     44 )
---> 46 self._run()

Cell In[14], line 205, in Trainer._run(self)
    203 self.config.seed = s
    204 if self.config.checkpoint_save_mode:
--> 205     model_path.append(self.training_instructor(self.config).run())
    206 else:
    207     # always return the last trained model if you don't save trained model
    208     model = self.inference_model_class(
    209         checkpoint=self.training_instructor(self.config).run()
    210     )

Cell In[34], line 15, in APCTrainingInstructor.__init__(self, config)
     12 def __init__(self, config):
     13     super().__init__(config)
---> 15     self._load_dataset_and_prepare_dataloader()
     17     self._init_misc()
Cell In[34], line 3, in APCTrainingInstructor._load_dataset_and_prepare_dataloader(self)
      2 def _load_dataset_and_prepare_dataloader(self):
----> 3     self.model = APCEnsembler(self.config)
      4     self.tokenizer = self.model.tokenizer
      6     self.train_set = self.model.train_set

Cell In[32], line 118, in APCEnsembler.__init__(self, config, load_dataset, **kwargs)
    110     exit(-1)
    112 if (
    113     load_dataset
    114     and not os.path.exists(cache_path)
    115     or self.config.overwrite_cache
    116 ):
    117     self.train_set = (
--> 118         ABSADataset(self.config, self.tokenizer, dataset_type="train")
    119         if not self.train_set
    120         else self.train_set
    121     )
    122     self.test_set = (
    123         ABSADataset(self.config, self.tokenizer, dataset_type="test")
    124         if not self.test_set
    125         else self.test_set
    126     )
    127     self.valid_set = (
    128         ABSADataset(self.config, self.tokenizer, dataset_type="valid")
    129         if not self.valid_set
    130         else self.valid_set
    131     )

Cell In[27], line 164, in ABSADataset.__init__(self, config, tokenizer, dataset_type)
    163 def __init__(self, config, tokenizer, dataset_type="train"):
--> 164     super().__init__(config=config, tokenizer=tokenizer, dataset_type=dataset_type)

Cell In[24], line 53, in PyABSADataset.__init__(self, config, tokenizer, dataset_type, **kwargs)
     46     self.data = self.covert_to_tensor(self.data)
     48 elif (
     49     self.config.get("dataset_file")
     50     and dataset_type in self.config.dataset_file
     51     and self.config.dataset_file[dataset_type]
     52 ):
---> 53     self.load_data_from_file(
     54         self.config.dataset_file, dataset_type=dataset_type, **kwargs
     55     )
     56     self.data = self.covert_to_tensor(self.data)
     57     self.data = self.data[
     58         : self.config.get("data_num", None)
     59         if self.config.get("data_num", None)
     60         else None
     61     ]

Cell In[27], line 143, in ABSADataset.load_data_from_file(self, file_path, **kwargs)
    140 check_and_fix_labels(label_set, "polarity", all_data, self.config)
    141 self.config.output_dim = len(label_set)
--> 143 all_data = build_sentiment_window(
    144     all_data,
    145     self.tokenizer,
    146     self.config.similarity_threshold,
    147     input_demands=self.config.inputs_cols,
    148 )
    149 for data in all_data:
    150     cluster_ids = []

Cell In[18], line 45, in build_sentiment_window(examples, tokenizer, similarity_threshold, input_demands)
     42 def build_sentiment_window(
     43     examples, tokenizer, similarity_threshold, input_demands=None
     44 ):
---> 45     copy_side_aspect("left", examples[0], examples[0], examples, input_demands)
     46     for idx in range(1, len(examples)):
     47         if is_similar(
     48             examples[idx - 1]["text_indices"],
     49             examples[idx]["text_indices"],
     50             tokenizer=tokenizer,
     51             similarity_threshold=similarity_threshold,
     52         ):

Cell In[18], line 93, in copy_side_aspect(direct, target, source, examples, input_demands)
     91 elif data_item.startswith("right") or data_item.startswith("left"):
     92     continue
---> 93 target[direct + "_" + data_item] = source[data_item]
     94 target[direct + "_dist"] = int(
     95     abs(
     96         np.average(list(source["aspect_position"]))
     97         - np.average(list(target["aspect_position"]))
     98     )
     99 )

KeyError: 'dependency_graph'
```
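If I read the traceback correctly, `copy_side_aspect()` copies every expected input column from one example to its neighbor, so each example must already carry a `dependency_graph` entry produced by the graph-preparation step. A minimal illustration of the failure mode (illustrative dict only, not PyABSA's actual data structure):

```python
# One training example after preprocessing; 'dependency_graph' is missing
# because graph generation failed earlier (illustrative only).
example = {"text_indices": [1, 2, 3], "aspect_position": [1]}

# copy_side_aspect() effectively does this for each expected column:
example["left_dependency_graph"] = example["dependency_graph"]  # KeyError: 'dependency_graph'
```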
None of the models that use graph convolution can generate the graph structure during training, so training cannot proceed.

I would like to train some of the models that use graph convolution, but neither the DLCF_DCA_BERT model nor PyABSA can generate the required graph structure. PyABSA-2 should be able to generate it, but I did not succeed. With the new code, running `from pyabsa import AspectPolarityClassification as APC` fails immediately, so I fell back to importing the code blocks piece by piece as needed, but I have never managed to run a model end to end. (I suspect the error comes from failing to import PyABSAVersion; importing it manually did not work. Since I could not do `from pyabsa import __version__` and assign `PyABSAVersion = __version__`, I had to assign the value by hand, but the third line of the code did not print the version the way yours does, and execution stopped at this step.) If you could help me generate the graph structure with the first two models, or show me how to train a model when the fast import of the latest version fails, I would be very grateful. Thank you very much!
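As a side note, here is a sketch of the workaround I have been trying for reading the installed version when `from pyabsa import __version__` fails (this assumes pyabsa was installed with pip; `importlib.metadata` is available from Python 3.8):

```python
# Read the installed package version from pip metadata instead of the module,
# as a workaround when `from pyabsa import __version__` raises an ImportError.
import importlib.metadata

PyABSAVersion = importlib.metadata.version("pyabsa")
print("PyABSA:", PyABSAVersion)  # e.g. 2.3.3
```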