I want to train a model myself, and I followed the default settings in train.py. However, I ran into a problem that I couldn't solve.
I ran train.py and got the following exception:
Training a model from scratch
Extracting...
method_name='fairseq_train_and_evaluate'
args=()
kwargs={'arch': 'transformer', 'warmup_updates': 4000, 'parametrization_budget': 256, 'beam': 8, 'dataset': 'wikilarge', 'dropout': 0.2, 'fp16': False, 'label_smoothing': 0.54, 'lr': 0.00011, 'lr_scheduler': 'fixed', 'max_epoch': 100, 'max_tokens': 5000, 'metrics_coefs': [0, 1, 0], 'optimizer': 'adam', 'preprocessors_kwargs': {'LengthRatioPreprocessor': {'target_ratio': 0.8}, 'LevenshteinPreprocessor': {'target_ratio': 0.8}, 'WordRankRatioPreprocessor': {'target_ratio': 0.8}, 'DependencyTreeDepthRatioPreprocessor': {'target_ratio': 0.8}, 'SentencePiecePreprocessor': {'vocab_size': 10000}}}
Creating /home/access-main/resources/datasets/_f56b9888a6a6550a1d060813416e5298...
Creating preprocessed dataset with LengthRatioPreprocessor(target_ratio=0.8): wikilarge -> _f56b9888a6a6550a1d060813416e5298
Creating /home/access-main/resources/datasets/_d6002a05838f1e5b3a3fc0d98c9fa7bd...
Creating preprocessed dataset with LevenshteinPreprocessor(bucket_size=0.05, noise_std=0, target_ratio=0.8): _f56b9888a6a6550a1d060813416e5298 -> _d6002a05838f1e5b3a3fc0d98c9fa7bd
Creating /home/access-main/resources/datasets/_e382828c4d4db04ef23094dbd9e38f9c...
Creating preprocessed dataset with WordRankRatioPreprocessor(target_ratio=0.8): _d6002a05838f1e5b3a3fc0d98c9fa7bd -> _e382828c4d4db04ef23094dbd9e38f9c
Error: Rolling back creation of directory /home/access-main/resources/datasets/_e382828c4d4db04ef23094dbd9e38f9c
Traceback (most recent call last):
File "./scripts/train.py", line 49, in <module>
fairseq_train_and_evaluate(**kwargs)
File "/home/access-main/access/utils/training.py", line 18, in wrapped_func
return func(*args, **kwargs)
File "/home/access-main/access/utils/training.py", line 29, in wrapped_func
return func(*args, **kwargs)
File "/home/access-main/access/utils/training.py", line 38, in wrapped_func
result = func(*args, **kwargs)
File "/home/access-main/access/utils/training.py", line 50, in wrapped_func
result = func(*args, **kwargs)
File "/home/access-main/access/fairseq/main.py", line 117, in fairseq_train_and_evaluate
dataset = create_preprocessed_dataset(dataset, preprocessors, n_jobs=1)
File "/home/access-main/access/resources/datasets.py", line 72, in create_preprocessed_dataset
dataset = create_preprocessed_dataset_one_preprocessor(dataset, preprocessor, n_jobs)
File "/home/access-main/access/resources/datasets.py", line 55, in create_preprocessed_dataset_one_preprocessor
new_filepaths_dict[phase, 'complex'], new_filepaths_dict[phase, 'simple'])
File "/home/access-main/access/preprocessors.py", line 144, in encode_file_pair
output_files.write(self.encode_sentence_pair(complex_line, simple_line))
File "/home/access-main/access/preprocessors.py", line 244, in encode_sentence_pair
remove_special_tokens(simple_sentence))))
File "/home/access-main/access/preprocessors.py", line 277, in get_feature_value
return min(safe_division(self.feature_extractor(simple_sentence), self.feature_extractor(complex_sentence)), 2)
File "/home/access-main/access/feature_extraction.py", line 44, in get_lexical_complexity_score
words = [word for word in words if word in get_word2rank()]
File "/home/access-main/access/feature_extraction.py", line 44, in <listcomp>
words = [word for word in words if word in get_word2rank()]
File "/home/access-main/access/feature_extraction.py", line 25, in get_word2rank
next(line_generator) # Skip the first line (header)
File "/home/access-main/access/utils/helpers.py", line 77, in yield_lines
with open(filepath, 'r') as f:
IsADirectoryError: [Errno 21] Is a directory: '/home/access-main/resources/various/fasttext-vectors/wiki.en.vec'
Hi:
I want to train a model myself, and I followed the default settings in train.py. However, I ran into a problem that I couldn't solve.
I ran train.py and got the following exception:
Training a model from scratch Extracting... method_name='fairseq_train_and_evaluate' args=() kwargs={'arch': 'transformer', 'warmup_updates': 4000, 'parametrization_budget': 256, 'beam': 8, 'dataset': 'wikilarge', 'dropout': 0.2, 'fp16': False, 'label_smoothing': 0.54, 'lr': 0.00011, 'lr_scheduler': 'fixed', 'max_epoch': 100, 'max_tokens': 5000, 'metrics_coefs': [0, 1, 0], 'optimizer': 'adam', 'preprocessors_kwargs': {'LengthRatioPreprocessor': {'target_ratio': 0.8}, 'LevenshteinPreprocessor': {'target_ratio': 0.8}, 'WordRankRatioPreprocessor': {'target_ratio': 0.8}, 'DependencyTreeDepthRatioPreprocessor': {'target_ratio': 0.8}, 'SentencePiecePreprocessor': {'vocab_size': 10000}}} Creating /home/access-main/resources/datasets/_f56b9888a6a6550a1d060813416e5298... Creating preprocessed dataset with LengthRatioPreprocessor(target_ratio=0.8): wikilarge -> _f56b9888a6a6550a1d060813416e5298 Creating /home/access-main/resources/datasets/_d6002a05838f1e5b3a3fc0d98c9fa7bd... Creating preprocessed dataset with LevenshteinPreprocessor(bucket_size=0.05, noise_std=0, target_ratio=0.8): _f56b9888a6a6550a1d060813416e5298 -> _d6002a05838f1e5b3a3fc0d98c9fa7bd Creating /home/access-main/resources/datasets/_e382828c4d4db04ef23094dbd9e38f9c... Creating preprocessed dataset with WordRankRatioPreprocessor(target_ratio=0.8): _d6002a05838f1e5b3a3fc0d98c9fa7bd -> _e382828c4d4db04ef23094dbd9e38f9c Error: Rolling back creation of directory /home/access-main/resources/datasets/_e382828c4d4db04ef23094dbd9e38f9c Traceback (most recent call last): File "./scripts/train.py", line 49, in
fairseq_train_and_evaluate(**kwargs)
File "/home/access-main/access/utils/training.py", line 18, in wrapped_func
return func(*args, **kwargs)
File "/home/access-main/access/utils/training.py", line 29, in wrapped_func
return func(*args, **kwargs)
File "/home/access-main/access/utils/training.py", line 38, in wrapped_func
result = func(*args, **kwargs)
File "/home/access-main/access/utils/training.py", line 50, in wrapped_func
result = func(*args, **kwargs)
File "/home/access-main/access/fairseq/main.py", line 117, in fairseq_train_and_evaluate
dataset = create_preprocessed_dataset(dataset, preprocessors, n_jobs=1)
File "/home/access-main/access/resources/datasets.py", line 72, in create_preprocessed_dataset
dataset = create_preprocessed_dataset_one_preprocessor(dataset, preprocessor, n_jobs)
File "/home/access-main/access/resources/datasets.py", line 55, in create_preprocessed_dataset_one_preprocessor
new_filepaths_dict[phase, 'complex'], new_filepaths_dict[phase, 'simple'])
File "/home/access-main/access/preprocessors.py", line 144, in encode_file_pair
output_files.write(self.encode_sentence_pair(complex_line, simple_line))
File "/home/access-main/access/preprocessors.py", line 244, in encode_sentence_pair
remove_special_tokens(simple_sentence))))
File "/home/access-main/access/preprocessors.py", line 277, in get_feature_value
return min(safe_division(self.feature_extractor(simple_sentence), self.feature_extractor(complex_sentence)), 2)
File "/home/access-main/access/feature_extraction.py", line 44, in get_lexical_complexity_score
words = [word for word in words if word in get_word2rank()]
File "/home/access-main/access/feature_extraction.py", line 44, in <listcomp>
words = [word for word in words if word in get_word2rank()]
File "/home/access-main/access/feature_extraction.py", line 25, in get_word2rank
next(line_generator) # Skip the first line (header)
File "/home/access-main/access/utils/helpers.py", line 77, in yield_lines
with open(filepath, 'r') as f:
IsADirectoryError: [Errno 21] Is a directory: '/home/access-main/resources/various/fasttext-vectors/wiki.en.vec'
Looking forward to your reply.
Best regards,