Open GreatGBL opened 3 months ago
Hello, I wanna try the code run on Google Colab with A100, however it does not work here is the error info below
Resolving data files: 100% 1024/1024 [00:00<00:00, 344479.25it/s] Traceback (most recent call last): File "/content/BiLLM/run.py", line 273, in dataloader, testloader = get_loaders( File "/content/BiLLM/datautils.py", line 117, in get_loaders loaders= get_c4(nsamples, seed, seqlen, model, tokenizer) File "/content/BiLLM/datautils.py", line 73, in get_c4 traindata = load_dataset( File "/usr/local/lib/python3.10/dist-packages/datasets/load.py", line 2129, in load_dataset builder_instance = load_dataset_builder( File "/usr/local/lib/python3.10/dist-packages/datasets/load.py", line 1852, in load_dataset_builder builder_instance: DatasetBuilder = builder_cls( File "/usr/local/lib/python3.10/dist-packages/datasets/builder.py", line 373, in init self.config, self.config_id = self._create_builder_config( File "/usr/local/lib/python3.10/dist-packages/datasets/builder.py", line 539, in _create_builder_config raise ValueError( ValueError: BuilderConfig 'allenai--c4' not found. Available: ['en', 'en.noblocklist', 'en.noclean', 'realnewslike', 'multilingual', 'af', 'am', 'ar', 'az', 'be', 'bg', 'bg-Latn', 'bn', 'ca', 'ceb', 'co', 'cs', 'cy', 'da', 'de', 'el', 'el-Latn', 'en-multi', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fil', 'fr', 'fy', 'ga', 'gd', 'gl', 'gu', 'ha', 'haw', 'hi', 'hi-Latn', 'hmn', 'ht', 'hu', 'hy', 'id', 'ig', 'is', 'it', 'iw', 'ja', 'ja-Latn', 'jv', 'ka', 'kk', 'km', 'kn', 'ko', 'ku', 'ky', 'la', 'lb', 'lo', 'lt', 'lv', 'mg', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms', 'mt', 'my', 'ne', 'nl', 'no', 'ny', 'pa', 'pl', 'ps', 'pt', 'ro', 'ru', 'ru-Latn', 'sd', 'si', 'sk', 'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'st', 'su', 'sv', 'sw', 'ta', 'te', 'tg', 'th', 'tr', 'uk', 'und', 'ur', 'uz', 'vi', 'xh', 'yi', 'yo', 'zh', 'zh-Latn', 'zu']
Hello, I wanna try the code run on Google Colab with A100, however it does not work here is the error info below
Resolving data files: 100% 1024/1024 [00:00<00:00, 344479.25it/s] Traceback (most recent call last): File "/content/BiLLM/run.py", line 273, in
dataloader, testloader = get_loaders(
File "/content/BiLLM/datautils.py", line 117, in get_loaders
loaders= get_c4(nsamples, seed, seqlen, model, tokenizer)
File "/content/BiLLM/datautils.py", line 73, in get_c4
traindata = load_dataset(
File "/usr/local/lib/python3.10/dist-packages/datasets/load.py", line 2129, in load_dataset
builder_instance = load_dataset_builder(
File "/usr/local/lib/python3.10/dist-packages/datasets/load.py", line 1852, in load_dataset_builder
builder_instance: DatasetBuilder = builder_cls(
File "/usr/local/lib/python3.10/dist-packages/datasets/builder.py", line 373, in init
self.config, self.config_id = self._create_builder_config(
File "/usr/local/lib/python3.10/dist-packages/datasets/builder.py", line 539, in _create_builder_config
raise ValueError(
ValueError: BuilderConfig 'allenai--c4' not found. Available: ['en', 'en.noblocklist', 'en.noclean', 'realnewslike', 'multilingual', 'af', 'am', 'ar', 'az', 'be', 'bg', 'bg-Latn', 'bn', 'ca', 'ceb', 'co', 'cs', 'cy', 'da', 'de', 'el', 'el-Latn', 'en-multi', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fil', 'fr', 'fy', 'ga', 'gd', 'gl', 'gu', 'ha', 'haw', 'hi', 'hi-Latn', 'hmn', 'ht', 'hu', 'hy', 'id', 'ig', 'is', 'it', 'iw', 'ja', 'ja-Latn', 'jv', 'ka', 'kk', 'km', 'kn', 'ko', 'ku', 'ky', 'la', 'lb', 'lo', 'lt', 'lv', 'mg', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms', 'mt', 'my', 'ne', 'nl', 'no', 'ny', 'pa', 'pl', 'ps', 'pt', 'ro', 'ru', 'ru-Latn', 'sd', 'si', 'sk', 'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'st', 'su', 'sv', 'sw', 'ta', 'te', 'tg', 'th', 'tr', 'uk', 'und', 'ur', 'uz', 'vi', 'xh', 'yi', 'yo', 'zh', 'zh-Latn', 'zu']