I'm getting this error while evaluating on a Hugging Face dataset:
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/datasets/load.py:2554: FutureWarning: 'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.
You can remove this warning by passing 'token=' instead.
warnings.warn(
Decode Progress: 0it [00:01, ?it/s]
Traceback (most recent call last):
File "/Users/prox/PycharmProjects/liveWhisper/testing.py", line 227, in <module>
main(args)
File "/Users/prox/PycharmProjects/liveWhisper/testing.py", line 102, in main
for out in tqdm(whisper_asr(data(dataset), batch_size=args.batch_size), desc='Decode Progress'):
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/tqdm/std.py", line 1181, in __iter__
for obj in iterable:
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/transformers/pipelines/pt_utils.py", line 124, in __next__
item = next(self.iterator)
^^^^^^^^^^^^^^^^^^^
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/transformers/pipelines/pt_utils.py", line 269, in __next__
processed = self.infer(next(self.iterator), **self.params)
^^^^^^^^^^^^^^^^^^^
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 631, in __next__
data = self._next_data()
^^^^^^^^^^^^^^^^^
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 675, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 42, in fetch
return self.collate_fn(data)
^^^^^^^^^^^^^^^^^^^^^
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/transformers/pipelines/base.py", line 194, in inner
padded[key] = _pad(items, key, _padding_value, padding_side)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/transformers/pipelines/base.py", line 100, in _pad
max_length = max(item[key].shape[1] for item in items)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/transformers/pipelines/base.py", line 100, in <genexpr>
max_length = max(item[key].shape[1] for item in items)
I'm getting this error while evaluating on a Hugging Face dataset:
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/datasets/load.py:2554: FutureWarning: 'use_auth_token' was deprecated in favor of 'token' in version 2.14.0 and will be removed in 3.0.0.
You can remove this warning by passing 'token=' instead.
warnings.warn(
Decode Progress: 0it [00:01, ?it/s]
Traceback (most recent call last):
File "/Users/prox/PycharmProjects/liveWhisper/testing.py", line 227, in <module>
main(args)
File "/Users/prox/PycharmProjects/liveWhisper/testing.py", line 102, in main
for out in tqdm(whisper_asr(data(dataset), batch_size=args.batch_size), desc='Decode Progress'):
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/tqdm/std.py", line 1181, in __iter__
for obj in iterable:
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/transformers/pipelines/pt_utils.py", line 124, in __next__
item = next(self.iterator)
^^^^^^^^^^^^^^^^^^^
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/transformers/pipelines/pt_utils.py", line 269, in __next__
processed = self.infer(next(self.iterator), **self.params)
^^^^^^^^^^^^^^^^^^^
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 631, in __next__
data = self._next_data()
^^^^^^^^^^^^^^^^^
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 675, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 42, in fetch
return self.collate_fn(data)
^^^^^^^^^^^^^^^^^^^^^
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/transformers/pipelines/base.py", line 194, in inner
padded[key] = _pad(items, key, _padding_value, padding_side)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/transformers/pipelines/base.py", line 100, in _pad
max_length = max(item[key].shape[1] for item in items)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/Users/prox/PycharmProjects/liveWhisper/venv/lib/python3.11/site-packages/transformers/pipelines/base.py", line 100, in <genexpr>
max_length = max(item[key].shape[1] for item in items)