Kaszanas / SC2_Datasets

https://sc2-datasets.readthedocs.io/
GNU General Public License v3.0
8 stars 3 forks source link

UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 #13

Closed Kaszanas closed 2 years ago

Kaszanas commented 2 years ago

Currently, while attempting to run Logistic Regression, the following error is raised for some replay instances:

Traceback (most recent call last):
  File "D:\Projects\SC2EGSet_Experiments\src\experiments\logistic_regression.py", line 51, in <module>
    trainer.fit(model=logistic_regression, datamodule=datamodule)
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 740, in fit
    self._call_and_handle_interrupt(
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 685, in _call_and_handle_interrupt
    return trainer_fn(*args, **kwargs)
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 777, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1199, in _run
    self._dispatch()
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1279, in _dispatch
    self.training_type_plugin.start_training(self)
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\plugins\training_type\training_type_plugin.py", line 202, in start_training
    self._results = trainer.run_stage()
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1289, in run_stage
    return self._run_train()
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\trainer\trainer.py", line 1319, in _run_train
    self.fit_loop.run()
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\loops\base.py", line 145, in run
    self.advance(*args, **kwargs)
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\loops\fit_loop.py", line 234, in advance
    self.epoch_loop.run(data_fetcher)
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\loops\base.py", line 145, in run
    self.advance(*args, **kwargs)
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\loops\epoch\training_epoch_loop.py", line 156, in advance
    batch_idx, (batch, self.batch_progress.is_last_batch) = next(self._dataloader_iter)
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\utilities\fetching.py", line 203, in __next__
    return self.fetching_function()
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\utilities\fetching.py", line 270, in fetching_function
    self._fetch_next_batch()
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\utilities\fetching.py", line 300, in _fetch_next_batch
    batch = next(self.dataloader_iter)
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\trainer\supporters.py", line 550, in __next__
    return self.request_next_batch(self.loader_iters)
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\trainer\supporters.py", line 562, in request_next_batch
    return apply_to_collection(loader_iters, Iterator, next)
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\pytorch_lightning\utilities\apply_func.py", line 96, in apply_to_collection
    return function(data, *args, **kwargs)
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\torch\utils\data\dataloader.py", line 530, in __next__
    data = self._next_data()
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\torch\utils\data\dataloader.py", line 1224, in _next_data
    return self._process_data(data)
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\torch\utils\data\dataloader.py", line 1250, in _process_data
    data.reraise()
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\torch\_utils.py", line 456, in reraise
    raise RuntimeError(msg) from None
RuntimeError: Caught UnicodeDecodeError in DataLoader worker process 3.
Original Traceback (most recent call last):
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\torch\utils\data\_utils\worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\torch\utils\data\_utils\fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\torch\utils\data\_utils\fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "D:\Projects\SC2EGSet_Experiments\venv_3_10\lib\site-packages\torch\utils\data\dataset.py", line 471, in __getitem__
    return self.dataset[self.indices[idx]]
  File "D:\Projects\SC2EGSet_Experiments\src\dataset\pytorch_datasets\sc2_replaypack_dataset.py", line 97, in __getitem__
    replay_data = SC2ReplayData.from_file(replay_filepath=self.list_of_files[index])
  File "D:\Projects\SC2EGSet_Experiments\src\dataset\replay_data\sc2_replay_data.py", line 37, in from_file
    return SC2ReplayData(json.load(replay_file))
  File "C:\Users\kasza\.pyenv\pyenv-win\versions\3.10.2\lib\json\__init__.py", line 293, in load        
    return loads(fp.read(),
  File "C:\Users\kasza\.pyenv\pyenv-win\versions\3.10.2\lib\encodings\cp1252.py", line 23, in decode    
    return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 1355: character maps to <undefined>
Kaszanas commented 2 years ago

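This was solved by passing encoding="utf-8" explicitly to the open() call used as the context manager in SC2ReplayData.from_file. Without an explicit encoding, the replay JSON file was decoded with the Windows default codec (cp1252, as shown in the traceback), which cannot decode byte 0x81.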