transportenergy / database

Tools for accessing and maintaining the iTEM model & historical databases
https://transportenergy.rtfd.io
GNU General Public License v3.0
24 stars 8 forks source link

UnicodeDecodeError upon import #48

Closed romainsacchi closed 3 years ago

romainsacchi commented 3 years ago

Installed from PyPI, with Python 3.8,

from item import historical

leads to:

---------------------------------------------------------------------------
UnicodeDecodeError                        Traceback (most recent call last)
<ipython-input-2-9977dfff8dba> in <module>
----> 1 from item import historical

~\AppData\Local\Continuum\miniconda3\envs\item\lib\site-packages\item\__init__.py in <module>
      1 from .common import init, init_paths
----> 2 from .model import load_model_data
      3 
      4 __all__ = [
      5     "init_paths",

~\AppData\Local\Continuum\miniconda3\envs\item\lib\site-packages\item\model\__init__.py in <module>
     13 
     14 from item.common import paths, log
---> 15 from item.model.common import as_xarray, concat_versions, tidy, select, to_wide
     16 from item.model.dimensions import INDEX, load_template
     17 

~\AppData\Local\Continuum\miniconda3\envs\item\lib\site-packages\item\model\common.py in <module>
     10 
     11 from item.common import log, paths
---> 12 from item.model.dimensions import INDEX
     13 
     14 # Information about the models

~\AppData\Local\Continuum\miniconda3\envs\item\lib\site-packages\item\model\dimensions.py in <module>
    195 
    196 
--> 197 load()

~\AppData\Local\Continuum\miniconda3\envs\item\lib\site-packages\item\model\dimensions.py in load()
     61     for k in ["variable", "mode", "technology", "fuel", "match"]:
     62         with open(join(path, "{}.yaml".format(k))) as f:
---> 63             data[k] = yaml.load(f, Loader=yaml.SafeLoader)
     64     variable, mode, tech, fuel, match = data.values()
     65 

~\AppData\Local\Continuum\miniconda3\envs\item\lib\site-packages\yaml\__init__.py in load(stream, Loader)
    110         Loader = FullLoader
    111 
--> 112     loader = Loader(stream)
    113     try:
    114         return loader.get_single_data()

~\AppData\Local\Continuum\miniconda3\envs\item\lib\site-packages\yaml\loader.py in __init__(self, stream)
     32 
     33     def __init__(self, stream):
---> 34         Reader.__init__(self, stream)
     35         Scanner.__init__(self)
     36         Parser.__init__(self)

~\AppData\Local\Continuum\miniconda3\envs\item\lib\site-packages\yaml\reader.py in __init__(self, stream)
     83             self.eof = False
     84             self.raw_buffer = None
---> 85             self.determine_encoding()
     86 
     87     def peek(self, index=0):

~\AppData\Local\Continuum\miniconda3\envs\item\lib\site-packages\yaml\reader.py in determine_encoding(self)
    122     def determine_encoding(self):
    123         while not self.eof and (self.raw_buffer is None or len(self.raw_buffer) < 2):
--> 124             self.update_raw()
    125         if isinstance(self.raw_buffer, bytes):
    126             if self.raw_buffer.startswith(codecs.BOM_UTF16_LE):

~\AppData\Local\Continuum\miniconda3\envs\item\lib\site-packages\yaml\reader.py in update_raw(self, size)
    176 
    177     def update_raw(self, size=4096):
--> 178         data = self.stream.read(size)
    179         if self.raw_buffer is None:
    180             self.raw_buffer = data

~\AppData\Local\Continuum\miniconda3\envs\item\lib\encodings\cp1252.py in decode(self, input, final)
     21 class IncrementalDecoder(codecs.IncrementalDecoder):
     22     def decode(self, input, final=False):
---> 23         return codecs.charmap_decode(input,self.errors,decoding_table)[0]
     24 
     25 class StreamWriter(Codec,codecs.StreamWriter):

UnicodeDecodeError: 'charmap' codec can't decode byte 0x81 in position 38: character maps to <undefined>
romainsacchi commented 3 years ago

The same error occurs when installing with pip directly from GitHub.

khaeru commented 3 years ago

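Thanks for the report, @romainsacchi. This is caused by default file encodings differing between platforms, especially on Windows: the YAML files are opened without an explicit `encoding`, so Python falls back to the locale's code page (cp1252 in the traceback above), which cannot decode byte 0x81. The fix is to always use UTF-8, e.g. `open(path, encoding="utf-8")`.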