googledatalab / pydatalab

Google Datalab Library
Apache License 2.0
194 stars 79 forks source link

Object Upload fails on non-Latin-1 characters #722

Open jnard0ne opened 4 years ago

jnard0ne commented 4 years ago

https://github.com/googledatalab/pydatalab/blob/8c2df84a1f3bc4db6eb3b4d139676826f8c2e222/datalab/storage/_api.py#L147

Attempting to push a pandas DataFrame containing Chinese characters to a Cloud Storage bucket fails with a "Failed to process HTTP response" error. It looks like the "data" argument is expecting Latin-1 characters only. Can we add support for UTF-8-encoded data? Full stack trace included below.

---------------------------------------------------------------------------
UnicodeEncodeError                        Traceback (most recent call last)
/usr/local/envs/py3env/lib/python3.5/site-packages/datalab/utils/_http.py in request(url, args, data, headers, method, credentials, raw_response, stats)
    145                                        body=data,
--> 146                                        headers=headers)
    147       if 200 <= response.status < 300:

/usr/local/envs/py3env/lib/python3.5/site-packages/google_auth_httplib2.py in request(self, uri, method, body, headers, **kwargs)
    197         response, content = self.http.request(
--> 198             uri, method, body=body, headers=request_headers, **kwargs)
    199 

/usr/local/envs/py3env/lib/python3.5/site-packages/datalab/kernel/__init__.py in _request(self, uri, method, body, headers, redirections, connection_type)
     71     return _orig_request(self, uri, method=method, body=body, headers=headers,
---> 72                          redirections=redirections, connection_type=connection_type)
     73 

/usr/local/envs/py3env/lib/python3.5/site-packages/google/datalab/kernel/__init__.py in _request(self, uri, method, body, headers, redirections, connection_type)
     59     return _orig_request(self, uri, method=method, body=body, headers=headers,
---> 60                          redirections=redirections, connection_type=connection_type)
     61 

/usr/local/envs/py3env/lib/python3.5/site-packages/httplib2/__init__.py in request(self, uri, method, body, headers, redirections, connection_type)
   1321                 else:
-> 1322                     (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
   1323         except Exception as e:

/usr/local/envs/py3env/lib/python3.5/site-packages/httplib2/__init__.py in _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey)
   1071 
-> 1072         (response, content) = self._conn_request(conn, request_uri, method, body, headers)
   1073 

/usr/local/envs/py3env/lib/python3.5/site-packages/httplib2/__init__.py in _conn_request(self, conn, request_uri, method, body, headers)
    995                     conn.connect()
--> 996                 conn.request(method, request_uri, body, headers)
    997             except socket.timeout:

/usr/local/envs/py3env/lib/python3.5/http/client.py in request(self, method, url, body, headers)
   1106         """Send a complete request to the server."""
-> 1107         self._send_request(method, url, body, headers)
   1108 

/usr/local/envs/py3env/lib/python3.5/http/client.py in _send_request(self, method, url, body, headers)
   1150             # default charset of iso-8859-1.
-> 1151             body = _encode(body, 'body')
   1152         self.endheaders(body)

/usr/local/envs/py3env/lib/python3.5/http/client.py in _encode(data, name)
    160             "if you want to send it encoded in UTF-8." %
--> 161             (name.title(), data[err.start:err.end], name)) from None
    162 

UnicodeEncodeError: 'latin-1' codec can't encode characters in position 5021-5024: Body ('テスト用') is not valid Latin-1. Use body.encode('utf-8') if you want to send it encoded in UTF-8.

During handling of the above exception, another exception occurred:

Exception                                 Traceback (most recent call last)
<ipython-input-61-45d686b08da1> in <module>()
      1 import datalab.storage as gcs
      2 blob = gcs.Item(bucket, 'acts.csv')
----> 3 blob.write_to(acts.to_csv(), 'text/csv')

/usr/local/envs/py3env/lib/python3.5/site-packages/datalab/storage/_item.py in write_to(self, content, content_type)
    222       self._api.object_upload(self._bucket, self._key, content, content_type)
    223     except Exception as e:
--> 224       raise e
    225 
    226 

/usr/local/envs/py3env/lib/python3.5/site-packages/datalab/storage/_item.py in write_to(self, content, content_type)
    220     """
    221     try:
--> 222       self._api.object_upload(self._bucket, self._key, content, content_type)
    223     except Exception as e:
    224       raise e

/usr/local/envs/py3env/lib/python3.5/site-packages/datalab/storage/_api.py in object_upload(self, bucket, key, content, content_type)
    161     url = Api._UPLOAD_ENDPOINT + (Api._OBJECT_PATH % (bucket, ''))
    162     return datalab.utils.Http.request(url, args=args, data=content, headers=headers,
--> 163                                       credentials=self._credentials, raw_response=True)
    164 
    165   def objects_copy(self, source_bucket, source_key, target_bucket, target_key):

/usr/local/envs/py3env/lib/python3.5/site-packages/datalab/utils/_http.py in request(url, args, data, headers, method, credentials, raw_response, stats)
    155         raise RequestException(response.status, content)
    156     except ValueError:
--> 157       raise Exception('Failed to process HTTP response.')
    158     except httplib2.HttpLib2Error:
    159       raise Exception('Failed to send HTTP request.')

Exception: Failed to process HTTP response.
jnard0ne commented 4 years ago

Duplicate of #675