Attempting to push a pandas dataframe containing Chinese characters to a cloud storage bucket and getting a "Failed to process HTTP response" error. It looks like the "data" argument is expecting Latin-1 characters only. Can we add support for UTF-8 encoded data? The full stack trace is included below.
---------------------------------------------------------------------------
UnicodeEncodeError Traceback (most recent call last)
/usr/local/envs/py3env/lib/python3.5/site-packages/datalab/utils/_http.py in request(url, args, data, headers, method, credentials, raw_response, stats)
145 body=data,
--> 146 headers=headers)
147 if 200 <= response.status < 300:
/usr/local/envs/py3env/lib/python3.5/site-packages/google_auth_httplib2.py in request(self, uri, method, body, headers, **kwargs)
197 response, content = self.http.request(
--> 198 uri, method, body=body, headers=request_headers, **kwargs)
199
/usr/local/envs/py3env/lib/python3.5/site-packages/datalab/kernel/__init__.py in _request(self, uri, method, body, headers, redirections, connection_type)
71 return _orig_request(self, uri, method=method, body=body, headers=headers,
---> 72 redirections=redirections, connection_type=connection_type)
73
/usr/local/envs/py3env/lib/python3.5/site-packages/google/datalab/kernel/__init__.py in _request(self, uri, method, body, headers, redirections, connection_type)
59 return _orig_request(self, uri, method=method, body=body, headers=headers,
---> 60 redirections=redirections, connection_type=connection_type)
61
/usr/local/envs/py3env/lib/python3.5/site-packages/httplib2/__init__.py in request(self, uri, method, body, headers, redirections, connection_type)
1321 else:
-> 1322 (response, content) = self._request(conn, authority, uri, request_uri, method, body, headers, redirections, cachekey)
1323 except Exception as e:
/usr/local/envs/py3env/lib/python3.5/site-packages/httplib2/__init__.py in _request(self, conn, host, absolute_uri, request_uri, method, body, headers, redirections, cachekey)
1071
-> 1072 (response, content) = self._conn_request(conn, request_uri, method, body, headers)
1073
/usr/local/envs/py3env/lib/python3.5/site-packages/httplib2/__init__.py in _conn_request(self, conn, request_uri, method, body, headers)
995 conn.connect()
--> 996 conn.request(method, request_uri, body, headers)
997 except socket.timeout:
/usr/local/envs/py3env/lib/python3.5/http/client.py in request(self, method, url, body, headers)
1106 """Send a complete request to the server."""
-> 1107 self._send_request(method, url, body, headers)
1108
/usr/local/envs/py3env/lib/python3.5/http/client.py in _send_request(self, method, url, body, headers)
1150 # default charset of iso-8859-1.
-> 1151 body = _encode(body, 'body')
1152 self.endheaders(body)
/usr/local/envs/py3env/lib/python3.5/http/client.py in _encode(data, name)
160 "if you want to send it encoded in UTF-8." %
--> 161 (name.title(), data[err.start:err.end], name)) from None
162
UnicodeEncodeError: 'latin-1' codec can't encode characters in position 5021-5024: Body ('テスト用') is not valid Latin-1. Use body.encode('utf-8') if you want to send it encoded in UTF-8.
During handling of the above exception, another exception occurred:
Exception Traceback (most recent call last)
<ipython-input-61-45d686b08da1> in <module>()
1 import datalab.storage as gcs
2 blob = gcs.Item(bucket, 'acts.csv')
----> 3 blob.write_to(acts.to_csv(), 'text/csv')
/usr/local/envs/py3env/lib/python3.5/site-packages/datalab/storage/_item.py in write_to(self, content, content_type)
222 self._api.object_upload(self._bucket, self._key, content, content_type)
223 except Exception as e:
--> 224 raise e
225
226
/usr/local/envs/py3env/lib/python3.5/site-packages/datalab/storage/_item.py in write_to(self, content, content_type)
220 """
221 try:
--> 222 self._api.object_upload(self._bucket, self._key, content, content_type)
223 except Exception as e:
224 raise e
/usr/local/envs/py3env/lib/python3.5/site-packages/datalab/storage/_api.py in object_upload(self, bucket, key, content, content_type)
161 url = Api._UPLOAD_ENDPOINT + (Api._OBJECT_PATH % (bucket, ''))
162 return datalab.utils.Http.request(url, args=args, data=content, headers=headers,
--> 163 credentials=self._credentials, raw_response=True)
164
165 def objects_copy(self, source_bucket, source_key, target_bucket, target_key):
/usr/local/envs/py3env/lib/python3.5/site-packages/datalab/utils/_http.py in request(url, args, data, headers, method, credentials, raw_response, stats)
155 raise RequestException(response.status, content)
156 except ValueError:
--> 157 raise Exception('Failed to process HTTP response.')
158 except httplib2.HttpLib2Error:
159 raise Exception('Failed to send HTTP request.')
Exception: Failed to process HTTP response.
https://github.com/googledatalab/pydatalab/blob/8c2df84a1f3bc4db6eb3b4d139676826f8c2e222/datalab/storage/_api.py#L147
Attempting to push a pandas dataframe containing Chinese characters to a cloud storage bucket and getting a "Failed to process HTTP response" error. It looks like the "data" argument is expecting Latin-1 characters only. Can we add support for UTF-8 encoded data? The full stack trace is included above.