Open DmitryKutsev opened 2 years ago
Hi,
I'm using the relevel() function to set the base level of a column in my dataframe, and I get the error below when the column's values contain '+', '-' or ' ' characters (there are probably more such characters, but these are the ones I've tried).
```python
import pandas as pd
from h2o.estimators.glm import H2OGeneralizedLinearEstimator
import h2o
import re

h2o.init()

data = {
    'name': ['Xavier', 'Ann', 'Jana', 'Yi', 'Robin', 'Amal', 'Nori'],
    'city': ['Mexico City', 'Toronto', 'Prague', 'Shanghai', 'Manchester', 'Cairo', 'Osaka'],
    'age': [41, 28, 33, 34, 38, 31, 37],
    'py-score': [88.0, 79.0, 81.0, 80.0, 68.0, 61.0, 84.0]
}
df = pd.DataFrame(data=data)
hf = h2o.H2OFrame(df)
hf['city'].asfactor().relevel('Mexico City')
```
This will cause H2OResponseError:
--------------------------------------------------------------------------- H2OResponseError Traceback (most recent call last) ~\PycharmProjects\h2o\venv\lib\site-packages\IPython\core\formatters.py in __call__(self, obj) 700 type_pprinters=self.type_printers, 701 deferred_pprinters=self.deferred_printers) --> 702 printer.pretty(obj) 703 printer.flush() 704 return stream.getvalue() ~\PycharmProjects\h2o\venv\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj) 392 if cls is not object \ 393 and callable(cls.__dict__.get('__repr__')): --> 394 return _repr_pprint(obj, self, cycle) 395 396 return _default_pprint(obj, self, cycle) ~\PycharmProjects\h2o\venv\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle) 698 """A pprint that just redirects to the normal repr function.""" 699 # Find newlines and replace them with p.break_() --> 700 output = repr(obj) 701 lines = output.splitlines() 702 with p.group(): ~\PycharmProjects\h2o\venv\lib\site-packages\h2o\frame.py in __repr__(self) 579 stk = traceback.extract_stack() 580 if not ("IPython" in stk[-2][0] and "info" == stk[-2][2]): --> 581 self.show() 582 return "" 583 ~\PycharmProjects\h2o\venv\lib\site-packages\h2o\frame.py in show(self, use_pandas, rows, cols) 610 print("This H2OFrame is empty and not initialized.") 611 return --> 612 if self.nrows == 0: 613 print("This H2OFrame is empty.") 614 return ~\PycharmProjects\h2o\venv\lib\site-packages\h2o\frame.py in nrows(self) 319 if not self._ex._cache.nrows_valid(): 320 self._ex._cache.flush() --> 321 self._frame(fill_cache=True) 322 return self._ex._cache.nrows 323 ~\PycharmProjects\h2o\venv\lib\site-packages\h2o\frame.py in _frame(self, rows, rows_offset, cols, cols_offset, fill_cache) 729 730 def _frame(self, rows=10, rows_offset=0, cols=-1, cols_offset=0, fill_cache=False): --> 731 self._ex._eager_frame() 732 if fill_cache: 733 self._ex._cache.fill(rows=rows, rows_offset=rows_offset, cols=cols, cols_offset=cols_offset) 
~\PycharmProjects\h2o\venv\lib\site-packages\h2o\expr.py in _eager_frame(self) 88 if not self._cache.is_empty(): return 89 if self._cache._id is not None: return # Data already computed under ID, but not cached locally ---> 90 self._eval_driver('frame') 91 92 def _eager_scalar(self): # returns a scalar (or a list of scalars) ~\PycharmProjects\h2o\venv\lib\site-packages\h2o\expr.py in _eval_driver(self, top) 112 """ 113 exec_str = self._get_ast_str(top) --> 114 res = ExprNode.rapids(exec_str) 115 if 'scalar' in res: 116 if isinstance(res['scalar'], list): ~\PycharmProjects\h2o\venv\lib\site-packages\h2o\expr.py in rapids(expr) 256 :returns: The JSON response (as a python dictionary) of the Rapids execution 257 """ --> 258 return h2o.api("POST /99/Rapids", data={"ast": expr, "session_id": h2o.connection().session_id}) 259 260 ~\PycharmProjects\h2o\venv\lib\site-packages\h2o\h2o.py in api(endpoint, data, json, filename, save_to) 111 # type checks are performed in H2OConnection class 112 _check_connection() --> 113 return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to) 114 115 ~\PycharmProjects\h2o\venv\lib\site-packages\h2o\backend\connection.py in request(self, endpoint, data, json, filename, save_to) 479 save_to = save_to(resp) 480 self._log_end_transaction(start_time, resp) --> 481 return self._process_response(resp, save_to) 482 483 except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e: ~\PycharmProjects\h2o\venv\lib\site-packages\h2o\backend\connection.py in _process_response(response, save_to) 817 if status_code in {400, 404, 412} and isinstance(data, H2OErrorV3): 818 data.show_stacktrace = False --> 819 raise H2OResponseError(data) 820 821 # Server errors (notably 500 = "Server Error") H2OResponseError: Server error java.lang.IllegalArgumentException: Error: Did not find level `Mexico%20City` in the column. 
Request: POST /99/Rapids data: {'ast': "(tmp= py_140_sid_b771 (relevel (as.factor (cols_py Key_Frame__upload_bda4861347c26f55bb24425d8760491c.hex 'city')) 'Mexico%20City'))", 'session_id': '_sid_b771'}
Any ideas how I can relevel data whose levels contain whitespace and other symbols?
Thanks.
Hi,
I'm using the relevel() function to set the base level of a column in my dataframe, and I get an error when the column's values contain '+', '-' or ' ' characters (there are probably more such characters, but these are the ones I've tried).
This will cause H2OResponseError:
Any ideas how I can relevel data whose levels contain whitespace and other symbols?
Thanks.