h2oai / h2o-tutorials

Tutorials and training material for the H2O Machine Learning Platform
http://h2o.ai
1.48k stars 1.01k forks source link

Relevel factor H2OResponseError #156

Open DmitryKutsev opened 2 years ago

DmitryKutsev commented 2 years ago

Hi,

I'm using the relevel() function to set the base level of a column in my dataframe, and I get this error when the column's values contain '+', '-' or ' ' characters (there are probably more, but these are the ones I've tried).

    import pandas as pd 
    from h2o.estimators.glm import H2OGeneralizedLinearEstimator 
    import h2o 
    import re 

    h2o.init() 

    data = { 
         'name': ['Xavier', 'Ann', 'Jana', 'Yi', 'Robin', 'Amal', 'Nori'], 
         'city': ['Mexico City', 'Toronto', 'Prague', 'Shanghai', 
                  'Manchester', 'Cairo', 'Osaka'], 
         'age': [41, 28, 33, 34, 38, 31, 37], 
         'py-score': [88.0, 79.0, 81.0, 80.0, 68.0, 61.0, 84.0] 
     } 

    df = pd.DataFrame(data=data) 
    hf = h2o.H2OFrame(df) 

    hf['city'].asfactor().relevel('Mexico City')

This raises an H2OResponseError:

---------------------------------------------------------------------------
H2OResponseError                          Traceback (most recent call last)
~\PycharmProjects\h2o\venv\lib\site-packages\IPython\core\formatters.py in __call__(self, obj)
    700                 type_pprinters=self.type_printers,
    701                 deferred_pprinters=self.deferred_printers)
--> 702             printer.pretty(obj)
    703             printer.flush()
    704             return stream.getvalue()

~\PycharmProjects\h2o\venv\lib\site-packages\IPython\lib\pretty.py in pretty(self, obj)
    392                         if cls is not object \
    393                                 and callable(cls.__dict__.get('__repr__')):
--> 394                             return _repr_pprint(obj, self, cycle)
    395 
    396             return _default_pprint(obj, self, cycle)

~\PycharmProjects\h2o\venv\lib\site-packages\IPython\lib\pretty.py in _repr_pprint(obj, p, cycle)
    698     """A pprint that just redirects to the normal repr function."""
    699     # Find newlines and replace them with p.break_()
--> 700     output = repr(obj)
    701     lines = output.splitlines()
    702     with p.group():

~\PycharmProjects\h2o\venv\lib\site-packages\h2o\frame.py in __repr__(self)
    579             stk = traceback.extract_stack()
    580             if not ("IPython" in stk[-2][0] and "info" == stk[-2][2]):
--> 581                 self.show()
    582         return ""
    583 

~\PycharmProjects\h2o\venv\lib\site-packages\h2o\frame.py in show(self, use_pandas, rows, cols)
    610             print("This H2OFrame is empty and not initialized.")
    611             return
--> 612         if self.nrows == 0:
    613             print("This H2OFrame is empty.")
    614             return

~\PycharmProjects\h2o\venv\lib\site-packages\h2o\frame.py in nrows(self)
    319         if not self._ex._cache.nrows_valid():
    320             self._ex._cache.flush()
--> 321             self._frame(fill_cache=True)
    322         return self._ex._cache.nrows
    323 

~\PycharmProjects\h2o\venv\lib\site-packages\h2o\frame.py in _frame(self, rows, rows_offset, cols, cols_offset, fill_cache)
    729 
    730     def _frame(self, rows=10, rows_offset=0, cols=-1, cols_offset=0, fill_cache=False):
--> 731         self._ex._eager_frame()
    732         if fill_cache:
    733             self._ex._cache.fill(rows=rows, rows_offset=rows_offset, cols=cols, cols_offset=cols_offset)

~\PycharmProjects\h2o\venv\lib\site-packages\h2o\expr.py in _eager_frame(self)
     88         if not self._cache.is_empty(): return
     89         if self._cache._id is not None: return  # Data already computed under ID, but not cached locally
---> 90         self._eval_driver('frame')
     91 
     92     def _eager_scalar(self):  # returns a scalar (or a list of scalars)

~\PycharmProjects\h2o\venv\lib\site-packages\h2o\expr.py in _eval_driver(self, top)
    112         """
    113         exec_str = self._get_ast_str(top)
--> 114         res = ExprNode.rapids(exec_str)
    115         if 'scalar' in res:
    116             if isinstance(res['scalar'], list):

~\PycharmProjects\h2o\venv\lib\site-packages\h2o\expr.py in rapids(expr)
    256         :returns: The JSON response (as a python dictionary) of the Rapids execution
    257         """
--> 258         return h2o.api("POST /99/Rapids", data={"ast": expr, "session_id": h2o.connection().session_id})
    259 
    260 

~\PycharmProjects\h2o\venv\lib\site-packages\h2o\h2o.py in api(endpoint, data, json, filename, save_to)
    111     # type checks are performed in H2OConnection class
    112     _check_connection()
--> 113     return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
    114 
    115 

~\PycharmProjects\h2o\venv\lib\site-packages\h2o\backend\connection.py in request(self, endpoint, data, json, filename, save_to)
    479                 save_to = save_to(resp)
    480             self._log_end_transaction(start_time, resp)
--> 481             return self._process_response(resp, save_to)
    482 
    483         except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError) as e:

~\PycharmProjects\h2o\venv\lib\site-packages\h2o\backend\connection.py in _process_response(response, save_to)
    817         if status_code in {400, 404, 412} and isinstance(data, H2OErrorV3):
    818             data.show_stacktrace = False
--> 819             raise H2OResponseError(data)
    820 
    821         # Server errors (notably 500 = "Server Error")

H2OResponseError: Server error java.lang.IllegalArgumentException:
  Error: Did not find level `Mexico%20City` in the column.
  Request: POST /99/Rapids
    data: {'ast': "(tmp= py_140_sid_b771 (relevel (as.factor (cols_py Key_Frame__upload_bda4861347c26f55bb24425d8760491c.hex 'city')) 'Mexico%20City'))", 'session_id': '_sid_b771'}

Any ideas how I can relevel data whose levels contain whitespace and other symbols?

Thanks.