alfredsasko / advanced-principle-component-analysis

Principal component analysis with varimax rotation and various dimensionality reduction methods based on the scikit-learn PCA class
MIT License
8 stars 6 forks source link

Getting error 'int' object has no attribute 'encode' with rpy2 #3

Open sailinz opened 4 years ago

sailinz commented 4 years ago

I'm trying to use this library in the Jupyter notebook. The library can be successfully imported after the R_Home is changed as:

os.environ['R_HOME'] = "/Library/Frameworks/R.framework/Resources"

However, when I try to test with the example code, I encounter the error below:

```
Traceback (most recent call last):
  File "testPCA.py", line 16, in <module>
    varimax_pca5 = CustomPCA(n_components=5, rotation='varimax').fit(X_std)
  File "~/lib/python3.7/site-packages/sklearn/decomposition/_pca.py", line 351, in fit
    self._fit(X)
  File "~/lib/python3.7/site-packages/advanced_pca/custom_pca.py", line 182, in _fit
    rot_factor_matrix = self._varimax(pd.DataFrame(factor_matrix))
  File "~/lib/python3.7/site-packages/advanced_pca/custom_pca.py", line 125, in _varimax
    factor_mtr = self._df2mtr(factor_df)
  File "~/lib/python3.7/site-packages/advanced_pca/custom_pca.py", line 107, in _df2mtr
    rlc.OrdDict([('index', StrVector(tuple(df.index))),
  File "~/lib/python3.7/site-packages/rpy2/robjects/vectors.py", line 385, in __init__
    super().__init__(obj)
  File "~/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py", line 512, in __init__
    super().__init__(self.from_object(obj).__sexp__)
  File "~/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py", line 592, in from_object
    res = cls.from_iterable(obj)
  File "~/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py", line 44, in _
    cdata = function(*args, **kwargs)
  File "~/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py", line 541, in from_iterable
    populate_func(iterable, r_vector, set_elt, cast_value)
  File "~/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py", line 463, in _populate_r_vector
    set_elt(r_vector, i, cast_value(v))
  File "~/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py", line 655, in _as_charsxp_cdata
    return conversion._str_to_charsxp(x)
  File "~/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py", line 139, in _str_to_charsxp
    cchar = _str_to_cchar(val)
  File "~/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py", line 118, in _str_to_cchar
    b = s.encode(encoding)
AttributeError: 'int' object has no attribute 'encode'
```

Any ideas on this?

jkuruzovich commented 4 years ago

I'm getting this same error.


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
~/opt/anaconda3/envs/tf2/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_object(cls, obj)
    587         try:
--> 588             mv = memoryview(obj)
    589             res = cls.from_memoryview(mv)

TypeError: memoryview: a bytes-like object is required, not 'tuple'

During handling of the above exception, another exception occurred:

AttributeError                            Traceback (most recent call last)
<ipython-input-127-abd45db94e07> in <module>
     10 # fit pca objects with and without rotation with 5 principal components
     11 standard_pca5 = CustomPCA(n_components=5).fit(X_std)
---> 12 varimax_pca5 = CustomPCA(n_components=5, rotation='varimax').fit(X_std)

~/opt/anaconda3/envs/tf2/lib/python3.7/site-packages/sklearn/decomposition/pca.py in fit(self, X, y)
    339             Returns the instance itself.
    340         """
--> 341         self._fit(X)
    342         return self
    343 

~/opt/anaconda3/envs/tf2/lib/python3.7/site-packages/advanced_pca/custom_pca.py in _fit(self, X)
    180                 )
    181 
--> 182                 rot_factor_matrix = self._varimax(pd.DataFrame(factor_matrix))
    183 
    184                 self.explained_variance_ = (rot_factor_matrix ** 2).sum(axis=0)

~/opt/anaconda3/envs/tf2/lib/python3.7/site-packages/advanced_pca/custom_pca.py in _varimax(self, factor_df, **kwargs)
    123             rot_factor_df: rotated factor matrix as pd.DataFrame
    124         '''
--> 125         factor_mtr = self._df2mtr(factor_df)
    126         varimax = robjects.r['varimax']
    127         rot_factor_mtr = varimax(factor_mtr)

~/opt/anaconda3/envs/tf2/lib/python3.7/site-packages/advanced_pca/custom_pca.py in _df2mtr(df)
    105         values = FloatVector(df.values.flatten())
    106         dimnames = ListVector(
--> 107             rlc.OrdDict([('index', StrVector(tuple(df.index))),
    108             ('columns', StrVector(tuple(df.columns)))])
    109         )

~/opt/anaconda3/envs/tf2/lib/python3.7/site-packages/rpy2/robjects/vectors.py in __init__(self, obj)
    383 
    384     def __init__(self, obj):
--> 385         super().__init__(obj)
    386         self._add_rops()
    387 

~/opt/anaconda3/envs/tf2/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in __init__(self, obj)
    510             super().__init__(obj)
    511         elif isinstance(obj, collections.abc.Sized):
--> 512             super().__init__(self.from_object(obj).__sexp__)
    513         else:
    514             raise TypeError('The constructor must be called '

~/opt/anaconda3/envs/tf2/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_object(cls, obj)
    590         except (TypeError, ValueError):
    591             try:
--> 592                 res = cls.from_iterable(obj)
    593             except ValueError:
    594                 msg = ('The class methods from_memoryview() and '

~/opt/anaconda3/envs/tf2/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _(*args, **kwargs)
     42 def _cdata_res_to_rinterface(function):
     43     def _(*args, **kwargs):
---> 44         cdata = function(*args, **kwargs)
     45         # TODO: test cdata is of the expected CType
     46         return _cdata_to_rinterface(cdata)

~/opt/anaconda3/envs/tf2/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in from_iterable(cls, iterable, populate_func, set_elt, cast_value)
    539                     cls._R_TYPE, n)
    540             )
--> 541             populate_func(iterable, r_vector, set_elt, cast_value)
    542         return r_vector
    543 

~/opt/anaconda3/envs/tf2/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _populate_r_vector(iterable, r_vector, set_elt, cast_value)
    461 def _populate_r_vector(iterable, r_vector, set_elt, cast_value):
    462     for i, v in enumerate(iterable):
--> 463         set_elt(r_vector, i, cast_value(v))
    464 
    465 

~/opt/anaconda3/envs/tf2/lib/python3.7/site-packages/rpy2/rinterface_lib/sexp.py in _as_charsxp_cdata(x)
    653         return x.__sexp__._cdata
    654     else:
--> 655         return conversion._str_to_charsxp(x)
    656 
    657 

~/opt/anaconda3/envs/tf2/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _str_to_charsxp(val)
    137         s = rlib.R_NaString
    138     else:
--> 139         cchar = _str_to_cchar(val)
    140         s = rlib.Rf_mkCharCE(cchar, _CE_DEFAULT_VALUE)
    141     return s

~/opt/anaconda3/envs/tf2/lib/python3.7/site-packages/rpy2/rinterface_lib/conversion.py in _str_to_cchar(s, encoding)
    116 def _str_to_cchar(s: str, encoding: str = 'utf-8'):
    117     # TODO: use isStrinb and installTrChar
--> 118     b = s.encode(encoding)
    119     return ffi.new('char[]', b)
    120 

AttributeError: 'int' object has no attribute 'encode'```
alfredsasko commented 4 years ago

It seems the code is broken in the custom method that transforms a pandas DataFrame into an R matrix. Let me know which version of rpy2 you are using so that I can reproduce it.

sailinz commented 4 years ago

Thank you for looking into this. My rpy2 is installed via Anaconda. The version is 3.3.3.

It seems that code is broken in custom method transforming pandas dataframe to r matrix. Let me know which version of rpy2 are you using that I can simulate it.

tenggaard commented 4 years ago

I am getting the same error with rpy2-3.3.5

claudiomartelli commented 4 years ago

I have the same problem with rpy2 ver 3.3.6

I solved it by changing the code in custom_pca.py as follows:

def _df2mtr(df):
    '''Convert pandas dataframe to r matrix. Category dtype is casted as
    factorVector considering missing values
    (original py2ri function of rpy2 can't handle this properly so far)

    Args:
        data: pandas dataframe of shape (# samples, # features)
              with numeric dtype

    Returns:
        mtr: r matrix of shape (# samples # features)
    '''
    # check arguments
    assert isinstance(df, pd.DataFrame), 'Argument df need to be a pd.Dataframe.'

    # select only numeric columns
    df = df.select_dtypes('number')

    # create and return r matrix
    values = FloatVector(df.values.flatten())
    dimnames = ListVector(
        rlc.OrdDict([('index', StrVector(tuple(str(x) for x in df.index))),
        ('columns', StrVector(tuple(str(x) for x in df.columns)))])
    )

    return robjects.r.matrix(values, nrow=len(df.index), ncol=len(df.columns),
                             dimnames = dimnames, byrow=True)

def _varimax(self, factor_df, **kwargs):
    '''
    varimax rotation of factor matrix using R's ``varimax`` function

    Args:
        factor_df: factor matrix as pd.DataFrame with shape
                   (# features, # principal components)
        **kwargs: optional named arguments forwarded unchanged to R's
                  ``varimax`` (e.g. ``normalize``, ``eps``); when none are
                  given R's own defaults apply

    Return:
        rot_factor_df: the 'loadings' element of the R result, converted
                       back to a Python object by pandas2ri.rpy2py
    '''
    # R's varimax is called on an R matrix, so convert the dataframe first
    factor_mtr = self._df2mtr(factor_df)
    varimax = robjects.r['varimax']

    # forward caller options; previously they were accepted but ignored
    rot_factor_mtr = varimax(factor_mtr, **kwargs)

    # the R result is a list; only the rotated loadings are returned
    return pandas2ri.rpy2py(rot_factor_mtr.rx2('loadings'))
ptgoff commented 3 years ago

Are your changes reflected in the latest version? I downloaded (via pip) this week and I'm running into the same issue.