/root/miniconda3/lib/python3.9/site-packages/pyspark/__init__.py:139: in wrapper
    return func(self, **kwargs)
/root/miniconda3/lib/python3.9/site-packages/spark_rapids_ml/classification.py:910: in __init__
    self._set_params(**self._input_kwargs)
/root/miniconda3/lib/python3.9/site-packages/spark_rapids_ml/classification.py:1080: in _set_params
    super()._set_params(**kwargs)
def _set_params(self: P, **kwargs: Any) -> P:
    """
    Set the kwargs as Spark ML Params and/or cuML parameters, while maintaining parameter
    and value mappings defined by the _CumlClass.
    """
    param_map = self._param_mapping()

    # raise error if setting both sides of a param mapping
    for spark_param, cuml_param in param_map.items():
        if (
            spark_param != cuml_param
            and spark_param in kwargs
            and cuml_param in kwargs
        ):
            raise ValueError(
                f"'{cuml_param}' is an alias of '{spark_param}', set one or the other."
            )

    for k, v in kwargs.items():
        if self.hasParam(k):
            # standard Spark ML Param
            self._set(**{str(k): v})  # type: ignore
            self._set_cuml_param(k, v, silent=False)
        elif k in self.cuml_params:
            # cuml param
            self._cuml_params[k] = v
            for spark_param, cuml_param in param_map.items():
                if k == cuml_param:
                    # also set matching Spark Param, if exists
                    # TODO: map cuml values back to Spark equivalents?
                    try:
                        self._set(**{str(spark_param): v})
                    except TypeError:
                        # Spark params have a converter, which may not work
                        # as expected. Eg, it can't convert float back to
                        # str param.
                        # TypeError: Invalid param value given for param "featureSubsetStrategy".
                        # Could not convert <class 'float'> to string type
                        pass
        elif k == "num_workers":
            # special case, since not a Spark or cuML param
            self._num_workers = v
        elif k == "float32_inputs":
            self._float32_inputs = v
        else:
            raise ValueError(f"Unsupported param '{k}'.")
E ValueError: Unsupported param 'enable_sparse_data_optim'.
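For context, _set_params routes each keyword argument to a Spark ML Param, a declared cuML param, or one of two special cases (num_workers, float32_inputs); anything that matches none of these falls through to the final else branch and raises the "Unsupported param" error above. A minimal sketch of the alias check at the top of the routine, using the elasticNetParam/l1_ratio alias pair visible in the param_map dump further below (hypothetical call, not taken from the failing test):

from spark_rapids_ml.classification import LogisticRegression

# Setting both sides of an alias pair is rejected before any per-kwarg
# routing happens.
try:
    LogisticRegression(elasticNetParam=0.0, l1_ratio=0.0)
except ValueError as e:
    print(e)  # 'l1_ratio' is an alias of 'elasticNetParam', set one or the other.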
Environment: L4, colossus machine, spark3.4.2-python39-cuda12.2.2-cuml23.12-ubuntu20.04
Error message: the failing test case is located in tests/test_logistic_regression.py
tests/test_logistic_regression.py:299:
self = LogisticRegression_56e60d0d4432
kwargs = {'elasticNetParam': 0.0, 'enable_sparse_data_optim': False, 'fitIntercept': True, 'num_workers': 2, ...}
param_map = {'aggregationDepth': None, 'elasticNetParam': 'l1_ratio', 'family': '', 'fitIntercept': 'fit_intercept', ...}
spark_param = 'maxBlockSizeInMB', cuml_param = None
k = 'enable_sparse_data_optim', v = False
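The failure reduces to the constructor call below, built from the kwargs captured in the locals above (a sketch: the actual test at tests/test_logistic_regression.py:299 passes more arguments, elided as "..." in the dump):

from spark_rapids_ml.classification import LogisticRegression

# On this build (the cuml23.12 image), LogisticRegression does not declare
# 'enable_sparse_data_optim', so _set_params raises:
#   ValueError: Unsupported param 'enable_sparse_data_optim'.
LogisticRegression(
    elasticNetParam=0.0,
    enable_sparse_data_optim=False,
    fitIntercept=True,
    num_workers=2,
)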
Analysis: this parameter exists in the following API doc, but not in the class declaration of the installed version. Filing this issue to track it. https://nvidia.github.io/spark-rapids-ml/api/python-draft/api/spark_rapids_ml.classification.LogisticRegression.html#spark_rapids_ml.classification.LogisticRegression
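Until the installed package matches the draft API doc, one possible workaround (a sketch, not a project-recommended fix) is to fall back when the installed build rejects the parameter:

from spark_rapids_ml.classification import LogisticRegression

common = {"elasticNetParam": 0.0, "fitIntercept": True, "num_workers": 2}
try:
    # Builds that declare enable_sparse_data_optim accept this call.
    lr = LogisticRegression(enable_sparse_data_optim=False, **common)
except ValueError:
    # Older builds, like the cuml23.12 image here, raise
    # "Unsupported param 'enable_sparse_data_optim'"; construct without it.
    lr = LogisticRegression(**common)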