pyitlib 0.2.3 does not support the newer scikit-learn versions that our organization uses.
Changes
Update .gitignore to ignore virtual environments, pycache, etc.
Avoid using the deprecated np.float alias; use np.float64 instead.
_sanitise_array_input: updated to force object-dtype NumPy arrays to be re-converted once None values have been replaced. The exception below is raised when the encoder attempts to transform an array that has the object dtype.
tests/test_discrete_random_variable.py:1678:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
pyitlib/discrete_random_variable.py:2253: in information_variation
H1 = entropy_conditional(X, Y, cartesian_product, base, fill_value,
pyitlib/discrete_random_variable.py:3490: in entropy_conditional
S, fill_value = _map_observations_to_integers((X, Alphabet_X,
pyitlib/discrete_random_variable.py:4687: in _map_observations_to_integers
Symbol_matrices = [L.transform(A.ravel()).reshape(A.shape) for A in
pyitlib/discrete_random_variable.py:4687: in <listcomp>
Symbol_matrices = [L.transform(A.ravel()).reshape(A.shape) for A in
venv/lib/python3.8/site-packages/sklearn/preprocessing/_label.py:138: in transform
return _encode(y, uniques=self.classes_)
venv/lib/python3.8/site-packages/sklearn/utils/_encode.py:187: in _encode
diff = _check_unknown(values, uniques)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
def _check_unknown(values, known_values, return_mask=False):
"""
Helper function to check for unknowns in values to be encoded.
Uses pure python method for object dtype, and numpy method for
all other dtypes.
Parameters
----------
values : array
Values to check for unknowns.
known_values : array
Known values. Must be unique.
return_mask : bool, default=False
If True, return a mask of the same shape as `values` indicating
the valid values.
Returns
-------
diff : list
The unique values present in `values` and not in `know_values`.
valid_mask : boolean array
Additionally returned if ``return_mask=True``.
"""
valid_mask = None
if values.dtype.kind in "OUS":
values_set = set(values)
values_set, missing_in_values = _extract_missing(values_set)
uniques_set = set(known_values)
uniques_set, missing_in_uniques = _extract_missing(uniques_set)
diff = values_set - uniques_set
nan_in_diff = missing_in_values.nan and not missing_in_uniques.nan
none_in_diff = missing_in_values.none and not missing_in_uniques.none
def is_valid(value):
return (
value in uniques_set
or missing_in_uniques.none
and value is None
or missing_in_uniques.nan
and is_scalar_nan(value)
)
if return_mask:
if diff or nan_in_diff or none_in_diff:
valid_mask = np.array([is_valid(value) for value in values])
else:
valid_mask = np.ones(len(values), dtype=bool)
diff = list(diff)
if none_in_diff:
diff.append(None)
if nan_in_diff:
diff.append(np.nan)
else:
unique_values = np.unique(values)
diff = np.setdiff1d(unique_values, known_values, assume_unique=True)
if return_mask:
if diff.size:
valid_mask = np.in1d(values, known_values)
else:
valid_mask = np.ones(len(values), dtype=bool)
# check for nans in the known_values
> if np.isnan(known_values).any():
E TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
Description
pyitlib 0.2.3 does not support the newer scikit-learn versions that our organization uses.
Changes
Update .gitignore to ignore virtual environments, pycache, etc.
Avoid using np.float; it should be np.float64.
_sanitise_array_input: updated to force object-dtype NumPy arrays to be re-converted once None values have been replaced. The exception below happens when the encoder attempts to transform an array that has the object dtype.