scikit-learn / scikit-learn

scikit-learn: machine learning in Python
https://scikit-learn.org
BSD 3-Clause "New" or "Revised" License
59.9k stars 25.37k forks source link

test_covariance FAILED #29192

Closed wswsmao closed 4 months ago

wswsmao commented 4 months ago

I built 1.3.0 and 1.3.1 and got this error:

sklearn/covariance/tests/test_covariance.py::test_covariance FAILED                                      [  9%]

=================================================== FAILURES ===================================================
_______________________________________________ test_covariance ________________________________________________

    def test_covariance():
        # Tests Covariance module on a simple dataset.
        # test covariance fit from data
        cov = EmpiricalCovariance()
        cov.fit(X)
        emp_cov = empirical_covariance(X)
        assert_array_almost_equal(emp_cov, cov.covariance_, 4)
        assert_almost_equal(cov.error_norm(emp_cov), 0)
        assert_almost_equal(cov.error_norm(emp_cov, norm="spectral"), 0)
        assert_almost_equal(cov.error_norm(emp_cov, norm="frobenius"), 0)
        assert_almost_equal(cov.error_norm(emp_cov, scaling=False), 0)
        assert_almost_equal(cov.error_norm(emp_cov, squared=False), 0)
        with pytest.raises(NotImplementedError):
            cov.error_norm(emp_cov, norm="foo")
        # Mahalanobis distances computation test
        mahal_dist = cov.mahalanobis(X)
        assert np.amin(mahal_dist) > 0

        # test with n_features = 1
        X_1d = X[:, 0].reshape((-1, 1))
        cov = EmpiricalCovariance()
>       cov.fit(X_1d)

sklearn/covariance/tests/test_covariance.py:58:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
sklearn/base.py:1151: in wrapper
    return fit_method(estimator, *args, **kwargs)
sklearn/covariance/_empirical_covariance.py:247: in fit
    self._set_covariance(covariance)
sklearn/covariance/_empirical_covariance.py:205: in _set_covariance
    self.precision_ = linalg.pinvh(covariance, check_finite=False)
/usr/lib64/python3.11/site-packages/scipy/linalg/_basic.py:1536: in pinvh
    s, u = _decomp.eigh(a, lower=lower, check_finite=False)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

a = array([[0.00226244]]), b = None, lower = True, eigvals_only = False, overwrite_a = False
overwrite_b = False, turbo = False, eigvals = None, type = 1, check_finite = False, subset_by_index = None
subset_by_value = None, driver = 'evr'

    def eigh(a, b=None, lower=True, eigvals_only=False, overwrite_a=False,
             overwrite_b=False, turbo=False, eigvals=None, type=1,
             check_finite=True, subset_by_index=None, subset_by_value=None,
             driver=None):
        """
        Solve a standard or generalized eigenvalue problem for a complex
        Hermitian or real symmetric matrix.

        Find eigenvalues array ``w`` and optionally eigenvectors array ``v`` of
        array ``a``, where ``b`` is positive definite such that for every
        eigenvalue λ (i-th entry of w) and its eigenvector ``vi`` (i-th column of
        ``v``) satisfies::

                          a @ vi = λ * b @ vi
            vi.conj().T @ a @ vi = λ
            vi.conj().T @ b @ vi = 1

        In the standard problem, ``b`` is assumed to be the identity matrix.

        Parameters
        ----------
        a : (M, M) array_like
            A complex Hermitian or real symmetric matrix whose eigenvalues and
            eigenvectors will be computed.
        b : (M, M) array_like, optional
            A complex Hermitian or real symmetric positive definite matrix.
            If omitted, identity matrix is assumed.
        lower : bool, optional
            Whether the pertinent array data is taken from the lower or upper
            triangle of ``a`` and, if applicable, ``b``. (Default: lower)
        eigvals_only : bool, optional
            Whether to calculate only eigenvalues and no eigenvectors.
            (Default: both are calculated)
        subset_by_index : iterable, optional
            If provided, this two-element iterable defines the start and the end
            indices of the desired eigenvalues (ascending order and 0-indexed).
            To return only the second smallest to fifth smallest eigenvalues,
            ``[1, 4]`` is used. ``[n-3, n-1]`` returns the largest three. Only
            available with "evr", "evx", and "gvx" drivers. The entries are
            directly converted to integers via ``int()``.
        subset_by_value : iterable, optional
            If provided, this two-element iterable defines the half-open interval
            ``(a, b]`` that, if any, only the eigenvalues between these values
            are returned. Only available with "evr", "evx", and "gvx" drivers. Use
            ``np.inf`` for the unconstrained ends.
        driver : str, optional
            Defines which LAPACK driver should be used. Valid options are "ev",
            "evd", "evr", "evx" for standard problems and "gv", "gvd", "gvx" for
            generalized (where b is not None) problems. See the Notes section.
            The default for standard problems is "evr". For generalized problems,
            "gvd" is used for full set, and "gvx" for subset requested cases.
        type : int, optional
            For the generalized problems, this keyword specifies the problem type
            to be solved for ``w`` and ``v`` (only takes 1, 2, 3 as possible
            inputs)::

                1 =>     a @ v = w @ b @ v
                2 => a @ b @ v = w @ v
                3 => b @ a @ v = w @ v

            This keyword is ignored for standard problems.
        overwrite_a : bool, optional
            Whether to overwrite data in ``a`` (may improve performance). Default
            is False.
        overwrite_b : bool, optional
            Whether to overwrite data in ``b`` (may improve performance). Default
            is False.
        check_finite : bool, optional
            Whether to check that the input matrices contain only finite numbers.
            Disabling may give a performance gain, but may result in problems
            (crashes, non-termination) if the inputs do contain infinities or NaNs.
        turbo : bool, optional, deprecated
                .. deprecated:: 1.5.0
                    `eigh` keyword argument `turbo` is deprecated in favour of
                    ``driver=gvd`` keyword instead and will be removed in SciPy
                    1.12.0.
        eigvals : tuple (lo, hi), optional, deprecated
                .. deprecated:: 1.5.0
                    `eigh` keyword argument `eigvals` is deprecated in favour of
                    `subset_by_index` keyword instead and will be removed in SciPy
                    1.12.0.

        Returns
        -------
        w : (N,) ndarray
            The N (1<=N<=M) selected eigenvalues, in ascending order, each
            repeated according to its multiplicity.
        v : (M, N) ndarray
            (if ``eigvals_only == False``)

        Raises
        ------
        LinAlgError
            If the eigenvalue computation does not converge, an error occurs, or
            the b matrix is not positive definite. Note that if the input matrices
            are not symmetric or Hermitian, no error will be reported but the
            results will be wrong.

        See Also
        --------
        eigvalsh : eigenvalues of symmetric or Hermitian arrays
        eig : eigenvalues and right eigenvectors for non-symmetric arrays
        eigh_tridiagonal : eigenvalues and right eigenvectors for
            symmetric/Hermitian tridiagonal matrices

        Notes
        -----
        This function does not check the input array for being Hermitian/symmetric
        in order to allow for representing arrays with only their upper/lower
        triangular parts. Also, note that the finiteness check applies to the
        whole array and is unaffected by the "lower" keyword, even though only
        one triangle is used in the computation.

        This function uses LAPACK drivers for computations in all possible keyword
        combinations, prefixed with ``sy`` if arrays are real and ``he`` if
        complex, e.g., a float array with "evr" driver is solved via
        "syevr", complex arrays with "gvx" driver problem is solved via "hegvx"
        etc.

        As a brief summary, the slowest and the most robust driver is the
        classical ``<sy/he>ev`` which uses symmetric QR. ``<sy/he>evr`` is seen as
        the optimal choice for the most general cases. However, there are certain
        occasions that ``<sy/he>evd`` computes faster at the expense of more
        memory usage. ``<sy/he>evx``, while still being faster than ``<sy/he>ev``,
        often performs worse than the rest except when very few eigenvalues are
        requested for large arrays though there is still no performance guarantee.

        For the generalized problem, normalization with respect to the given
        type argument::

                type 1 and 3 :      v.conj().T @ a @ v = w
                type 2       : inv(v).conj().T @ a @ inv(v) = w

                type 1 or 2  :      v.conj().T @ b @ v  = I
                type 3       : v.conj().T @ inv(b) @ v  = I

        Examples
        --------
        >>> import numpy as np
        >>> from scipy.linalg import eigh
        >>> A = np.array([[6, 3, 1, 5], [3, 0, 5, 1], [1, 5, 6, 2], [5, 1, 2, 2]])
        >>> w, v = eigh(A)
        >>> np.allclose(A @ v - v @ np.diag(w), np.zeros((4, 4)))
        True

        Request only the eigenvalues

        >>> w = eigh(A, eigvals_only=True)

        Request eigenvalues that are less than 10.

        >>> A = np.array([[34, -4, -10, -7, 2],
        ...               [-4, 7, 2, 12, 0],
        ...               [-10, 2, 44, 2, -19],
        ...               [-7, 12, 2, 79, -34],
        ...               [2, 0, -19, -34, 29]])
        >>> eigh(A, eigvals_only=True, subset_by_value=[-np.inf, 10])
        array([6.69199443e-07, 9.11938152e+00])

        Request the second smallest eigenvalue and its eigenvector

        >>> w, v = eigh(A, subset_by_index=[1, 1])
        >>> w
        array([9.11938152])
        >>> v.shape  # only a single column is returned
        (5, 1)

        """
        if turbo:
            warnings.warn("Keyword argument 'turbo' is deprecated in favour of '"
                          "driver=gvd' keyword instead and will be removed in "
                          "SciPy 1.12.0.",
                          DeprecationWarning, stacklevel=2)
        if eigvals:
            warnings.warn("Keyword argument 'eigvals' is deprecated in favour of "
                          "'subset_by_index' keyword instead and will be removed "
                          "in SciPy 1.12.0.",
                          DeprecationWarning, stacklevel=2)

        # set lower
        uplo = 'L' if lower else 'U'
        # Set job for Fortran routines
        _job = 'N' if eigvals_only else 'V'

        drv_str = [None, "ev", "evd", "evr", "evx", "gv", "gvd", "gvx"]
        if driver not in drv_str:
            raise ValueError('"{}" is unknown. Possible values are "None", "{}".'
                             ''.format(driver, '", "'.join(drv_str[1:])))

        a1 = _asarray_validated(a, check_finite=check_finite)
        if len(a1.shape) != 2 or a1.shape[0] != a1.shape[1]:
            raise ValueError('expected square "a" matrix')
        overwrite_a = overwrite_a or (_datacopied(a1, a))
        cplx = True if iscomplexobj(a1) else False
        n = a1.shape[0]
        drv_args = {'overwrite_a': overwrite_a}

        if b is not None:
            b1 = _asarray_validated(b, check_finite=check_finite)
            overwrite_b = overwrite_b or _datacopied(b1, b)
            if len(b1.shape) != 2 or b1.shape[0] != b1.shape[1]:
                raise ValueError('expected square "b" matrix')

            if b1.shape != a1.shape:
                raise ValueError("wrong b dimensions {}, should "
                                 "be {}".format(b1.shape, a1.shape))

            if type not in [1, 2, 3]:
                raise ValueError('"type" keyword only accepts 1, 2, and 3.')

            cplx = True if iscomplexobj(b1) else (cplx or False)
            drv_args.update({'overwrite_b': overwrite_b, 'itype': type})

        # backwards-compatibility handling
        subset_by_index = subset_by_index if (eigvals is None) else eigvals

        subset = (subset_by_index is not None) or (subset_by_value is not None)

        # Both subsets can't be given
        if subset_by_index and subset_by_value:
            raise ValueError('Either index or value subset can be requested.')

        # Take turbo into account if all conditions are met otherwise ignore
        if turbo and b is not None:
            driver = 'gvx' if subset else 'gvd'

        # Check indices if given
        if subset_by_index:
            lo, hi = (int(x) for x in subset_by_index)
            if not (0 <= lo <= hi < n):
                raise ValueError('Requested eigenvalue indices are not valid. '
                                 'Valid range is [0, {}] and start <= end, but '
                                 'start={}, end={} is given'.format(n-1, lo, hi))
            # fortran is 1-indexed
            drv_args.update({'range': 'I', 'il': lo + 1, 'iu': hi + 1})

        if subset_by_value:
            lo, hi = subset_by_value
            if not (-inf <= lo < hi <= inf):
                raise ValueError('Requested eigenvalue bounds are not valid. '
                                 'Valid range is (-inf, inf) and low < high, but '
                                 'low={}, high={} is given'.format(lo, hi))

            drv_args.update({'range': 'V', 'vl': lo, 'vu': hi})

        # fix prefix for lapack routines
        pfx = 'he' if cplx else 'sy'

        # decide on the driver if not given
        # first early exit on incompatible choice
        if driver:
            if b is None and (driver in ["gv", "gvd", "gvx"]):
                raise ValueError('{} requires input b array to be supplied '
                                 'for generalized eigenvalue problems.'
                                 ''.format(driver))
            if (b is not None) and (driver in ['ev', 'evd', 'evr', 'evx']):
                raise ValueError('"{}" does not accept input b array '
                                 'for standard eigenvalue problems.'
                                 ''.format(driver))
            if subset and (driver in ["ev", "evd", "gv", "gvd"]):
                raise ValueError('"{}" cannot compute subsets of eigenvalues'
                                 ''.format(driver))

        # Default driver is evr and gvd
        else:
            driver = "evr" if b is None else ("gvx" if subset else "gvd")

        lwork_spec = {
                      'syevd': ['lwork', 'liwork'],
                      'syevr': ['lwork', 'liwork'],
                      'heevd': ['lwork', 'liwork', 'lrwork'],
                      'heevr': ['lwork', 'lrwork', 'liwork'],
                      }

        if b is None:  # Standard problem
            drv, drvlw = get_lapack_funcs((pfx + driver, pfx+driver+'_lwork'),
                                          [a1])
            clw_args = {'n': n, 'lower': lower}
            if driver == 'evd':
                clw_args.update({'compute_v': 0 if _job == "N" else 1})

            lw = _compute_lwork(drvlw, **clw_args)
            # Multiple lwork vars
            if isinstance(lw, tuple):
                lwork_args = dict(zip(lwork_spec[pfx+driver], lw))
            else:
                lwork_args = {'lwork': lw}

            drv_args.update({'lower': lower, 'compute_v': 0 if _job == "N" else 1})
>           w, v, *other_args, info = drv(a=a1, **drv_args, **lwork_args)
E           _flapack.error: (liwork>=max(1,10*n)||liwork==-1) failed for 10th keyword liwork: dsyevr:liwork=1

/usr/lib64/python3.11/site-packages/scipy/linalg/_decomp.py:560: error
adrinjalali commented 4 months ago

We don't support those old versions. Please let us know if the issue exists in the latest dev version.