intel / scikit-learn-intelex

Intel(R) Extension for Scikit-learn is a seamless way to speed up your Scikit-learn application
https://intel.github.io/scikit-learn-intelex/
Apache License 2.0
1.23k stars 175 forks source link

Change in behavior due to #19 #23

Closed oleksandr-pavlyk closed 5 years ago

oleksandr-pavlyk commented 5 years ago

@fschlimb A binary search over the commit history (bisection) implicated #19 in the breakage affecting the sklearn patches (#15). I left a comment in that PR, but thought it's likely better to file a separate issue.

Specifically, the following script

# d4py_log_loss.py
import numpy as np
import daal4py

def getFPType(X):
    """Return the DAAL fptype string ('double' or 'float') for array *X*.

    Raises ValueError for any dtype other than float64/float32.
    """
    dtype = getattr(X, 'dtype', None)
    if dtype == np.double:
        return "double"
    if dtype == np.single:
        return "float"
    raise ValueError("Input array has unexpected dtype = {}".format(dtype))

def make2d(X):
    """Coerce *X* to a 2-D representation.

    Scalars become a (1, 1) array, 1-D ndarrays become a column vector
    of shape (size, 1); anything else is returned unchanged.
    """
    if np.isscalar(X):
        return np.asarray(X)[np.newaxis, np.newaxis]
    if isinstance(X, np.ndarray) and X.ndim == 1:
        return X.reshape((X.size, 1))
    return X

def _resultsToCompute_string(value=True, gradient=True, hessian=False):
    results_needed = []
    if value:
        results_needed.append('value')
    if gradient:
        results_needed.append('gradient')
    if hessian:
        results_needed.append('hessian')

    return '|'.join(results_needed)

def _daal4py_logistic_loss_extra_args(
        nClasses_unused, beta, X, y, l1=0.0, l2=0.0, fit_intercept=True,
        value=True, gradient=True, hessian=False):
    """Create and set up a daal4py logistic-loss objective function.

    Parameters
    ----------
    nClasses_unused : ignored (kept for signature compatibility)
    beta : coefficient vector; coerced to 2-D
    X, y : design matrix and targets; coerced to 2-D
    l1, l2 : regularization strengths; divided by the sample count before
        being handed to DAAL (presumably because DAAL averages the loss
        over terms — TODO confirm against DAAL docs)
    fit_intercept : passed through as interceptFlag
    value, gradient, hessian : which results the solver should compute

    Returns
    -------
    (algorithm_instance, X2d, y2d, n) where n is the number of samples.
    """
    X = make2d(X)
    y = make2d(y)
    beta = make2d(beta)
    n = X.shape[0]  # sample count (was also redundantly unpacked as nSamples)

    results_to_compute = _resultsToCompute_string(
        value=value, gradient=gradient, hessian=hessian)

    objective_function_algorithm_instance = daal4py.optimization_solver_logistic_loss(
        numberOfTerms=n,
        fptype=getFPType(X),
        method='defaultDense',
        interceptFlag=fit_intercept,
        penaltyL1=l1 / n,
        penaltyL2=l2 / n,
        resultsToCompute=results_to_compute
    )
    # Bind the data once so later compute() calls reuse it.
    objective_function_algorithm_instance.setup(X, y, beta)

    return (objective_function_algorithm_instance, X, y, n)

def _daal4py_loss_and_grad(beta, objF_instance, X, y, n):
    """Evaluate the objective and its gradient at *beta*, scaled by n.

    Parameters
    ----------
    beta : coefficient vector; coerced to 2-D before the compute call
    objF_instance : a daal4py objective-function algorithm (from
        `_daal4py_logistic_loss_extra_args`)
    X, y : data previously bound via setup()
    n : sample count used to undo DAAL's per-term averaging

    Returns
    -------
    (value, gradient), both scaled in place by n.

    Raises
    ------
    RuntimeError if the solver returned no gradient (the symptom reported
    in this issue: `res.gradientIdx` was None).
    """
    beta_ = make2d(beta)
    res = objF_instance.compute(X, y, beta_)
    gr = res.gradientIdx
    if gr is None:
        # Previously this printed X/y/beta_ and then fell through to
        # `gr *= n`, crashing with an uninformative TypeError. Fail
        # loudly with the same diagnostic context instead.
        raise RuntimeError(
            "compute() returned no gradient; X={}, y={}, beta={}".format(
                X, y, beta_))
    gr *= n
    v = res.valueIdx  # NOTE(review): assumed non-None when value is requested
    v *= n
    return (v, gr)

if __name__ == '__main__':
    # Reproduce the failure on a single-sample slice of a tiny dataset.
    X_full = np.array([[-1, 0], [0, 1], [1, 1]], dtype=np.double)
    Y1 = np.array([0, 1, 1], np.double)
    X = X_full[-1:]
    y = Y1[-1:]

    beta = np.zeros(3, dtype=np.double)

    objF, X2d, y2d, n = _daal4py_logistic_loss_extra_args(
        1, beta, X, y, l1=0.0, l2=1.,
        value=True, gradient=True, hessian=False)
    _daal4py_loss_and_grad(beta, objF, X2d, y2d, n)

runs fine in daal4py built from 2133108197e6346b89a75cbc07fbbcc7fa747bef, but fails with an error in package built from 08f13014d7bd4c85faaf3fe0dc4896c0274ab523.

Specifically, res.gradientIdx returns None, rather than an array.

fschlimb commented 5 years ago

Fixed with #24