lebedov / scikit-cuda

Python interface to GPU-powered libraries
http://scikit-cuda.readthedocs.org/

Is there any method to calculate batch linear regression on the GPU efficiently? #287

Open isaac-you opened 4 years ago

isaac-you commented 4 years ago

I have written a CPU version of massive batch regression:

from numba import jit
import numpy as np

@jit(nopython=True)
def resid_lstsq(x, y):
    # solve the least-squares problem directly
    ret = np.linalg.lstsq(x, y)
    coef = ret[0]
    yHat = np.dot(x, coef)
    residuals = y - yHat

    return residuals

@jit(nopython=True)
def resid_matrixReverse(x, y):
    # normal equations via the explicit inverse:
    # coef = (x'x)^-1 x'y
    coef = np.dot(np.dot(np.linalg.inv(np.dot(x.T, x)), x.T), y)
    yHat = np.dot(x, coef)
    residuals = y - yHat

    return residuals

@jit(nopython=True)
def resid_matrixSolve(x, y):
    # normal equations via a linear solve (avoids forming the inverse)
    coef = np.linalg.solve(np.dot(x.T, x), np.dot(x.T, y))
    yHat = np.dot(x, coef)
    residuals = y - yHat

    return residuals

def residCalculatorVector(x, y, method='matrixReverse'):
    '''
    get the residuals of the regression for one period
    x: the independent variables
    y: the dependent variable
    '''
    if method == 'lstsq':
        return resid_lstsq(x, y)

    elif method == 'matrixReverse':
        return resid_matrixReverse(x, y)

    elif method == 'matrixSolve':
        return resid_matrixSolve(x, y)

    elif method == 'sklearn':
        pass
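
# Note: on well-conditioned data the three solvers should agree to
# numerical precision; a quick check along these lines can confirm it:
#   x = np.random.randn(200, 5); y = np.random.randn(200)
#   assert np.allclose(resid_lstsq(x, y), resid_matrixSolve(x, y))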

def residCalculatorTensor(newFactorTensor, oldFactorTensor, method='matrixReverse'):
    '''
    calculate the residuals between one new factor and several old factors for each date and each stock
    newFactorTensor: (i,j,k) (i: stock ID, j: trade date, k: factors + target)
    oldFactorTensor: (i,j,k) (i: stock ID, j: trade date, k: factors)
    return: the residual tensor for each factor, each trade date, and each stock
    '''

    stockNum = newFactorTensor.shape[0]     #stock number in the period
    trDateNum = newFactorTensor.shape[1]    #trade date number
    newFacNum = newFactorTensor.shape[2]    #new factor number + 1 (plus target)

    ResidTensor = np.full(fill_value=np.nan, shape=(stockNum, trDateNum, newFacNum))    #the last column is the residual of the target

    for factor_i in range(newFacNum):

        for trDate_j in range(trDateNum):
            y_i_j = newFactorTensor[:, trDate_j, factor_i]  #factor_i in date_j for all stocks
            x_j = oldFactorTensor[:, trDate_j, :]           #old factors in date_j for all stocks
            idx = np.where(~np.isnan(y_i_j))[0]             #indices of the non-NaN rows
            y_i_j_ = y_i_j[idx]     #rows without NaN
            x_j_ = x_j[idx]         #rows without NaN
            factorResid_i_j = residCalculatorVector(x_j_, y_i_j_, method=method)
            ResidTensor[idx, trDate_j, factor_i] = factorResid_i_j

    return ResidTensor

newF_01 = np.random.randn(3000, 500, 50)
oldFTen_01 = np.random.randn(3000, 500, 100)

%time ans = residCalculatorTensor(newFactorTensor=newF_01, oldFactorTensor=oldFTen_01, method='matrixSolve')

CPU times: user 21min 18s, sys: 51.2 s, total: 22min 9s
Wall time: 1min 23s
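
For NaN-free panels the per-date loop can also be batched on the CPU, since np.linalg.solve broadcasts over a leading batch dimension. A minimal sketch (the helper name batched_resid is illustrative; the per-slice NaN filtering above is deliberately omitted because it breaks the shared batch shape):

import numpy as np

def batched_resid(X, Y):
    # X: (batch, n_obs, n_feat) design matrices, Y: (batch, n_obs) targets
    # Gram matrices x'x for every slice: (batch, n_feat, n_feat)
    G = np.einsum('bij,bik->bjk', X, X)
    # x'y for every slice: (batch, n_feat)
    b = np.einsum('bij,bi->bj', X, Y)
    # one broadcast solve over all slices at once
    coef = np.linalg.solve(G, b[..., None])[..., 0]
    # residuals y - x.coef for every slice: (batch, n_obs)
    return Y - np.einsum('bij,bj->bi', X, coef)

# e.g. residuals of factor 0 across all 500 dates in a single call:
# resid0 = batched_resid(oldFTen_01.transpose(1, 0, 2), newF_01[:, :, 0].T)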

I think the CPU version is too slow for such a massive regression, and I found that your code contains some functions that can calculate batch regression on the GPU. Can you teach me how to solve this problem efficiently? Thank you so much.

lebedov commented 4 years ago

You may wish to look into using the batched least-squares linear solver in CUBLAS; I just added wrappers for it to skcuda.
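
The relevant CUBLAS routine is cublas<t>gelsBatched, which QR-factorizes and solves many overdetermined systems in one call. Below is a minimal sketch of driving it from Python; it assumes the skcuda wrapper is exposed as skcuda.cublas.cublasSgelsBatched with the same argument order as the CUBLAS C API (handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, devInfoArray, batchSize), so check the skcuda reference for the exact signature before relying on it:

import ctypes
import numpy as np
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
from skcuda import cublas

def gels_batched_resid(X, Y):
    # X: (batch, m, n) float32 designs with m >= n, Y: (batch, m) float32
    batch, m, n = X.shape
    itemsize = np.dtype(np.float32).itemsize

    # CUBLAS is column-major: a C-contiguous (n, m) block is exactly an
    # (m, n) matrix stored column-major, so transpose each slice.
    a_gpu = gpuarray.to_gpu(np.ascontiguousarray(X.transpose(0, 2, 1)))
    c_gpu = gpuarray.to_gpu(np.ascontiguousarray(Y))  # nrhs = 1, ldc = m

    # gelsBatched takes arrays of device pointers, one per batch slice
    a_ptrs = gpuarray.to_gpu((a_gpu.ptr + np.arange(batch) * m * n * itemsize).astype(np.uint64))
    c_ptrs = gpuarray.to_gpu((c_gpu.ptr + np.arange(batch) * m * itemsize).astype(np.uint64))

    info = ctypes.c_int(0)                      # host-side status flag
    dev_info = gpuarray.zeros(batch, np.int32)  # per-slice status flags

    h = cublas.cublasCreate()
    # NB: verify how the wrapper expects the two info arguments to be
    # passed; the host/device split here follows the CUBLAS C API.
    cublas.cublasSgelsBatched(h, 'n', m, n, 1,
                              a_ptrs.gpudata, m,
                              c_ptrs.gpudata, m,
                              ctypes.byref(info), dev_info.gpudata, batch)
    cublas.cublasDestroy(h)

    # on exit the first n entries of each C slice hold the coefficients
    coef = c_gpu.get()[:, :n]
    return Y - np.einsum('bij,bj->bi', X, coef)

With the 500 x (3000 x 100) panels from the benchmark above, this replaces 500 host-side solves with a single batched GPU call.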

isaac-you commented 4 years ago

Thank you, I will look for the skcuda wrappers for batched OLS.