johnnymdoubleu / lassoSSNAL

Semismooth Newton Augmented Lagrangian Method implemented in R
GNU General Public License v3.0
1 stars 0 forks source link

Polynomial Basis Expansion (in Python) #9

Open johnnymdoubleu opened 2 years ago

johnnymdoubleu commented 2 years ago
import os

import pandas as pd
from sklearn.datasets import dump_svmlight_file as writelibsvm
from sklearn.datasets import load_svmlight_file as readlibsvm
from sklearn.preprocessing import PolynomialFeatures

def basisExpand(filepath, degree, exclude=None):
    """Expand a libsvm dataset with polynomial features and save the result.

    The file at ``filepath`` is read with ``load_svmlight_file``, optionally
    stripped of its leading columns, expanded with
    ``PolynomialFeatures(degree)`` and written in libsvm format to
    ``expanded/<name><degree>.txt``, where ``<name>`` is the input file name
    up to its first dot. The ``expanded`` directory is created if missing.

    Args:
        filepath: Path of the libsvm-format input file.
        degree: Degree handed to ``PolynomialFeatures``.
        exclude: If not ``None``, the positional column index from which
            features are kept; columns before it are dropped before the
            expansion.

    Returns:
        A dict with the keys ``"dimension"`` (shape of the expanded data),
        ``"degree"``, ``"excluded"`` and ``"Matrix"`` (the expanded data as a
        sparse DataFrame). The historical misspelling ``"dimensiaon"`` is
        kept as an alias of ``"dimension"`` for existing callers.
    """
    loaded = readlibsvm(filepath)
    features, targets = loaded[0], loaded[1]
    df = pd.DataFrame.sparse.from_spmatrix(features)
    print(f"The Dimension of original data is {features.shape}")
    if exclude is not None:
        df = df.iloc[:, exclude:]
        print(f"The new Dimension is {df.shape}")

    # Polynomial basis (feature) expansion.
    p = PolynomialFeatures(degree).fit(df)
    sparseMat = pd.DataFrame.sparse.from_spmatrix(p.transform(df))

    # Output the expanded matrix in libsvm format. The writer opens the
    # target path directly, so the output directory has to exist beforehand.
    filename = os.path.basename(filepath).split('.')[0]
    os.makedirs("expanded", exist_ok=True)
    writelibsvm(sparseMat, targets, f=f"expanded/{filename}{degree}.txt", zero_based=True)

    output = {
        # Misspelled key retained so existing readers of the result keep working.
        "dimensiaon": sparseMat.shape,
        "dimension": sparseMat.shape,
        "degree": degree,
        "excluded": exclude,
        "Matrix": sparseMat,
    }
    return output

# Expand every benchmark dataset to its chosen polynomial degree and print
# the summary dict returned for each one, in the order listed.
for _path, _degree in (
    ("data/pyrim.txt", 5),
    ("data/triazines.txt", 4),
    ("data/abalone.txt", 7),
    ("data/bodyfat.txt", 7),
    ("data/housing.txt", 7),
    ("data/mpg.txt", 7),
    ("data/space_ga.txt", 9),
):
    print(basisExpand(_path, _degree))