# Open issue, opened by johnnymdoubleu 2 years ago.
import os

import pandas as pd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.datasets import dump_svmlight_file as writelibsvm
from sklearn.datasets import load_svmlight_file as readlibsvm

# Directory (relative to the current working directory) that receives the
# expanded libsvm files.
_OUTPUT_DIR = "expanded"

# (input file, polynomial degree) pairs processed when run as a script.
_DATASETS = (
    ("data/pyrim.txt", 5),
    ("data/triazines.txt", 4),
    ("data/abalone.txt", 7),
    ("data/bodyfat.txt", 7),
    ("data/housing.txt", 7),
    ("data/mpg.txt", 7),
    ("data/space_ga.txt", 9),
)


def basisExpand(filepath, degree, exclude=None):
    """Apply a polynomial feature expansion to a libsvm-format dataset.

    The file at *filepath* is loaded, optionally stripped of its leading
    columns, expanded with ``PolynomialFeatures(degree)`` and written back
    in libsvm format to ``expanded/<name><degree>.txt``, where ``<name>`` is
    the part of the input file name before its first dot.

    Args:
        filepath: Path of the libsvm/svmlight file to read ("/"-separated).
        degree: Degree passed to ``PolynomialFeatures``.
        exclude: If not ``None``, the number of leading feature columns to
            drop before expanding (used as ``df.iloc[:, exclude:]``).

    Returns:
        A dict with the keys ``"dimensiaon"`` (shape of the expanded
        matrix), ``"degree"``, ``"excluded"`` and ``"Matrix"`` (the expanded
        data as a sparse-backed DataFrame).
    """
    features, targets = readlibsvm(filepath)
    df = pd.DataFrame.sparse.from_spmatrix(features)
    print(f"The Dimension of original data is {features.shape}")

    if exclude is not None:
        df = df.iloc[:, exclude:]
        print(f"The new Dimension is {df.shape}")

    # Polynomial basis (feature) expansion.
    expanded = PolynomialFeatures(degree).fit_transform(df)
    sparseMat = pd.DataFrame.sparse.from_spmatrix(expanded)

    # Output the sparse matrix in libsvm format.  The name is everything
    # before the first dot of the last path component.
    filename = filepath.split('/')[-1].split('.')[0]
    # The writer does not create missing directories, so make sure the
    # destination exists before dumping.
    os.makedirs(_OUTPUT_DIR, exist_ok=True)
    writelibsvm(
        sparseMat,
        targets,
        f=f"{_OUTPUT_DIR}/{filename}{degree}.txt",
        zero_based=True,
    )

    return {
        # NOTE: key spelling (sic) is kept so existing consumers keep working.
        "dimensiaon": sparseMat.shape,
        "degree": degree,
        "excluded": exclude,
        "Matrix": sparseMat,
    }


def main():
    """Expand every configured dataset and print each result summary."""
    for path, degree in _DATASETS:
        print(basisExpand(path, degree))


if __name__ == "__main__":
    main()