Open ghost opened 1 year ago
Here's a fix: import numpy as np import pandas as pd import matplotlib.pyplot as plt
# Multiple linear regression on the data in '50_Startups.csv': fit a
# scikit-learn LinearRegression on a train/test split, then fit a statsmodels
# OLS on the full data set and print its summary.
dataset = pd.read_csv('50_Startups.csv')
dataset = dataset.dropna()  # rows with missing values cannot be regressed on

# Features are every column except the last; the target is the last column.
# (Taking the target with -1 keeps it consistent with the `:-1` feature slice
# instead of relying on the file having exactly five columns.)
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, -1].values

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode ONLY the categorical column (index 3). Calling
# OneHotEncoder.fit_transform on the whole matrix would treat every distinct
# numeric value as its own category and destroy the numeric features.
# drop='first' removes one dummy column to avoid the dummy-variable trap.
onehot_encoder = OneHotEncoder(drop='first')
column_transformer = ColumnTransformer(
    [('encoder', onehot_encoder, [3])],
    remainder='passthrough',  # keep the numeric columns, appended on the right
    sparse_threshold=0,       # always return a dense array
)
# The passthrough columns come from an object array, so cast explicitly:
# statsmodels cannot work with object-dtype input.
X = np.asarray(column_transformer.fit_transform(X), dtype=float)

from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
regressor.fit(X_train, Y_train)

Y_pred = regressor.predict(X_test)

import statsmodels.api as sm

# statsmodels' OLS does not add an intercept by itself, so prepend a column
# of ones sized from the data actually loaded.
X = np.append(arr=np.ones((X.shape[0], 1)), values=X, axis=1)
# Start from all columns: intercept + dummy columns + numeric features.
# NOTE(review): indices 0-5 assume column 3 has three categories and there
# are three numeric features — confirm against the CSV.
X_opt = X[:, [0, 1, 2, 3, 4, 5]]
ols = sm.OLS(endog=Y, exog=X_opt).fit()
print(ols.summary())
import numpy as np import pandas as pd import matplotlib.pyplot as plt
# Multiple linear regression on the data in '50_Startups.csv': fit a
# scikit-learn LinearRegression on a train/test split, then fit a statsmodels
# OLS on the full data set.
dataset = pd.read_csv('50_Startups.csv')
X = dataset.iloc[:, :-1].values  # every column except the last
Y = dataset.iloc[:, 4].values    # column 4 is used as the target

from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

# One-hot encode the categorical column (index 3). OneHotEncoder accepts the
# raw category values directly, so no separate LabelEncoder pass is needed;
# both order categories by sorted value, so the resulting columns match.
ctrans = ColumnTransformer(
    [("encoder", OneHotEncoder(), [3])],
    remainder='passthrough',  # keep the remaining columns, appended on the right
    sparse_threshold=0,       # always return a dense array
)
# Cast to float: the passthrough columns come from an object array, and an
# object-dtype design matrix makes statsmodels fail with
# "ufunc 'isfinite' not supported for the input types".
X = np.array(ctrans.fit_transform(X), dtype=float)
X = X[:, 1:]  # drop the first dummy column to avoid the dummy-variable trap

from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
regressor.fit(X_train, Y_train)
Y_pred = regressor.predict(X_test)

import statsmodels.regression.linear_model as sm

# OLS does not add an intercept by itself, so prepend a column of ones. Size
# it from the data rather than hard-coding 50 rows.
X = np.append(arr=np.ones((X.shape[0], 1)), values=X, axis=1)
# NOTE(review): indices 0-5 assume the intercept, two dummy columns and three
# numeric features — confirm against the CSV.
X_opt = X[:, [0, 1, 2, 3, 4, 5]]
ols = sm.OLS(endog=Y, exog=X_opt).fit()