facebook / prophet

Tool for producing high quality forecasts for time series data that has multiple seasonality with linear or non-linear growth.
https://facebook.github.io/prophet
MIT License
18.28k stars 4.51k forks source link

The last line of code raises an error. #2455

Open ghost opened 1 year ago

ghost commented 1 year ago

import numpy as np import pandas as pd import matplotlib.pyplot as plt

# Load the data set: every column except the last is a feature; the target is
# read from column index 4 (presumably the last column -- confirm against the CSV).
dataset = pd.read_csv('50_Startups.csv')
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, 4].values

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Column 3 is treated as categorical: label-encode it, then one-hot encode it.
LabEn = LabelEncoder()
X[:, 3] = LabEn.fit_transform(X[:, 3])
# sparse_threshold=0 forces a dense result regardless of the dummy density.
ctrans = ColumnTransformer(
    [("encoder", OneHotEncoder(), [3])],
    remainder='passthrough',
    sparse_threshold=0,
)
# The passthrough columns come from an object-dtype array, so the stacked
# result is object-dtype too. statsmodels cannot fit an object array, so cast
# to float here.
X = np.array(ctrans.fit_transform(X), dtype=float)
# The encoder output comes first; drop the first dummy column to avoid the
# dummy-variable trap.
X = X[:, 1:]

from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
regressor.fit(X_train, Y_train)
Y_pred = regressor.predict(X_test)

import statsmodels.regression.linear_model as sm

# Prepend an intercept column sized from the data rather than a hard-coded 50.
X = np.append(arr=np.ones((X.shape[0], 1)), values=X, axis=1)
X_opt = X[:, [0, 1, 2, 3, 4, 5]]
ols = sm.OLS(endog=Y, exog=X_opt).fit()

AlexandroLuis commented 1 year ago

Here's a fix: import numpy as np import pandas as pd import matplotlib.pyplot as plt

# Load the data set and discard rows with missing values.
dataset = pd.read_csv('50_Startups.csv')
dataset = dataset.dropna()

# Every column except the last is a feature; the target is read from column
# index 4 (presumably the last column -- confirm against the CSV).
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, 4].values

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

label_encoder = LabelEncoder()
X[:, 3] = label_encoder.fit_transform(X[:, 3])
# Encode ONLY the categorical column (index 3). Fitting OneHotEncoder on the
# whole matrix would also turn every distinct numeric value into its own dummy
# column. drop='first' removes one dummy to avoid the dummy-variable trap.
# Dense output is requested through ColumnTransformer(sparse_threshold=0)
# rather than OneHotEncoder(sparse=False), which newer scikit-learn releases
# no longer accept (renamed to sparse_output).
onehot_encoder = OneHotEncoder(drop='first')
column_transformer = ColumnTransformer(
    [("encoder", onehot_encoder, [3])],
    remainder='passthrough',
    sparse_threshold=0,
)
# The passthrough columns are object-dtype; cast to float so that statsmodels
# can fit the design matrix.
X = np.array(column_transformer.fit_transform(X), dtype=float)

from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0
)

from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
regressor.fit(X_train, Y_train)

Y_pred = regressor.predict(X_test)

import statsmodels.api as sm

# Prepend an intercept column, then fit OLS on the first six columns.
X = np.append(arr=np.ones((X.shape[0], 1)), values=X, axis=1)
X_opt = X[:, [0, 1, 2, 3, 4, 5]]
ols = sm.OLS(endog=Y, exog=X_opt).fit()
print(ols.summary())