Open knageswara78 opened 5 years ago
from sklearn.ensemble import RandomForestRegressor
import pandas as pd  # needed for get_dummies below; harmless if already imported

# Random-forest feature-importance script.
#
# Expects a DataFrame named `train` (plus `np` and `plt`) to already be in
# scope. Fits a RandomForestRegressor that predicts `Item_Outlet_Sales` from
# the remaining columns and plots the ten most important predictors.

# Drop the two identifier columns before modelling.
train = train.drop(['Item_Identifier', 'Outlet_Identifier'], axis=1)
model = RandomForestRegressor(random_state=1, max_depth=10)
train.info()

# Find categorical variables: every column that is not numeric.
# select_dtypes is the public replacement for the private _get_numeric_data().
cols = train.columns
num_cols = train.select_dtypes(include='number').columns
cat_cols = [col for col in cols if col not in num_cols]

# Keep the target out of the feature matrix. Fitting on a frame that still
# contains Item_Outlet_Sales leaks the answer into the predictors and makes
# the importance ranking meaningless.
y = train['Item_Outlet_Sales']
# One-hot encode the categorical columns: the estimator needs numeric input.
X = pd.get_dummies(train.drop('Item_Outlet_Sales', axis=1))

model.fit(X, y)

# After fitting the model, plot the feature importance graph.
# `features` must come from X (not train) so that it lines up index-for-index
# with model.feature_importances_.
features = X.columns
importances = model.feature_importances_
# argsort is ascending, so the last 10 entries are the 10 largest importances
# (fewer if X has fewer than 10 columns); the biggest ends up as the top bar.
indices = np.argsort(importances)[-10:]  # top 10 features

plt.title('Feature Importances')
plt.barh(range(len(indices)), importances[indices], color='b', align='center')
plt.yticks(range(len(indices)), [features[i] for i in indices])
plt.xlabel('Relative Importance')
plt.show()
Random Forest