knageswara78 / Python_Solutions

0 stars 0 forks source link

Random Forest Variable reduction #9

Open knageswara78 opened 5 years ago

knageswara78 commented 5 years ago

Random Forest

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

# Random-forest based variable reduction: fit a forest on the predictors and
# plot the most important ones.
#
# NOTE(review): this snippet expects `train` (a pandas DataFrame containing
# the target column named below) and `plt` (presumably matplotlib.pyplot) to
# be defined earlier in the session -- confirm before running standalone.

TARGET = 'Item_Outlet_Sales'  # column the forest is trained to predict
TOP_N = 10                    # number of features shown in the plot

# Identifier columns are dropped before modelling.
train = train.drop(['Item_Identifier', 'Outlet_Identifier'], axis=1)
model = RandomForestRegressor(random_state=1, max_depth=10)

train.info()

# Find categorical variables (everything that is neither numeric nor boolean).
categorical_cols = train.select_dtypes(exclude=['number', 'bool']).columns.tolist()
print(categorical_cols)

# Build the design matrix WITHOUT the target column: fitting on a frame that
# still contains the target leaks it into the predictors and makes it dominate
# the importance ranking. One-hot encode the categorical columns, because the
# forest cannot be fitted on string values.
# NOTE(review): missing values are not handled here; presumably they were
# imputed earlier in the session -- verify.
X = pd.get_dummies(train.drop(TARGET, axis=1))
y = train[TARGET]
model.fit(X, y)

# After fitting the model, plot the feature importance graph.
# Labels must come from the fitted matrix so they line up with
# `feature_importances_`.
features = X.columns
importances = model.feature_importances_
# argsort is ascending, so the last TOP_N positions are the most important
# features; a shorter slice is returned when fewer features exist.
indices = np.argsort(importances)[-TOP_N:]
plt.title('Feature Importances')
plt.barh(range(len(indices)), importances[indices], color='b', align='center')
plt.yticks(range(len(indices)), [features[i] for i in indices])
plt.xlabel('Relative Importance')
plt.show()