rsquaredacademy / olsrr

Tools for developing OLS regression models
https://olsrr.rsquaredacademy.com/
Other
102 stars 22 forks source link

Handle extremely significant variables #173

Closed: aravindhebbali closed this issue 3 years ago

aravindhebbali commented 3 years ago

Thanks to Serhat Akay for sharing the reprex below and bringing this issue to our attention.

ccpp<-read.csv("data.csv") #this publicly available data is available @
# https://archive.ics.uci.edu/ml/machine-learning-databases/00294/CCPP.zip
head(ccpp)
#>      AT     V      AP    RH     PE
#> 1  8.34 40.77 1010.84 90.01 480.48
#> 2 23.64 58.49 1011.40 74.20 445.75
#> 3 29.74 56.90 1007.15 41.91 438.76
#> 4 19.07 49.69 1007.22 76.79 453.09
#> 5 11.80 40.66 1017.13 97.20 464.43
#> 6 13.97 39.16 1016.05 84.60 470.96
library(olsrr)
#> 
#> Attaching package: 'olsrr'
#> The following object is masked from 'package:datasets':
#> 
#>     rivers
modelfirst<-lm(PE~AT+V+AP+RH,data=ccpp)

ols_step_all_possible(modelfirst)
#>    Index N Predictors  R-Square Adj. R-Square  Mallow's Cp
#> 1      1 1         AT 0.8989476     0.8989370   3988.75094
#> 2      2 1          V 0.7565178     0.7564923  23090.87680
#> 3      3 1         AP 0.2687687     0.2686922  88505.87191
#> 4      4 1         RH 0.1519394     0.1518508 104174.54690
#> 7      5 2      AT RH 0.9209481     0.9209315   1040.13314
#> 5      6 2       AT V 0.9157294     0.9157118   1740.03591
#> 6      7 2      AT AP 0.9008123     0.9007916   3740.66055
#> 8      8 2       V AP 0.7869260     0.7868814  19014.65188
#> 9      9 2       V RH 0.7720134     0.7719657  21014.67035
#> 10    10 2      AP RH 0.3842741     0.3841454  73016.73320
#> 12    11 3    AT V RH 0.9283748     0.9283524     46.08722
#> 13    12 3   AT AP RH 0.9210025     0.9209778   1034.82988
#> 11    13 3    AT V AP 0.9179754     0.9179497   1440.81060
#> 14    14 3    V AP RH 0.8039582     0.8038967  16732.35918
#> 15    15 4 AT V AP RH 0.9286961     0.9286663      5.00000

ols_step_forward_p(modelfirst)
#> Warning in min(pvals, na.rm = TRUE): no non-missing arguments to min; returning
#> Inf
#> Error in if (pvals[minp] <= penter) {: argument is of length zero

ols_step_backward_p(modelfirst)
#> [1] "No variables have been removed from the model."

ols_step_both_p(modelfirst,pent=0.05,prem=0.1)
#> Warning in min(pvals, na.rm = TRUE): no non-missing arguments to min; returning
#> Inf
#> Error in if (pvals[minp] <= pent) {: argument is of length zero

Created on 2020-12-27 by the reprex package (v0.3.0)
aravindhebbali commented 3 years ago

# load libraries
library(readxl)
library(olsrr)
#> 
#> Attaching package: 'olsrr'
#> The following object is masked from 'package:datasets':
#> 
#>     rivers

# data
ccpp  <- read_excel("J:/R/blog_research/data.xlsx")

# model
model <- lm(PE ~ AT + V + AP + RH, data = ccpp)

# variable selection
# forward selection
ols_step_forward_p(model, penter = 0.3)
#> 
#>                               Selection Summary                               
#> -----------------------------------------------------------------------------
#>         Variable                  Adj.                                           
#> Step    Entered     R-Square    R-Square      C(p)          AIC         RMSE     
#> -----------------------------------------------------------------------------
#>    1    AT            0.8989      0.8989    3988.7509    59518.4764    5.4251    
#>    2    RH            0.9209      0.9209    1040.1331    57171.2040    4.7983    
#>    3    V             0.9284      0.9284      46.0872    56229.2420    4.5674    
#>    4    AP            0.9287      0.9287       5.0000    56188.2290    4.5571    
#> -----------------------------------------------------------------------------

# both direction
ols_step_both_p(model, pent = 0.05, prem = 0.1)
#> 
#>                                Stepwise Selection Summary                                 
#> -----------------------------------------------------------------------------------------
#>                      Added/                   Adj.                                           
#> Step    Variable    Removed     R-Square    R-Square      C(p)          AIC         RMSE     
#> -----------------------------------------------------------------------------------------
#>    1       AT       addition       0.899       0.899    3988.7510    59518.4764    5.4251    
#>    2       RH       addition       0.921       0.921    1040.1330    57171.2040    4.7983    
#>    3       V        addition       0.928       0.928      46.0870    56229.2420    4.5674    
#>    4       AP       addition       0.929       0.929       5.0000    56188.2290    4.5571    
#> -----------------------------------------------------------------------------------------

# backward selection
ols_step_backward_p(model)
#> [1] "No variables have been removed from the model."

Created on 2020-12-31 by the reprex package (v0.3.0)