Closed gundalav closed 5 years ago
I have the following importance_frame:
importance_frame
# Reproducible copy of the importance table used in the question: one row per
# variable (20 rows) with seven numeric importance measures. The object is
# rebuilt with structure() and keeps the `na.action` attribute of the original
# dump.
importance_frame <- structure(
  list(
    variable = structure(
      1:20,
      .Label = c(
        "A", "C", "D", "E", "F", "G", "H", "I", "K", "L",
        "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"
      ),
      class = "factor"
    ),
    mean_min_depth = c(
      1.9761861386314, 2.5220853029533, 2.15539883255869, 1.61935396654558,
      1.45123463631321, 1.53296953170083, 1.77518115811586, 1.52151167552988,
      1.89182019096144, 2.14429040818413, 1.26326405034901, 1.93502763567771,
      1.26898183744519, 2.02060547195198, 1.54217481302459, 1.67384650439192,
      1.5485857685783, 2.09727178410599, 2.75747046937195, 2.35864404092358
    ),
    times_a_root = c(
      23.4, 5.5, 13.3, 27.9, 39.3, 31.3, 29.7, 34.2, 24.2, 13,
      43, 22.7, 45.3, 16.8, 31.5, 30.1, 33.5, 19.3, 1.75, 14.6
    ),
    no_of_nodes = c(
      68.1, 32.6, 62.2, 103.2, 103.3, 104.7, 75.6, 105.7, 72.4, 64.6,
      118.4, 73.6, 116.6, 74.5, 104.6, 95.6, 103.2, 60.3, 8.875, 36.1
    ),
    no_of_trees = c(
      65.1, 32.3, 59.8, 96.1, 94.7, 99.9, 74.8, 100.6, 69.4, 62.8,
      111.2, 71.2, 108.3, 72.4, 98.8, 90, 97.6, 58.4, 8.875, 35.9
    ),
    p_value = c(
      0.669119230058558, 0.999999783867775, 0.824720803698331,
      0.10305110839386, 0.160596787513604, 0.141119826647113,
      0.52735342045046, 0.162403671879659, 0.713272963278132,
      0.817225145266696, 0.0104446472288876, 0.546649197487473,
      0.0330726857615005, 0.672936592800508, 0.0310135225001855,
      0.182169849737794, 0.274905137508873, 0.873388429679101,
      1, 0.999021554764331
    ),
    gini_decrease = c(
      0.233831386391386, 0.0886505361305361, 0.185330422910423,
      0.358267377067377, 0.401108053058053, 0.397634655344655,
      0.308835228105228, 0.389097318237318, 0.250707615717616,
      0.191033563103563, 0.476535763125763, 0.249038827838828,
      0.47133199023199, 0.243902473082473, 0.372547632367632,
      0.33646759018759, 0.382999447219447, 0.203790450660451,
      0.0253906843156843, 0.133164814074814
    ),
    accuracy_decrease = c(
      -0.00445119047619048, -0.00289380952380952, -0.00482809523809524,
      -0.00530904761904762, 0.0051652380952381, 0.00616785714285714,
      0.00289238095238095, -0.00079095238095238, -0.00239095238095238,
      -0.00648809523809524, 0.00383690476190476, -0.00413857142857143,
      0.00331214285714286, -0.00290619047619048, -0.00131714285714286,
      -0.0046781746031746, 0.00534214285714286, -0.00532571428571429,
      0, -0.000374047619047619
    )
  ),
  class = "data.frame",
  .Names = c(
    "variable", "mean_min_depth", "times_a_root", "no_of_nodes",
    "no_of_trees", "p_value", "gini_decrease", "accuracy_decrease"
  ),
  row.names = c(NA, -20L),
  na.action = structure(
    c(80L, 180L),
    .Names = c("80", "180"),
    class = "omit"
  )
)

# Print the frame; the `#>` lines below are the console output that was pasted
# with the question.
importance_frame
#>    variable mean_min_depth times_a_root no_of_nodes no_of_trees    p_value
#> 1         A       1.976186        23.40      68.100      65.100 0.66911923
#> 2         C       2.522085         5.50      32.600      32.300 0.99999978
#> 3         D       2.155399        13.30      62.200      59.800 0.82472080
#> 4         E       1.619354        27.90     103.200      96.100 0.10305111
#> 5         F       1.451235        39.30     103.300      94.700 0.16059679
#> 6         G       1.532970        31.30     104.700      99.900 0.14111983
#> 7         H       1.775181        29.70      75.600      74.800 0.52735342
#> 8         I       1.521512        34.20     105.700     100.600 0.16240367
#> 9         K       1.891820        24.20      72.400      69.400 0.71327296
#> 10        L       2.144290        13.00      64.600      62.800 0.81722515
#> 11        M       1.263264        43.00     118.400     111.200 0.01044465
#> 12        N       1.935028        22.70      73.600      71.200 0.54664920
#> 13        P       1.268982        45.30     116.600     108.300 0.03307269
#> 14        Q       2.020605        16.80      74.500      72.400 0.67293659
#> 15        R       1.542175        31.50     104.600      98.800 0.03101352
#> 16        S       1.673847        30.10      95.600      90.000 0.18216985
#> 17        T       1.548586        33.50     103.200      97.600 0.27490514
#> 18        V       2.097272        19.30      60.300      58.400 0.87338843
#> 19        W       2.757470         1.75       8.875       8.875 1.00000000
#> 20        Y       2.358644        14.60      36.100      35.900 0.99902155
#>    gini_decrease accuracy_decrease
#> 1     0.23383139     -0.0044511905
#> 2     0.08865054     -0.0028938095
#> 3     0.18533042     -0.0048280952
#> 4     0.35826738     -0.0053090476
#> 5     0.40110805      0.0051652381
#> 6     0.39763466      0.0061678571
#> 7     0.30883523      0.0028923810
#> 8     0.38909732     -0.0007909524
#> 9     0.25070762     -0.0023909524
#> 10    0.19103356     -0.0064880952
#> 11    0.47653576      0.0038369048
#> 12    0.24903883     -0.0041385714
#> 13    0.47133199      0.0033121429
#> 14    0.24390247     -0.0029061905
#> 15    0.37254763     -0.0013171429
#> 16    0.33646759     -0.0046781746
#> 17    0.38299945      0.0053421429
#> 18    0.20379045     -0.0053257143
#> 19    0.02539068      0.0000000000
#> 20    0.13316481     -0.0003740476
And I tried to get the important variables with the following code:
# Ask randomForestExplainer for the top variables of `importance_frame`
# (k = 10), ranked on the measures named below.
library(randomForestExplainer)

x_measure <- "gini_decrease"
y_measure <- "accuracy_decrease"

# NOTE(review): `size_measure` is not assigned anywhere in this snippet, so it
# must already exist in the session. Presumably every entry of `measures` has
# to name a column of `importance_frame` -- TODO confirm against the package
# documentation.
important_variables(
  importance_frame,
  k = 10,
  measures = c(x_measure, y_measure, size_measure)
)
The error I get is this:
Error in `[.data.frame`(rankings, , measures) : undefined columns selected
How can I fix the issue?
Also, what is the meaning of a negative `accuracy_decrease`?
accuracy_decrease
I have the following `importance_frame`:
And I tried to get the important variables with the following code:
The error I get is this:
How can I fix the issue?
Also, what is the meaning of a negative `accuracy_decrease`?