Open IrinaMax opened 4 years ago
Just bumping this along with reprex() and sessionInfo() output.
library(lime)
library(stringi)
library(text2vec)
library(data.table)
library(magrittr)
library(purrr)
#>
#> Attaching package: 'purrr'
#> The following object is masked from 'package:magrittr':
#>
#> set_names
#> The following object is masked from 'package:data.table':
#>
#> transpose
library(xgboost)
set.seed(2000)
# Data loading
data("train_sentences")
data("test_sentences")
data("stop_words_sentences")
setDT(train_sentences)
setDT(test_sentences)
label_to_explain <- "OWNX"
# label train set and test set
train_sentences[, label := class.text == label_to_explain]
test_sentences[, label := class.text == label_to_explain]
get.iterator <- function(data) itoken(data, preprocess_function = tolower, tokenizer = word_tokenizer, progressbar = F)
# Extract vocabulary
v <- create_vocabulary(get.iterator(train_sentences$text), stopwords = stop_words_sentences)
# Function to transform text in matrix
get.matrix <- function(data) {
i <- get.iterator(data)
create_dtm(i, vocab_vectorizer(v))
}
lsa.full.text <- LSA$new(n_topics = 100)
tfidf <- TfIdf$new()
invisible(get.matrix(train_sentences$text) %>% tfidf$fit_transform())
invisible(get.matrix(train_sentences$text) %>% transform(tfidf) %>% lsa.full.text$fit_transform())
#> INFO [10:49:54.450] soft_als: iter 001, frobenious norm change 8.557 loss NA
#> INFO [10:49:55.028] soft_als: iter 002, frobenious norm change 0.531 loss NA
#> INFO [10:49:55.637] soft_als: iter 003, frobenious norm change 0.114 loss NA
#> INFO [10:49:56.123] soft_als: iter 004, frobenious norm change 0.040 loss NA
#> INFO [10:49:56.539] soft_als: iter 005, frobenious norm change 0.019 loss NA
#> INFO [10:49:56.969] soft_als: iter 006, frobenious norm change 0.010 loss NA
#> INFO [10:49:57.396] soft_als: iter 007, frobenious norm change 0.006 loss NA
#> INFO [10:49:57.817] soft_als: iter 008, frobenious norm change 0.004 loss NA
#> INFO [10:49:58.239] soft_als: iter 009, frobenious norm change 0.003 loss NA
#> INFO [10:49:58.680] soft_als: iter 010, frobenious norm change 0.002 loss NA
#> INFO [10:49:59.243] soft_als: iter 011, frobenious norm change 0.001 loss NA
#> INFO [10:49:59.687] soft_als: iter 012, frobenious norm change 0.001 loss NA
#> INFO [10:50:00.140] soft_als: iter 013, frobenious norm change 0.001 loss NA
#> INFO [10:50:00.141] soft_impute: converged with tol 0.001000 after 13 iter
add.lsa <- function(m, lsa) {
l <- transform(m, lsa)
colnames(l) <- ncol(l) %>% seq() %>% paste0("lsa.", .)
cbind2(m, l)
}
dtrain <- get.matrix(train_sentences$text) %>% transform(tfidf) %>% add.lsa(lsa.full.text) %>% xgb.DMatrix(label = train_sentences$label)
dtest <- get.matrix(test_sentences$text) %>% transform(tfidf) %>% add.lsa(lsa.full.text) %>% xgb.DMatrix(label = test_sentences$label)
watchlist <- list(train = dtrain, eval = dtest)
param <- list(max_depth = 7, eta = 0.1, objective = "binary:logistic", eval_metric = "error", nthread = 1)
bst <- xgb.train(param, dtrain, nrounds = 500, watchlist, early_stopping_rounds = 100)
#> [1] train-error:0.126738 eval-error:0.178333
#> Multiple eval metrics are present. Will use eval_error for early stopping.
#> Will train until eval_error hasn't improved in 100 rounds.
#>
#> [2] train-error:0.114025 eval-error:0.170000
#> [3] train-error:0.116011 eval-error:0.163333
#> [4] train-error:0.106079 eval-error:0.153333
#> [5] train-error:0.105284 eval-error:0.160000
#> [6] train-error:0.100914 eval-error:0.163333
#> [7] train-error:0.093762 eval-error:0.155000
#> [8] train-error:0.087406 eval-error:0.153333
#> [9] train-error:0.080254 eval-error:0.141667
#> [10] train-error:0.079857 eval-error:0.143333
#> [11] train-error:0.074295 eval-error:0.135000
#> [12] train-error:0.064760 eval-error:0.130000
#> [13] train-error:0.067143 eval-error:0.133333
#> [14] train-error:0.061184 eval-error:0.128333
#> [15] train-error:0.059992 eval-error:0.120000
#> [16] train-error:0.058800 eval-error:0.121667
#> [17] train-error:0.055622 eval-error:0.113333
#> [18] train-error:0.053238 eval-error:0.101667
#> [19] train-error:0.053238 eval-error:0.101667
#> [20] train-error:0.052046 eval-error:0.100000
#> [21] train-error:0.050854 eval-error:0.100000
#> [22] train-error:0.049265 eval-error:0.096667
#> [23] train-error:0.048073 eval-error:0.098333
#> [24] train-error:0.048073 eval-error:0.096667
#> [25] train-error:0.047676 eval-error:0.093333
#> [26] train-error:0.048073 eval-error:0.095000
#> [27] train-error:0.047676 eval-error:0.095000
#> [28] train-error:0.046484 eval-error:0.088333
#> [29] train-error:0.046484 eval-error:0.088333
#> [30] train-error:0.046087 eval-error:0.088333
#> [31] train-error:0.046087 eval-error:0.086667
#> [32] train-error:0.046087 eval-error:0.086667
#> [33] train-error:0.045689 eval-error:0.091667
#> [34] train-error:0.045689 eval-error:0.091667
#> [35] train-error:0.045292 eval-error:0.091667
#> [36] train-error:0.044895 eval-error:0.095000
#> [37] train-error:0.044895 eval-error:0.098333
#> [38] train-error:0.044497 eval-error:0.093333
#> [39] train-error:0.044497 eval-error:0.093333
#> [40] train-error:0.044497 eval-error:0.093333
#> [41] train-error:0.044497 eval-error:0.095000
#> [42] train-error:0.043306 eval-error:0.093333
#> [43] train-error:0.042511 eval-error:0.093333
#> [44] train-error:0.041716 eval-error:0.093333
#> [45] train-error:0.040922 eval-error:0.090000
#> [46] train-error:0.041319 eval-error:0.090000
#> [47] train-error:0.040524 eval-error:0.086667
#> [48] train-error:0.040524 eval-error:0.086667
#> [49] train-error:0.040524 eval-error:0.086667
#> [50] train-error:0.040524 eval-error:0.086667
#> [51] train-error:0.040524 eval-error:0.086667
#> [52] train-error:0.040524 eval-error:0.086667
#> [53] train-error:0.040524 eval-error:0.085000
#> [54] train-error:0.040524 eval-error:0.086667
#> [55] train-error:0.040524 eval-error:0.085000
#> [56] train-error:0.040524 eval-error:0.085000
#> [57] train-error:0.040524 eval-error:0.085000
#> [58] train-error:0.040524 eval-error:0.085000
#> [59] train-error:0.040524 eval-error:0.083333
#> [60] train-error:0.040524 eval-error:0.083333
#> [61] train-error:0.040524 eval-error:0.083333
#> [62] train-error:0.040524 eval-error:0.086667
#> [63] train-error:0.040524 eval-error:0.085000
#> [64] train-error:0.040524 eval-error:0.088333
#> [65] train-error:0.040127 eval-error:0.085000
#> [66] train-error:0.040127 eval-error:0.088333
#> [67] train-error:0.040127 eval-error:0.085000
#> [68] train-error:0.040127 eval-error:0.085000
#> [69] train-error:0.040127 eval-error:0.083333
#> [70] train-error:0.040127 eval-error:0.083333
#> [71] train-error:0.040127 eval-error:0.085000
#> [72] train-error:0.040127 eval-error:0.085000
#> [73] train-error:0.040127 eval-error:0.085000
#> [74] train-error:0.040127 eval-error:0.085000
#> [75] train-error:0.040127 eval-error:0.085000
#> [76] train-error:0.040127 eval-error:0.083333
#> [77] train-error:0.040127 eval-error:0.081667
#> [78] train-error:0.040127 eval-error:0.081667
#> [79] train-error:0.040127 eval-error:0.083333
#> [80] train-error:0.039730 eval-error:0.085000
#> [81] train-error:0.039730 eval-error:0.088333
#> [82] train-error:0.039730 eval-error:0.086667
#> [83] train-error:0.039730 eval-error:0.086667
#> [84] train-error:0.039730 eval-error:0.086667
#> [85] train-error:0.039730 eval-error:0.086667
#> [86] train-error:0.039730 eval-error:0.088333
#> [87] train-error:0.039730 eval-error:0.086667
#> [88] train-error:0.039730 eval-error:0.086667
#> [89] train-error:0.039730 eval-error:0.088333
#> [90] train-error:0.039730 eval-error:0.088333
#> [91] train-error:0.039730 eval-error:0.090000
#> [92] train-error:0.039730 eval-error:0.091667
#> [93] train-error:0.039730 eval-error:0.088333
#> [94] train-error:0.039730 eval-error:0.085000
#> [95] train-error:0.039730 eval-error:0.088333
#> [96] train-error:0.039730 eval-error:0.086667
#> [97] train-error:0.039730 eval-error:0.086667
#> [98] train-error:0.039730 eval-error:0.086667
#> [99] train-error:0.039730 eval-error:0.088333
#> [100] train-error:0.039730 eval-error:0.088333
#> [101] train-error:0.039730 eval-error:0.091667
#> [102] train-error:0.039730 eval-error:0.091667
#> [103] train-error:0.039730 eval-error:0.090000
#> [104] train-error:0.039730 eval-error:0.090000
#> [105] train-error:0.039730 eval-error:0.090000
#> [106] train-error:0.039730 eval-error:0.090000
#> [107] train-error:0.039730 eval-error:0.091667
#> [108] train-error:0.039730 eval-error:0.090000
#> [109] train-error:0.039730 eval-error:0.086667
#> [110] train-error:0.039730 eval-error:0.086667
#> [111] train-error:0.039730 eval-error:0.083333
#> [112] train-error:0.039730 eval-error:0.085000
#> [113] train-error:0.039730 eval-error:0.090000
#> [114] train-error:0.039730 eval-error:0.090000
#> [115] train-error:0.039730 eval-error:0.088333
#> [116] train-error:0.039730 eval-error:0.088333
#> [117] train-error:0.039730 eval-error:0.086667
#> [118] train-error:0.039730 eval-error:0.088333
#> [119] train-error:0.039730 eval-error:0.090000
#> [120] train-error:0.039730 eval-error:0.091667
#> [121] train-error:0.039730 eval-error:0.093333
#> [122] train-error:0.039730 eval-error:0.095000
#> [123] train-error:0.039730 eval-error:0.093333
#> [124] train-error:0.039730 eval-error:0.091667
#> [125] train-error:0.039730 eval-error:0.088333
#> [126] train-error:0.039730 eval-error:0.090000
#> [127] train-error:0.039730 eval-error:0.090000
#> [128] train-error:0.039730 eval-error:0.090000
#> [129] train-error:0.039730 eval-error:0.093333
#> [130] train-error:0.039730 eval-error:0.093333
#> [131] train-error:0.039730 eval-error:0.095000
#> [132] train-error:0.039730 eval-error:0.091667
#> [133] train-error:0.039730 eval-error:0.091667
#> [134] train-error:0.039730 eval-error:0.091667
#> [135] train-error:0.039730 eval-error:0.093333
#> [136] train-error:0.039730 eval-error:0.093333
#> [137] train-error:0.039730 eval-error:0.091667
#> [138] train-error:0.039730 eval-error:0.093333
#> [139] train-error:0.039730 eval-error:0.093333
#> [140] train-error:0.039730 eval-error:0.093333
#> [141] train-error:0.039730 eval-error:0.093333
#> [142] train-error:0.039730 eval-error:0.093333
#> [143] train-error:0.039730 eval-error:0.095000
#> [144] train-error:0.039730 eval-error:0.095000
#> [145] train-error:0.039730 eval-error:0.093333
#> [146] train-error:0.039730 eval-error:0.091667
#> [147] train-error:0.039730 eval-error:0.091667
#> [148] train-error:0.039730 eval-error:0.093333
#> [149] train-error:0.039730 eval-error:0.090000
#> [150] train-error:0.039730 eval-error:0.090000
#> [151] train-error:0.039730 eval-error:0.095000
#> [152] train-error:0.039730 eval-error:0.095000
#> [153] train-error:0.039730 eval-error:0.095000
#> [154] train-error:0.039730 eval-error:0.095000
#> [155] train-error:0.039730 eval-error:0.096667
#> [156] train-error:0.039730 eval-error:0.096667
#> [157] train-error:0.039730 eval-error:0.095000
#> [158] train-error:0.039730 eval-error:0.095000
#> [159] train-error:0.039730 eval-error:0.090000
#> [160] train-error:0.039730 eval-error:0.088333
#> [161] train-error:0.039730 eval-error:0.088333
#> [162] train-error:0.039730 eval-error:0.090000
#> [163] train-error:0.039730 eval-error:0.090000
#> [164] train-error:0.039730 eval-error:0.091667
#> [165] train-error:0.039730 eval-error:0.090000
#> [166] train-error:0.039730 eval-error:0.090000
#> [167] train-error:0.039730 eval-error:0.091667
#> [168] train-error:0.039730 eval-error:0.090000
#> [169] train-error:0.039730 eval-error:0.093333
#> [170] train-error:0.039730 eval-error:0.090000
#> [171] train-error:0.039730 eval-error:0.088333
#> [172] train-error:0.039730 eval-error:0.086667
#> [173] train-error:0.039730 eval-error:0.088333
#> [174] train-error:0.039730 eval-error:0.088333
#> [175] train-error:0.039730 eval-error:0.086667
#> [176] train-error:0.039730 eval-error:0.085000
#> [177] train-error:0.039730 eval-error:0.085000
#> Stopping. Best iteration:
#> [77] train-error:0.040127 eval-error:0.081667
test_sentences[,prediction := predict(bst, dtest, type = "prob") > 0.5]
test_sentences[label == T, sum(label != prediction)]
#> [1] 32
test_sentences[label == T, sum(label == prediction)]
#> [1] 123
test_sentences[, sum(label == prediction)/length(label)]
#> [1] 0.9183333
test_sentences[, mean(label)]
#> [1] 0.2583333
get.features.matrix <- . %>%
get.matrix() %>%
transform(tfidf) %>%
add.lsa(lsa.full.text) %>%
xgb.DMatrix()
sentences_to_explain <- test_sentences[label == T][1:10, text]
system.time(results <- lime(sentences_to_explain, bst, get.features.matrix, keep_word_position = FALSE)(cases = sentences_to_explain, n_labels = 1, n_features = 5) %T>%
print)
#> Error in eval(lhs, parent, parent): attempt to apply non-function
#> Timing stopped at: 0 0 0
system.time(lime(sentences_to_explain, bst, get.features.matrix, keep_word_position = FALSE)(cases = sentences_to_explain, n_labels = 1, n_features = 4, feature_select = "tree"))
#> Error in system.time(lime(sentences_to_explain, bst, get.features.matrix, : attempt to apply non-function
#> Timing stopped at: 0 0 0
plot_text_explanations(results) %>% print()
#> Error in is.data.frame(explanations): object 'results' not found
long_document <- test_sentences[label == T][5, text] %>% rep(50) %>% paste(collapse = " ")
system.time(lime(long_document, bst, get.features.matrix, keep_word_position = FALSE, feature_select = "highest_weights")(cases = long_document, n_labels = 1, n_features = 5) %T>%
print)
#> Error in eval(lhs, parent, parent): attempt to apply non-function
#> Timing stopped at: 0 0 0
system.time(lime(long_document, bst, get.features.matrix, keep_word_position = FALSE, feature_select = "tree")(cases = long_document, n_labels = 1, n_features = 5) %T>%
print)
#> Error in eval(lhs, parent, parent): attempt to apply non-function
#> Timing stopped at: 0 0 0
system.time(lime(long_document, bst, get.features.matrix, keep_word_position = TRUE, feature_select = "tree")(cases = long_document, n_labels = 1, n_features = 5) %T>%
print)
#> Error in eval(lhs, parent, parent): attempt to apply non-function
#> Timing stopped at: 0 0 0
system.time(lime(long_document, bst, get.features.matrix, keep_word_position = TRUE, feature_select = "highest_weights")(cases = long_document, n_labels = 1, n_features = 5) %T>%
print)
#> Error in eval(lhs, parent, parent): attempt to apply non-function
#> Timing stopped at: 0 0 0
Created on 2020-08-19 by the reprex package (v0.3.0)
> sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18362)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] reprex_0.3.0
loaded via a namespace (and not attached):
[1] ps_1.3.4 crayon_1.3.4 digest_0.6.25 R6_2.4.1 lifecycle_0.2.0 magrittr_1.5 evaluate_0.14
[8] pillar_1.4.6 rlang_0.4.7 rstudioapi_0.11 fs_1.5.0 callr_3.4.3 whisker_0.4 vctrs_0.3.2
[15] ellipsis_0.3.1 rmarkdown_2.3 tools_4.0.2 processx_3.4.3 xfun_0.16 yaml_2.2.1 compiler_4.0.2
[22] pkgconfig_2.0.3 clipr_0.7.0 htmltools_0.5.0 knitr_1.29 tibble_3.0.3
I have issue with model explamation lime() try demo in lime/demo/text_classification_explanation.R it game me error:
`results <- lime(sentences_to_explain, bst, get.features.matrix, keep_word_position = false)(cases = sentences_to_explain, n_labels = 1, n_features = 5)
Error in eval(lhs, parent, parent) : attempt to apply non-function ` Can you please suggest? Thanks.