thomasp85 / lime

Local Interpretable Model-Agnostic Explanations (R port of original Python package)
https://lime.data-imaginist.com/
Other
480 stars 109 forks source link

error in text_classification_explanation #170

Open IrinaMax opened 4 years ago

IrinaMax commented 4 years ago

I have issue with model explamation lime() try demo in lime/demo/text_classification_explanation.R it game me error:

`results <- lime(sentences_to_explain, bst, get.features.matrix, keep_word_position = false)(cases = sentences_to_explain, n_labels = 1, n_features = 5)

Error in eval(lhs, parent, parent) : attempt to apply non-function ` Can you please suggest? Thanks.

mkrasmus commented 3 years ago

Just bumping this along with reprex() and sessionInfo() output.

library(lime)
library(stringi)
library(text2vec)
library(data.table)
library(magrittr)
library(purrr)
#> 
#> Attaching package: 'purrr'
#> The following object is masked from 'package:magrittr':
#> 
#>     set_names
#> The following object is masked from 'package:data.table':
#> 
#>     transpose
library(xgboost)

set.seed(2000)

# Data loading
data("train_sentences")
data("test_sentences")
data("stop_words_sentences")

setDT(train_sentences)
setDT(test_sentences)

label_to_explain <- "OWNX"

# label train set and test set
train_sentences[, label := class.text == label_to_explain]
test_sentences[, label := class.text == label_to_explain]

get.iterator <- function(data) itoken(data, preprocess_function = tolower, tokenizer = word_tokenizer, progressbar = F)

# Extract vocabulary
v <- create_vocabulary(get.iterator(train_sentences$text), stopwords = stop_words_sentences)

# Function to transform text in matrix
get.matrix <- function(data) {
  i <- get.iterator(data)
  create_dtm(i, vocab_vectorizer(v))
}

lsa.full.text <- LSA$new(n_topics = 100)
tfidf <- TfIdf$new()
invisible(get.matrix(train_sentences$text) %>% tfidf$fit_transform())
invisible(get.matrix(train_sentences$text) %>% transform(tfidf) %>% lsa.full.text$fit_transform())
#> INFO  [10:49:54.450] soft_als: iter 001, frobenious norm change 8.557 loss NA  
#> INFO  [10:49:55.028] soft_als: iter 002, frobenious norm change 0.531 loss NA  
#> INFO  [10:49:55.637] soft_als: iter 003, frobenious norm change 0.114 loss NA  
#> INFO  [10:49:56.123] soft_als: iter 004, frobenious norm change 0.040 loss NA  
#> INFO  [10:49:56.539] soft_als: iter 005, frobenious norm change 0.019 loss NA  
#> INFO  [10:49:56.969] soft_als: iter 006, frobenious norm change 0.010 loss NA  
#> INFO  [10:49:57.396] soft_als: iter 007, frobenious norm change 0.006 loss NA  
#> INFO  [10:49:57.817] soft_als: iter 008, frobenious norm change 0.004 loss NA  
#> INFO  [10:49:58.239] soft_als: iter 009, frobenious norm change 0.003 loss NA  
#> INFO  [10:49:58.680] soft_als: iter 010, frobenious norm change 0.002 loss NA  
#> INFO  [10:49:59.243] soft_als: iter 011, frobenious norm change 0.001 loss NA  
#> INFO  [10:49:59.687] soft_als: iter 012, frobenious norm change 0.001 loss NA  
#> INFO  [10:50:00.140] soft_als: iter 013, frobenious norm change 0.001 loss NA  
#> INFO  [10:50:00.141] soft_impute: converged with tol 0.001000 after 13 iter

add.lsa <- function(m, lsa) {
  l <- transform(m, lsa)
  colnames(l) <- ncol(l) %>% seq() %>% paste0("lsa.", .)
  cbind2(m, l)
}

dtrain <- get.matrix(train_sentences$text) %>% transform(tfidf) %>% add.lsa(lsa.full.text) %>% xgb.DMatrix(label = train_sentences$label)
dtest <-  get.matrix(test_sentences$text) %>% transform(tfidf) %>% add.lsa(lsa.full.text) %>% xgb.DMatrix(label = test_sentences$label)

watchlist <- list(train = dtrain, eval = dtest)
param <- list(max_depth = 7, eta = 0.1, objective = "binary:logistic", eval_metric = "error", nthread = 1)
bst <- xgb.train(param, dtrain, nrounds = 500, watchlist, early_stopping_rounds = 100)
#> [1]  train-error:0.126738    eval-error:0.178333 
#> Multiple eval metrics are present. Will use eval_error for early stopping.
#> Will train until eval_error hasn't improved in 100 rounds.
#> 
#> [2]  train-error:0.114025    eval-error:0.170000 
#> [3]  train-error:0.116011    eval-error:0.163333 
#> [4]  train-error:0.106079    eval-error:0.153333 
#> [5]  train-error:0.105284    eval-error:0.160000 
#> [6]  train-error:0.100914    eval-error:0.163333 
#> [7]  train-error:0.093762    eval-error:0.155000 
#> [8]  train-error:0.087406    eval-error:0.153333 
#> [9]  train-error:0.080254    eval-error:0.141667 
#> [10] train-error:0.079857    eval-error:0.143333 
#> [11] train-error:0.074295    eval-error:0.135000 
#> [12] train-error:0.064760    eval-error:0.130000 
#> [13] train-error:0.067143    eval-error:0.133333 
#> [14] train-error:0.061184    eval-error:0.128333 
#> [15] train-error:0.059992    eval-error:0.120000 
#> [16] train-error:0.058800    eval-error:0.121667 
#> [17] train-error:0.055622    eval-error:0.113333 
#> [18] train-error:0.053238    eval-error:0.101667 
#> [19] train-error:0.053238    eval-error:0.101667 
#> [20] train-error:0.052046    eval-error:0.100000 
#> [21] train-error:0.050854    eval-error:0.100000 
#> [22] train-error:0.049265    eval-error:0.096667 
#> [23] train-error:0.048073    eval-error:0.098333 
#> [24] train-error:0.048073    eval-error:0.096667 
#> [25] train-error:0.047676    eval-error:0.093333 
#> [26] train-error:0.048073    eval-error:0.095000 
#> [27] train-error:0.047676    eval-error:0.095000 
#> [28] train-error:0.046484    eval-error:0.088333 
#> [29] train-error:0.046484    eval-error:0.088333 
#> [30] train-error:0.046087    eval-error:0.088333 
#> [31] train-error:0.046087    eval-error:0.086667 
#> [32] train-error:0.046087    eval-error:0.086667 
#> [33] train-error:0.045689    eval-error:0.091667 
#> [34] train-error:0.045689    eval-error:0.091667 
#> [35] train-error:0.045292    eval-error:0.091667 
#> [36] train-error:0.044895    eval-error:0.095000 
#> [37] train-error:0.044895    eval-error:0.098333 
#> [38] train-error:0.044497    eval-error:0.093333 
#> [39] train-error:0.044497    eval-error:0.093333 
#> [40] train-error:0.044497    eval-error:0.093333 
#> [41] train-error:0.044497    eval-error:0.095000 
#> [42] train-error:0.043306    eval-error:0.093333 
#> [43] train-error:0.042511    eval-error:0.093333 
#> [44] train-error:0.041716    eval-error:0.093333 
#> [45] train-error:0.040922    eval-error:0.090000 
#> [46] train-error:0.041319    eval-error:0.090000 
#> [47] train-error:0.040524    eval-error:0.086667 
#> [48] train-error:0.040524    eval-error:0.086667 
#> [49] train-error:0.040524    eval-error:0.086667 
#> [50] train-error:0.040524    eval-error:0.086667 
#> [51] train-error:0.040524    eval-error:0.086667 
#> [52] train-error:0.040524    eval-error:0.086667 
#> [53] train-error:0.040524    eval-error:0.085000 
#> [54] train-error:0.040524    eval-error:0.086667 
#> [55] train-error:0.040524    eval-error:0.085000 
#> [56] train-error:0.040524    eval-error:0.085000 
#> [57] train-error:0.040524    eval-error:0.085000 
#> [58] train-error:0.040524    eval-error:0.085000 
#> [59] train-error:0.040524    eval-error:0.083333 
#> [60] train-error:0.040524    eval-error:0.083333 
#> [61] train-error:0.040524    eval-error:0.083333 
#> [62] train-error:0.040524    eval-error:0.086667 
#> [63] train-error:0.040524    eval-error:0.085000 
#> [64] train-error:0.040524    eval-error:0.088333 
#> [65] train-error:0.040127    eval-error:0.085000 
#> [66] train-error:0.040127    eval-error:0.088333 
#> [67] train-error:0.040127    eval-error:0.085000 
#> [68] train-error:0.040127    eval-error:0.085000 
#> [69] train-error:0.040127    eval-error:0.083333 
#> [70] train-error:0.040127    eval-error:0.083333 
#> [71] train-error:0.040127    eval-error:0.085000 
#> [72] train-error:0.040127    eval-error:0.085000 
#> [73] train-error:0.040127    eval-error:0.085000 
#> [74] train-error:0.040127    eval-error:0.085000 
#> [75] train-error:0.040127    eval-error:0.085000 
#> [76] train-error:0.040127    eval-error:0.083333 
#> [77] train-error:0.040127    eval-error:0.081667 
#> [78] train-error:0.040127    eval-error:0.081667 
#> [79] train-error:0.040127    eval-error:0.083333 
#> [80] train-error:0.039730    eval-error:0.085000 
#> [81] train-error:0.039730    eval-error:0.088333 
#> [82] train-error:0.039730    eval-error:0.086667 
#> [83] train-error:0.039730    eval-error:0.086667 
#> [84] train-error:0.039730    eval-error:0.086667 
#> [85] train-error:0.039730    eval-error:0.086667 
#> [86] train-error:0.039730    eval-error:0.088333 
#> [87] train-error:0.039730    eval-error:0.086667 
#> [88] train-error:0.039730    eval-error:0.086667 
#> [89] train-error:0.039730    eval-error:0.088333 
#> [90] train-error:0.039730    eval-error:0.088333 
#> [91] train-error:0.039730    eval-error:0.090000 
#> [92] train-error:0.039730    eval-error:0.091667 
#> [93] train-error:0.039730    eval-error:0.088333 
#> [94] train-error:0.039730    eval-error:0.085000 
#> [95] train-error:0.039730    eval-error:0.088333 
#> [96] train-error:0.039730    eval-error:0.086667 
#> [97] train-error:0.039730    eval-error:0.086667 
#> [98] train-error:0.039730    eval-error:0.086667 
#> [99] train-error:0.039730    eval-error:0.088333 
#> [100]    train-error:0.039730    eval-error:0.088333 
#> [101]    train-error:0.039730    eval-error:0.091667 
#> [102]    train-error:0.039730    eval-error:0.091667 
#> [103]    train-error:0.039730    eval-error:0.090000 
#> [104]    train-error:0.039730    eval-error:0.090000 
#> [105]    train-error:0.039730    eval-error:0.090000 
#> [106]    train-error:0.039730    eval-error:0.090000 
#> [107]    train-error:0.039730    eval-error:0.091667 
#> [108]    train-error:0.039730    eval-error:0.090000 
#> [109]    train-error:0.039730    eval-error:0.086667 
#> [110]    train-error:0.039730    eval-error:0.086667 
#> [111]    train-error:0.039730    eval-error:0.083333 
#> [112]    train-error:0.039730    eval-error:0.085000 
#> [113]    train-error:0.039730    eval-error:0.090000 
#> [114]    train-error:0.039730    eval-error:0.090000 
#> [115]    train-error:0.039730    eval-error:0.088333 
#> [116]    train-error:0.039730    eval-error:0.088333 
#> [117]    train-error:0.039730    eval-error:0.086667 
#> [118]    train-error:0.039730    eval-error:0.088333 
#> [119]    train-error:0.039730    eval-error:0.090000 
#> [120]    train-error:0.039730    eval-error:0.091667 
#> [121]    train-error:0.039730    eval-error:0.093333 
#> [122]    train-error:0.039730    eval-error:0.095000 
#> [123]    train-error:0.039730    eval-error:0.093333 
#> [124]    train-error:0.039730    eval-error:0.091667 
#> [125]    train-error:0.039730    eval-error:0.088333 
#> [126]    train-error:0.039730    eval-error:0.090000 
#> [127]    train-error:0.039730    eval-error:0.090000 
#> [128]    train-error:0.039730    eval-error:0.090000 
#> [129]    train-error:0.039730    eval-error:0.093333 
#> [130]    train-error:0.039730    eval-error:0.093333 
#> [131]    train-error:0.039730    eval-error:0.095000 
#> [132]    train-error:0.039730    eval-error:0.091667 
#> [133]    train-error:0.039730    eval-error:0.091667 
#> [134]    train-error:0.039730    eval-error:0.091667 
#> [135]    train-error:0.039730    eval-error:0.093333 
#> [136]    train-error:0.039730    eval-error:0.093333 
#> [137]    train-error:0.039730    eval-error:0.091667 
#> [138]    train-error:0.039730    eval-error:0.093333 
#> [139]    train-error:0.039730    eval-error:0.093333 
#> [140]    train-error:0.039730    eval-error:0.093333 
#> [141]    train-error:0.039730    eval-error:0.093333 
#> [142]    train-error:0.039730    eval-error:0.093333 
#> [143]    train-error:0.039730    eval-error:0.095000 
#> [144]    train-error:0.039730    eval-error:0.095000 
#> [145]    train-error:0.039730    eval-error:0.093333 
#> [146]    train-error:0.039730    eval-error:0.091667 
#> [147]    train-error:0.039730    eval-error:0.091667 
#> [148]    train-error:0.039730    eval-error:0.093333 
#> [149]    train-error:0.039730    eval-error:0.090000 
#> [150]    train-error:0.039730    eval-error:0.090000 
#> [151]    train-error:0.039730    eval-error:0.095000 
#> [152]    train-error:0.039730    eval-error:0.095000 
#> [153]    train-error:0.039730    eval-error:0.095000 
#> [154]    train-error:0.039730    eval-error:0.095000 
#> [155]    train-error:0.039730    eval-error:0.096667 
#> [156]    train-error:0.039730    eval-error:0.096667 
#> [157]    train-error:0.039730    eval-error:0.095000 
#> [158]    train-error:0.039730    eval-error:0.095000 
#> [159]    train-error:0.039730    eval-error:0.090000 
#> [160]    train-error:0.039730    eval-error:0.088333 
#> [161]    train-error:0.039730    eval-error:0.088333 
#> [162]    train-error:0.039730    eval-error:0.090000 
#> [163]    train-error:0.039730    eval-error:0.090000 
#> [164]    train-error:0.039730    eval-error:0.091667 
#> [165]    train-error:0.039730    eval-error:0.090000 
#> [166]    train-error:0.039730    eval-error:0.090000 
#> [167]    train-error:0.039730    eval-error:0.091667 
#> [168]    train-error:0.039730    eval-error:0.090000 
#> [169]    train-error:0.039730    eval-error:0.093333 
#> [170]    train-error:0.039730    eval-error:0.090000 
#> [171]    train-error:0.039730    eval-error:0.088333 
#> [172]    train-error:0.039730    eval-error:0.086667 
#> [173]    train-error:0.039730    eval-error:0.088333 
#> [174]    train-error:0.039730    eval-error:0.088333 
#> [175]    train-error:0.039730    eval-error:0.086667 
#> [176]    train-error:0.039730    eval-error:0.085000 
#> [177]    train-error:0.039730    eval-error:0.085000 
#> Stopping. Best iteration:
#> [77] train-error:0.040127    eval-error:0.081667

test_sentences[,prediction := predict(bst, dtest, type = "prob") > 0.5]
test_sentences[label == T, sum(label != prediction)]
#> [1] 32
test_sentences[label == T, sum(label == prediction)]
#> [1] 123
test_sentences[, sum(label == prediction)/length(label)]
#> [1] 0.9183333
test_sentences[, mean(label)]
#> [1] 0.2583333

get.features.matrix <- . %>%
  get.matrix() %>%
  transform(tfidf) %>%
  add.lsa(lsa.full.text) %>%
  xgb.DMatrix()

sentences_to_explain <- test_sentences[label == T][1:10, text]

system.time(results <- lime(sentences_to_explain, bst, get.features.matrix, keep_word_position = FALSE)(cases = sentences_to_explain, n_labels = 1, n_features = 5) %T>%
  print)
#> Error in eval(lhs, parent, parent): attempt to apply non-function
#> Timing stopped at: 0 0 0

system.time(lime(sentences_to_explain, bst, get.features.matrix, keep_word_position = FALSE)(cases = sentences_to_explain, n_labels = 1, n_features = 4, feature_select = "tree"))
#> Error in system.time(lime(sentences_to_explain, bst, get.features.matrix, : attempt to apply non-function
#> Timing stopped at: 0 0 0

plot_text_explanations(results) %>% print()
#> Error in is.data.frame(explanations): object 'results' not found

long_document <- test_sentences[label == T][5, text] %>% rep(50) %>% paste(collapse = " ")
system.time(lime(long_document, bst, get.features.matrix, keep_word_position = FALSE, feature_select = "highest_weights")(cases = long_document, n_labels = 1, n_features = 5) %T>%
              print)
#> Error in eval(lhs, parent, parent): attempt to apply non-function
#> Timing stopped at: 0 0 0
system.time(lime(long_document, bst, get.features.matrix, keep_word_position = FALSE, feature_select = "tree")(cases = long_document, n_labels = 1, n_features = 5) %T>%
              print)
#> Error in eval(lhs, parent, parent): attempt to apply non-function
#> Timing stopped at: 0 0 0
system.time(lime(long_document, bst, get.features.matrix, keep_word_position = TRUE, feature_select = "tree")(cases = long_document, n_labels = 1, n_features = 5) %T>%
              print)
#> Error in eval(lhs, parent, parent): attempt to apply non-function
#> Timing stopped at: 0 0 0
system.time(lime(long_document, bst, get.features.matrix, keep_word_position = TRUE, feature_select = "highest_weights")(cases = long_document, n_labels = 1, n_features = 5) %T>%
              print)
#> Error in eval(lhs, parent, parent): attempt to apply non-function
#> Timing stopped at: 0 0 0
Created on 2020-08-19 by the reprex package (v0.3.0)
> sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18362)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] reprex_0.3.0

loaded via a namespace (and not attached):
 [1] ps_1.3.4        crayon_1.3.4    digest_0.6.25   R6_2.4.1        lifecycle_0.2.0 magrittr_1.5    evaluate_0.14  
 [8] pillar_1.4.6    rlang_0.4.7     rstudioapi_0.11 fs_1.5.0        callr_3.4.3     whisker_0.4     vctrs_0.3.2    
[15] ellipsis_0.3.1  rmarkdown_2.3   tools_4.0.2     processx_3.4.3  xfun_0.16       yaml_2.2.1      compiler_4.0.2 
[22] pkgconfig_2.0.3 clipr_0.7.0     htmltools_0.5.0 knitr_1.29      tibble_3.0.3