Closed mattsigal closed 5 years ago
Hi Jacob,
Sorry for the delay, but I finally got to testing this and have come across an error using split. Here is a reproducible example using a portion of a dataset I'm working with:
test <- structure(list( Theory = structure(c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 6L, 6L, 6L, 9L, 9L, NA, 1L, 1L, NA, NA, NA, NA, NA, 1L, 1L, 6L, NA, 1L, 1L, 1L, NA, NA, 1L, 1L, NA, 2L, NA, 1L, 1L, 4L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, NA, 1L, 1L, 1L, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, 1L, NA, NA, 2L, NA, NA, NA, NA, 9L, 9L, 1L, 1L, 1L, 6L, 6L, 1L, 1L, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, NA, NA, NA, NA, 1L, 1L, 1L, 8L, 1L, NA, 6L, 1L, 1L, 1L, NA, NA, NA, NA, NA, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, NA, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, NA, 1L, 8L, NA, 8L, 8L, NA, NA, NA, NA, 2L, 1L, 2L, 10L, 1L, 1L, 1L, 1L, 1L, NA, NA, NA, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, NA, NA, NA, NA, NA, NA, 1L, NA, 9L, NA, NA, NA, 1L, 1L, 1L, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, 1L, NA, 1L, NA, 1L, 1L, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, NA, 9L, 9L, 9L, 9L, 9L, 9L, 1L, 1L, 1L, 1L, 2L, NA, NA, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 6L, 6L, 6L, 6L, 7L, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, 1L, 1L, NA, 1L, 1L, 1L, 1L, 1L), .Label = c("Behaviourism", "Behaviourism, Cognitive", "Behaviourism, Gestalt", "Behaviourism, Psychodynamic", "Behaviourism, Psychodynamic, Cognitive", "Cognitive", "Functionalism", "Gestalt", "Psychodynamic", "Structuralism"), class = 
"factor"), Format = structure(c(1L, 1L, 24L, 1L, 1L, 1L, 1L, 2L, 1L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 12L, 1L, 1L, 2L, 1L, 1L, 19L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 12L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 11L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 15L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 13L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 7L, 12L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 8L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L, 7L, 1L, 1L, 15L, 1L, 5L, 25L, 5L, 24L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 15L, 1L, 1L, 1L, 1L, 20L, 1L, 18L, 12L, 1L, 1L, NA, 20L, 20L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 25L, 15L, 16L, 15L, 15L, 1L, 1L, 1L, 1L, 19L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 12L, 12L, 5L, 5L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 14L, 1L, 1L, 1L, 1L, 14L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 15L, 12L, NA, 15L, 1L, NA, NA, 1L, 1L, 6L, 1L, 1L, 1L, 1L, 14L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 12L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 22L, 1L, 21L, 23L, 5L, 1L, 1L, 1L, 1L, 10L, 1L, 1L, 1L, 1L, 5L, 17L, 1L, 17L, 6L, 1L, 1L, 9L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 12L, 1L, 18L, 1L, 21L, 18L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 12L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 24L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L), .Label = c("16mm", "16mm, 35mm", "16mm, 35mm, VHS", "16mm, AVI", "16mm, Digital", "16mm, DVD", "16mm, DVD, Betacam SP", "16mm, DVD, Digital, Betacam SP", "16mm, DVD, Mini-DV", "16mm, MP4", "16mm, MPG", "16mm, 
VHS", "16mm, VHS, AVI", "16mm, VHS, Digital", "16mm, VHS, DVD", "16mm, VHS, DVD, Digital, AVI", "35mm", "8mm", "8mm, 16mm", "DVD", "DVD, AVI", "Mini-DV", "MPG", "VHS", "VHS, DVD, Digital"), class = "factor")), .Names = c("Theory", "Format"), row.names = c(NA, -427L), class = c("tbl_df", "tbl", "data.frame"))
###
library(fastDummies)
levels(test$Theory) # Shows 10 combinations used of 6 different theories
dummy_cols(test, select_columns = "Theory", split = ", ") # Error in strsplit(.data[[col_name]], split) : non-character argument
dummy_cols(test, select_columns = "Theory", split = ",") # Same error
If I leave out the split argument, I get all of the new columns as I would expect:
new <- dummy_cols(test, select_columns = "Theory")
names(new)
Any ideas what is causing the strsplit() error? I tested this with the version of fastDummies on CRAN (v1.4.0).
strsplit()
Thanks, I'll take a look.
It seems the issue was caused by the column being a factor rather than a character vector. Can you try the dev version?
Hi Jacob,
Sorry for the delay, but I finally got to testing this and have come across an error using split. Here is a reproducible example using a portion of a dataset I'm working with:
If I leave out the split argument, I get all of the new columns as I would expect:
Any ideas what is causing the strsplit() error? I tested this with the version of fastDummies on CRAN (v1.4.0).