# Reproduction script: integer columns with missing values come back from
# synapser's data-frame conversion as -2147483648 instead of NA.
#
# Run interactively, top to bottom. Requires an authenticated Synapse session.

# Packages ----
library(synapser)   # synTableQuery(), synGet(), as.data.frame() method
library(dplyr)      # %>%, select(), contains()
library(reticulate) # pandas comparison below

# Query the previous version of the table ----
table_id <- "syn21446700"
previous_version <- synGet(table_id)$properties$versionNumber - 1
versioned_id <- paste0(table_id, ".", previous_version)
test1 <- synTableQuery(sprintf("SELECT * FROM %s", versioned_id))

# pull code other than as.data.frame()
View(as.data.frame(test1))
View(test1$asDataFrame())

# Inspect the query result object ----
# look at where the file was temporarily saved; reuse this path below instead
# of a machine-specific absolute path so the script runs on any machine
csv_path <- test1$filepath
csv_path
# schema is NULL
test1$schema
# hybrid_death_int is recorded as an integer
test1$headers
# CsvFileTable is correct
class(test1)

# Read the cached CSV directly ----
# with read.csv it's read in correctly
test2 <- read.csv(csv_path)
# with pandas read_csv it's also read in correctly
pd <- reticulate::import("pandas")
test3 <- pd$read_csv(csv_path)

# this is the problem - for some reason there is an issue converting to df
test1$asDataFrame() %>%
  select(contains("hybrid_death")) %>%
  View()

# no issues - numeric correctly NA
test4 <- read.csv(
  csv_path,
  encoding = "UTF-8",
  stringsAsFactors = FALSE,
  check.names = FALSE,
  na.strings = c(""),
  colClasses = "character"
)
test4 %>%
  select(contains("hybrid_death")) %>%
  View()
# these have correct NA
as.integer(test4$hybrid_death_int)
as.numeric(test4$hybrid_death_int)

# reproduces the error
test5 <- synapser::as.data.frame(test1)
test5 %>%
  select(contains("hybrid_death")) %>%
  View()

# this doesn't replicate the error, NAs are correct
# .readCsvBasedOnSchema() is not exported; source it first from:
# https://github.com/Sage-Bionetworks/synapser/blob/8b2d310ca11c56d2514fb8d719edeb2a1cecc475/R/table.R#L120-L131
test6 <- .readCsvBasedOnSchema(test1)
test6 %>%
  select(contains("hybrid_death")) %>%
  View()
Operating system
Windows 10
Description of the problem
When reading in data, numeric variables that have missing values are not read in as NA but are instead read in as -2147483648 (the smallest 32-bit integer, which is the value R uses internally to represent NA_integer_).
Expected behavior
Columns are numeric, with missing values coded as NA
Actual behavior
No errors, warnings, or messages. Columns are numeric, but missing values are coded as -2147483648
Output of
sessionInfo()
R version 4.1.2 (2021-11-01)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19045)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C
[5] LC_TIME=English_United States.1252

attached base packages:
[1] stats graphics grDevices utils datasets methods base

other attached packages:
[1] synapser_1.0.0

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.9 pillar_1.8.1 compiler_4.1.2 plyr_1.8.8 tools_4.1.2 digest_0.6.30
 [7] jsonlite_1.8.4 evaluate_0.20 lifecycle_1.0.3 tibble_3.1.8 lattice_0.20-45 pkgconfig_2.0.3
[13] png_0.1-8 rlang_1.1.1 Matrix_1.5-3 cli_3.4.1 rstudioapi_0.14 yaml_2.3.7
[19] xfun_0.37 fastmap_1.1.0 withr_2.5.0 dplyr_1.1.0 knitr_1.42 generics_0.1.3
[25] vctrs_0.6.3 rprojroot_2.0.3 grid_4.1.2 tidyselect_1.2.0 reticulate_1.28 glue_1.6.2
[31] here_1.0.1 R6_2.5.1 fansi_1.0.4 rmarkdown_2.23 magrittr_2.0.3 codetools_0.2-18
[37] htmltools_0.5.3 utf8_1.2.3 rjson_0.2.21
Test code from meeting with @thomasyu888 today