THLfi / read.gt3x

R-package for reading Actigraph's binary activity data
20 stars 7 forks source link

Imputation failure #11

Closed muschellij2 closed 4 years ago

muschellij2 commented 4 years ago
library(read.gt3x)
library(R.utils)
#> Loading required package: R.oo
#> Loading required package: R.methodsS3
#> R.methodsS3 v1.8.0 (2020-02-14 07:10:20 UTC) successfully loaded. See ?R.methodsS3 for help.
#> R.oo v1.23.0 successfully loaded. See ?R.oo for help.
#> 
#> Attaching package: 'R.oo'
#> The following object is masked from 'package:R.methodsS3':
#> 
#>     throw
#> The following objects are masked from 'package:methods':
#> 
#>     getClasses, getMethods
#> The following objects are masked from 'package:base':
#> 
#>     attach, detach, load, save
#> R.utils v2.9.2-9000 successfully loaded. See ?R.utils for help.
#> 
#> Attaching package: 'R.utils'
#> The following object is masked from 'package:utils':
#> 
#>     timestamp
#> The following objects are masked from 'package:base':
#> 
#>     cat, commandArgs, getOption, inherits, isOpen, nullfile, parse,
#>     warnings
# Keep the gzipped example file in this session's temp directory and only
# download it when it is not already there.
destfile = file.path(tempdir(), "AI5_NEO1B41100262_2017-06-13.gt3x.gz")
if (!file.exists(destfile)) {
  dl = download.file("https://ndownloader.figshare.com/files/21855588",
                     destfile = destfile)
}
# Decompress the archive; remove = FALSE keeps the .gz so a rerun can skip the
# download, overwrite = TRUE replaces any earlier extraction.
gt3x = R.utils::gunzip(destfile, 
                       overwrite = TRUE,
                       remove = FALSE)
is_gt3x(gt3x)
#> /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T/RtmptrtpP7/AI5_NEO1B41100262_2017-06-13.gt3x 
#>                                                                                          TRUE
read.gt3x:::have_info(gt3x)
#> [1] TRUE
acc = read.gt3x(gt3x, verbose = TRUE, asDataFrame = TRUE)
#> Input is a .gt3x file, unzipping to a temporary location first...
#> Unzipping gt3x data to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmptrtpP7
#> 1/1
#> Unzipping /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T/RtmptrtpP7/AI5_NEO1B41100262_2017-06-13.gt3x
#>  === info.txt, log.bin extracted to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmptrtpP7/AI5_NEO1B41100262_2017-06-13
#> GT3X information
#>  $ Serial Number     :"NEO1B41100262"
#>  $ Device Type       :"GT3XPlus"
#>  $ Firmware          :"3.2.1"
#>  $ Battery Voltage   :"3.92"
#>  $ Sample Rate       :30
#>  $ Start Date        : POSIXct, format: "2017-06-13 14:15:00"
#>  $ Stop Date         : POSIXct, format: "2017-06-20 14:15:00"
#>  $ Last Sample Time  : POSIXct, format: "2017-06-20 14:15:00"
#>  $ TimeZone          :"01:00:00"
#>  $ Download Date     : POSIXct, format: "2017-07-05 17:43:35"
#>  $ Board Revision    :"1"
#>  $ Unexpected Resets :"0"
#>  $ Acceleration Scale:341
#>  $ Acceleration Min  :"-6.0"
#>  $ Acceleration Max  :"6.0"
#>  $ Sex               :"Female"
#>  $ Limb              :"Wrist"
#>  $ Side              :"Right"
#>  $ Dominance         :"Dominant"
#>  $ Subject Name      :"PH04"
#>  $ Serial Prefix     :"NEO"
#> Parsing GT3X data via CPP.. expected sample size: 18144000
#> Total Records: 8844120
#> Scaling...
#> Removing excess rows 
#> Creating dimnames 
#> CPP returning
#> Done (in 3.62087488174438 seconds)
miss = attributes(acc)$missingness
range(miss$time)
#> [1] "1970-01-01 09:06:06 GMT" "2017-06-20 14:14:59 GMT"
range(acc$time)
#> [1] "2016-05-06 21:34:36 GMT" "2016-05-30 05:34:32 GMT"
tail(miss)
#>                           time   n_missing
#> 1497962980 2017-06-20 12:49:40         510
#> 1497964948 2017-06-20 13:22:28         480
#> 1497966788 2017-06-20 13:53:08        1350
#> 1497966852 2017-06-20 13:54:12        1080
#> 1497967229 2017-06-20 14:00:29          60
#> 1497968099 2017-06-20 14:14:59 -1970243030
# Differences between consecutive timestamps, as plain numbers.
# NOTE(review): diff() on date-times chooses its units automatically, so the
# 60000 threshold below is presumably in seconds -- confirm.
d = as.numeric(diff(acc$time))
# Positions where the gap to the next sample exceeds the threshold.
ind = which(d > 60000)
# Show the rows around the gap.
# NOTE(review): `:` uses only the first element of each operand, so this
# assumes exactly one gap was found.
acc[(ind-1):(ind+1),]
#> Sampling Rate: 30Hz
#> Firmware Version: 3.2.1
#> Serial Number Prefix: NEO
#>              X      Y      Z                time
#> 5526869  0.053  0.038 -0.971 2016-05-20 11:34:29
#> 5526870  0.053  0.041 -0.971 2016-05-20 11:34:29
#> 5526871 -0.015 -0.287 -0.985 2016-05-21 05:21:59
acc = read.gt3x(gt3x, verbose = TRUE, asDataFrame = TRUE, imputeZeroes = TRUE)
#> Input is a .gt3x file, unzipping to a temporary location first...
#> Unzipping gt3x data to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmptrtpP7
#> 1/1
#> Unzipping /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T/RtmptrtpP7/AI5_NEO1B41100262_2017-06-13.gt3x
#>  === info.txt, log.bin extracted to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmptrtpP7/AI5_NEO1B41100262_2017-06-13
#> GT3X information
#>  $ Serial Number     :"NEO1B41100262"
#>  $ Device Type       :"GT3XPlus"
#>  $ Firmware          :"3.2.1"
#>  $ Battery Voltage   :"3.92"
#>  $ Sample Rate       :30
#>  $ Start Date        : POSIXct, format: "2017-06-13 14:15:00"
#>  $ Stop Date         : POSIXct, format: "2017-06-20 14:15:00"
#>  $ Last Sample Time  : POSIXct, format: "2017-06-20 14:15:00"
#>  $ TimeZone          :"01:00:00"
#>  $ Download Date     : POSIXct, format: "2017-07-05 17:43:35"
#>  $ Board Revision    :"1"
#>  $ Unexpected Resets :"0"
#>  $ Acceleration Scale:341
#>  $ Acceleration Min  :"-6.0"
#>  $ Acceleration Max  :"6.0"
#>  $ Sex               :"Female"
#>  $ Limb              :"Wrist"
#>  $ Side              :"Right"
#>  $ Dominance         :"Dominant"
#>  $ Subject Name      :"PH04"
#>  $ Serial Prefix     :"NEO"
#> Parsing GT3X data via CPP.. expected sample size: 18144000
#> Error in parseGT3X(logpath, max_samples = samples, scale_factor = info$`Acceleration Scale`, : Index out of bounds: [index=18144000; extent=18144000].

Created on 2020-03-03 by the reprex package (v0.3.0.9001)

Session info ``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 3.6.2 (2019-12-12) #> os macOS Mojave 10.14.6 #> system x86_64, darwin15.6.0 #> ui X11 #> language (EN) #> collate en_US.UTF-8 #> ctype en_US.UTF-8 #> tz America/New_York #> date 2020-03-03 #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date lib source #> assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.6.0) #> backports 1.1.5 2019-10-02 [1] CRAN (R 3.6.0) #> cli 2.0.2 2020-02-28 [1] CRAN (R 3.6.0) #> crayon 1.3.4 2017-09-16 [1] CRAN (R 3.6.0) #> digest 0.6.25 2020-02-23 [1] CRAN (R 3.6.0) #> evaluate 0.14 2019-05-28 [1] CRAN (R 3.6.0) #> fansi 0.4.1 2020-01-08 [1] CRAN (R 3.6.0) #> fs 1.3.1 2019-05-06 [1] CRAN (R 3.6.0) #> glue 1.3.1 2019-03-12 [1] CRAN (R 3.6.0) #> highr 0.8 2019-03-20 [1] CRAN (R 3.6.0) #> htmltools 0.4.0 2019-10-04 [1] CRAN (R 3.6.0) #> knitr 1.28 2020-02-06 [1] CRAN (R 3.6.0) #> magrittr 1.5 2014-11-22 [1] CRAN (R 3.6.0) #> pillar 1.4.3 2019-12-20 [1] CRAN (R 3.6.0) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 3.6.0) #> purrr 0.3.3 2019-10-18 [1] CRAN (R 3.6.0) #> R.methodsS3 * 1.8.0 2020-02-14 [1] CRAN (R 3.6.0) #> R.oo * 1.23.0 2019-11-03 [1] CRAN (R 3.6.0) #> R.utils * 2.9.2-9000 2020-02-29 [1] local #> Rcpp 1.0.3 2019-11-08 [1] CRAN (R 3.6.2) #> read.gt3x * 0.1.0.9000 2020-02-28 [1] local #> reprex 0.3.0.9001 2020-01-05 [1] Github (tidyverse/reprex@5ae0b29) #> rlang 0.4.5 2020-03-01 [1] CRAN (R 3.6.2) #> rmarkdown 2.1 2020-01-20 [1] CRAN (R 3.6.0) #> rstudioapi 0.11.0-9000 2020-02-19 [1] Github (rstudio/rstudioapi@deb9c47) #> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.6.0) #> stringi 1.4.6 2020-02-17 [1] CRAN (R 3.6.0) #> stringr 1.4.0 2019-02-10 [1] CRAN (R 3.6.0) #> styler 1.3.2 2020-02-23 [1] CRAN (R 3.6.2) #> tibble 2.1.3 2019-06-06 [1] CRAN (R 3.6.0) #> withr 2.1.2 2018-03-15 [1] CRAN (R 3.6.0) #> xfun 0.12 2020-01-13 [1] 
CRAN (R 3.6.0) #> yaml 2.2.1 2020-02-01 [1] CRAN (R 3.6.0) #> #> [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library ```
muschellij2 commented 4 years ago

Running this multiple times showed different results; this may be an overflow issue.

TuomoNieminen commented 4 years ago

Fixed, see #15

muschellij2 commented 4 years ago

I saw you closed/reopened. I'm not sure why, I believe this is a solid close.

TuomoNieminen commented 4 years ago

I wanted to give you the chance to review/close :)

muschellij2 commented 4 years ago

Looks great!

FYI - I had made a fix_zeros function that will make things similar to the ActiGraph output when using idle sleep mode (keeps repeating the same value):

library(read.gt3x)
library(R.utils)
#> Loading required package: R.oo
#> Loading required package: R.methodsS3
#> R.methodsS3 v1.8.0 (2020-02-14 07:10:20 UTC) successfully loaded. See ?R.methodsS3 for help.
#> R.oo v1.23.0 successfully loaded. See ?R.oo for help.
#> 
#> Attaching package: 'R.oo'
#> The following object is masked from 'package:R.methodsS3':
#> 
#>     throw
#> The following objects are masked from 'package:methods':
#> 
#>     getClasses, getMethods
#> The following objects are masked from 'package:base':
#> 
#>     attach, detach, load, save
#> R.utils v2.9.2-9000 successfully loaded. See ?R.utils for help.
#> 
#> Attaching package: 'R.utils'
#> The following object is masked from 'package:utils':
#> 
#>     timestamp
#> The following objects are masked from 'package:base':
#> 
#>     cat, commandArgs, getOption, inherits, isOpen, nullfile, parse,
#>     warnings
library(zoo)
#> 
#> Attaching package: 'zoo'
#> The following objects are masked from 'package:base':
#> 
#>     as.Date, as.Date.numeric

# Replace all-zero samples with the last non-zero reading.
#
# Rows where X, Y and Z are all exactly 0 are overwritten, column by column,
# with the most recent preceding value (last observation carried forward),
# so the result keeps repeating the same value in the way the ActiGraph
# output does for idle sleep mode. All-zero rows at the very start have
# nothing to carry forward and are left as 0.
#
# gt3x: data with numeric columns X, Y and Z, and optionally a `time` column
#       (e.g. the result of read.gt3x(..., asDataFrame = TRUE)).
#
# Returns `gt3x` with X, Y and Z filled in. If a `time` column is present and
# out of order, the rows are sorted by time first and a warning is issued.
fix_zeros = function(gt3x) {
  if ("time" %in% names(gt3x)) {
    # Carrying values forward is only meaningful in time order, so re-sort
    # (and warn) only when the timestamps really are out of order.
    if (is.unsorted(gt3x$time)) {
      warning("Time is unsorted, will resort the data set")
      gt3x = gt3x[ order(gt3x$time), ]
    }
  }
  # TRUE for rows where all three axes are exactly zero.
  zero = rowSums(gt3x[, c("X", "Y", "Z")] == 0) == 3
  names(zero) = NULL
  # Mark those rows as missing so they can be filled from the previous row.
  gt3x$X[zero] = NA
  gt3x$Y[zero] = NA
  gt3x$Z[zero] = NA
  # na.rm = FALSE keeps leading NAs in place so the vector length is unchanged.
  gt3x$X = zoo::na.locf(gt3x$X, na.rm = FALSE)
  gt3x$Y = zoo::na.locf(gt3x$Y, na.rm = FALSE)
  gt3x$Z = zoo::na.locf(gt3x$Z, na.rm = FALSE)

  # Anything still missing had no earlier value to copy; put the zeros back.
  gt3x$X[ is.na(gt3x$X)] = 0
  gt3x$Y[ is.na(gt3x$Y)] = 0
  gt3x$Z[ is.na(gt3x$Z)] = 0
  gt3x
}

# Keep the gzipped example file in this session's temp directory and only
# download it when it is not already there.
destfile = file.path(tempdir(), "AI5_NEO1B41100262_2017-06-13.gt3x.gz")
if (!file.exists(destfile)) {
  dl = download.file("https://ndownloader.figshare.com/files/21855588",
                     destfile = destfile)
}
# Decompress the archive; remove = FALSE keeps the .gz so a rerun can skip the
# download, overwrite = TRUE replaces any earlier extraction.
gt3x = R.utils::gunzip(destfile, 
                       overwrite = TRUE,
                       remove = FALSE)

acc = read.gt3x(gt3x, verbose = TRUE, asDataFrame = TRUE, imputeZeroes = TRUE)
#> Input is a .gt3x file, unzipping to a temporary location first...
#> Unzipping gt3x data to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpLMo5Bz
#> 1/1
#> Unzipping /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T/RtmpLMo5Bz/AI5_NEO1B41100262_2017-06-13.gt3x
#>  === info.txt, log.bin extracted to /var/folders/1s/wrtqcpxn685_zk570bnx9_rr0000gr/T//RtmpLMo5Bz/AI5_NEO1B41100262_2017-06-13
#> GT3X information
#>  $ Serial Number     :"NEO1B41100262"
#>  $ Device Type       :"GT3XPlus"
#>  $ Firmware          :"3.2.1"
#>  $ Battery Voltage   :"3.92"
#>  $ Sample Rate       :30
#>  $ Start Date        : POSIXct, format: "2017-06-13 14:15:00"
#>  $ Stop Date         : POSIXct, format: "2017-06-20 14:15:00"
#>  $ Last Sample Time  : POSIXct, format: "2017-06-20 14:15:00"
#>  $ TimeZone          :"01:00:00"
#>  $ Download Date     : POSIXct, format: "2017-07-05 17:43:35"
#>  $ Board Revision    :"1"
#>  $ Unexpected Resets :"0"
#>  $ Acceleration Scale:341
#>  $ Acceleration Min  :"-6.0"
#>  $ Acceleration Max  :"6.0"
#>  $ Sex               :"Female"
#>  $ Limb              :"Wrist"
#>  $ Side              :"Right"
#>  $ Dominance         :"Dominant"
#>  $ Subject Name      :"PH04"
#>  $ Serial Prefix     :"NEO"
#> Parsing GT3X data via CPP.. expected sample size: 18144000
#> Total Records: 18143970
#> Scaling...
#> Creating dimnames 
#> CPP returning
#> Done (in 3.24781203269958 seconds)
# Row positions where X, Y and Z are all exactly zero.
some_inds = unname(which(rowSums(acc[, c("X", "Y", "Z")] == 0) == 3))
# Keep only the first ten of them for display.
some_inds = head(some_inds, 10)
# Add the five rows preceding each selected row for context: outer() builds
# every (index - 1..5) combination and unique() drops the overlaps.
some_inds = unique(c(some_inds, c(outer(some_inds, 1:5, "-"))))
as.data.frame(acc[some_inds,])
#>           X     Y      Z                time
#> 25231 0.000 0.000  0.000 2017-06-13 14:29:01
#> 25232 0.000 0.000  0.000 2017-06-13 14:29:01
#> 25233 0.000 0.000  0.000 2017-06-13 14:29:01
#> 25234 0.000 0.000  0.000 2017-06-13 14:29:01
#> 25235 0.000 0.000  0.000 2017-06-13 14:29:01
#> 25236 0.000 0.000  0.000 2017-06-13 14:29:01
#> 25237 0.000 0.000  0.000 2017-06-13 14:29:01
#> 25238 0.000 0.000  0.000 2017-06-13 14:29:01
#> 25239 0.000 0.000  0.000 2017-06-13 14:29:01
#> 25240 0.000 0.000  0.000 2017-06-13 14:29:01
#> 25230 0.367 0.399 -0.853 2017-06-13 14:29:00
#> 25229 0.367 0.396 -0.853 2017-06-13 14:29:00
#> 25228 0.370 0.402 -0.853 2017-06-13 14:29:00
#> 25227 0.367 0.399 -0.856 2017-06-13 14:29:00
#> 25226 0.370 0.399 -0.853 2017-06-13 14:29:00

out = fix_zeros(acc)
#> Warning in fix_zeros(acc): Time is unsorted, will resort the data set
as.data.frame(out[some_inds,])
#>           X     Y      Z                time
#> 25231 0.367 0.399 -0.853 2017-06-13 14:29:01
#> 25232 0.367 0.399 -0.853 2017-06-13 14:29:01
#> 25233 0.367 0.399 -0.853 2017-06-13 14:29:01
#> 25234 0.367 0.399 -0.853 2017-06-13 14:29:01
#> 25235 0.367 0.399 -0.853 2017-06-13 14:29:01
#> 25236 0.367 0.399 -0.853 2017-06-13 14:29:01
#> 25237 0.367 0.399 -0.853 2017-06-13 14:29:01
#> 25238 0.367 0.399 -0.853 2017-06-13 14:29:01
#> 25239 0.367 0.399 -0.853 2017-06-13 14:29:01
#> 25240 0.367 0.399 -0.853 2017-06-13 14:29:01
#> 25230 0.367 0.399 -0.853 2017-06-13 14:29:00
#> 25229 0.367 0.396 -0.853 2017-06-13 14:29:00
#> 25228 0.370 0.402 -0.853 2017-06-13 14:29:00
#> 25227 0.367 0.399 -0.856 2017-06-13 14:29:00
#> 25226 0.370 0.399 -0.853 2017-06-13 14:29:00

Created on 2020-05-18 by the reprex package (v0.3.0.9001)

Session info ``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 3.6.3 (2020-02-29) #> os macOS Mojave 10.14.6 #> system x86_64, darwin15.6.0 #> ui X11 #> language (EN) #> collate en_US.UTF-8 #> ctype en_US.UTF-8 #> tz America/New_York #> date 2020-05-18 #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date lib source #> assertthat 0.2.1 2019-03-21 [1] CRAN (R 3.6.0) #> backports 1.1.6 2020-04-05 [1] CRAN (R 3.6.2) #> cli 2.0.2 2020-02-28 [1] CRAN (R 3.6.0) #> crayon 1.3.4 2017-09-16 [1] CRAN (R 3.6.0) #> digest 0.6.25 2020-02-23 [1] CRAN (R 3.6.0) #> ellipsis 0.3.0 2019-09-20 [1] CRAN (R 3.6.0) #> evaluate 0.14 2019-05-28 [1] CRAN (R 3.6.0) #> fansi 0.4.1 2020-01-08 [1] CRAN (R 3.6.0) #> fs 1.4.1 2020-04-04 [1] CRAN (R 3.6.2) #> glue 1.4.0 2020-04-03 [1] CRAN (R 3.6.2) #> highr 0.8 2019-03-20 [1] CRAN (R 3.6.0) #> htmltools 0.4.0 2019-10-04 [1] CRAN (R 3.6.0) #> knitr 1.28 2020-02-06 [1] CRAN (R 3.6.0) #> lattice 0.20-41 2020-04-02 [1] CRAN (R 3.6.2) #> lifecycle 0.2.0 2020-03-06 [1] CRAN (R 3.6.0) #> magrittr 1.5 2014-11-22 [1] CRAN (R 3.6.0) #> pillar 1.4.3 2019-12-20 [1] CRAN (R 3.6.0) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 3.6.0) #> purrr 0.3.3 2019-10-18 [1] CRAN (R 3.6.0) #> R.methodsS3 * 1.8.0 2020-02-14 [1] CRAN (R 3.6.0) #> R.oo * 1.23.0 2019-11-03 [1] CRAN (R 3.6.0) #> R.utils * 2.9.2-9000 2020-02-29 [1] local #> Rcpp 1.0.4.7 2020-04-12 [1] Github (RcppCore/Rcpp@cefedb7) #> read.gt3x * 0.1.0.9000 2020-05-05 [1] local #> reprex 0.3.0.9001 2020-01-05 [1] Github (tidyverse/reprex@5ae0b29) #> rlang 0.4.5.9000 2020-03-24 [1] Github (r-lib/rlang@a90b04b) #> rmarkdown 2.1.3 2020-05-09 [1] Github (rstudio/rmarkdown@d7e1bda) #> rstudioapi 0.11.0-9000 2020-03-31 [1] Github (rstudio/rstudioapi@1842116) #> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.6.0) #> stringi 1.4.6 2020-02-17 [1] CRAN (R 3.6.0) #> 
stringr 1.4.0 2019-02-10 [1] CRAN (R 3.6.0) #> styler 1.3.2 2020-02-23 [1] CRAN (R 3.6.2) #> tibble 3.0.0 2020-03-30 [1] CRAN (R 3.6.2) #> vctrs 0.2.4 2020-03-10 [1] CRAN (R 3.6.0) #> withr 2.1.2 2018-03-15 [1] CRAN (R 3.6.0) #> xfun 0.13 2020-04-13 [1] CRAN (R 3.6.3) #> yaml 2.2.1 2020-02-01 [1] CRAN (R 3.6.0) #> zoo * 1.8-7 2020-01-10 [1] CRAN (R 3.6.0) #> #> [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library ```