2016-07-11 - Githubissues

andkov commented 8 years ago

Comparing tables before and after subsetting subjects based on dementia diagnosis at the first wave (keeping only subjects with cdx == 1 at fu_year == 0)

> t <- table(ds$fu_year, ds$cdx, useNA = "always"); t[t==0] <- "."; t

       1    2   3  4   5  6 <NA>
  0    1236 461 5  87  7  4 2   
  1    1080 365 11 94  6  3 20  
  2    942  295 6  114 10 6 36  
  3    833  236 10 118 6  5 30  
  4    689  206 5  111 8  5 27  
  5    558  168 6  103 6  5 35  
  6    467  153 7  90  4  5 26  
  7    374  107 9  93  8  4 12  
  8    315  90  2  83  6  5 11  
  9    254  79  2  62  6  2 10  
  10   209  58  1  46  4  3 15  
  11   142  41  3  38  1  2 33  
  12   99   31  3  32  1  . 10  
  13   58   10  3  14  1  3 16  
  14   21   4   .  4   3  1 5   
  15   15   4   .  4   2  1 .   
  16   8    2   .  4   3  . 2   
  17   7    3   .  3   2  . 1   
  18   1    .   .  .   1  . .   
  <NA> .    .   .  .   .  . 1   
> t <- table(ds_subset_1$fu_year, ds_subset_1$cdx, useNA = "always"); t[t==0] <- "."; t

       1    2   3 4  5 6 <NA>
  0    1236 .   . .  . . .   
  1    957  130 4 1  . . 15  
  2    832  133 2 16 1 4 26  
  3    736  129 3 20 1 1 20  
  4    617  110 . 26 2 2 19  
  5    491  107 3 26 1 2 19  
  6    418  103 3 30 2 3 16  
  7    337  73  4 41 4 2 11  
  8    288  67  1 40 3 5 6   
  9    231  61  1 39 3 1 4   
  10   185  45  . 30 2 2 11  
  11   130  31  3 22 . 2 29  
  12   91   28  3 23 . . 7   
  13   52   9   3 9  1 2 14  
  14   19   3   . 3  2 1 5   
  15   13   3   . 3  1 1 .   
  16   7    1   . 3  2 . 2   
  17   7    2   . 2  1 . .   
  18   1    .   . .  1 . .   
  <NA> .    .   . .  . . .

andkov commented 8 years ago

The tables have been produced by the following script:

> t <- table(ds$fu_year, ds$cdx); t[t==0] <- "."; t

     1    2   3  4   5  6
  0  1236 461 5  87  7  4
  1  1080 365 11 94  6  3
  2  942  295 6  114 10 6
  3  833  236 10 118 6  5
  4  689  206 5  111 8  5
  5  558  168 6  103 6  5
  6  467  153 7  90  4  5
  7  374  107 9  93  8  4
  8  315  90  2  83  6  5
  9  254  79  2  62  6  2
  10 209  58  1  46  4  3
  11 142  41  3  38  1  2
  12 99   31  3  32  1  .
  13 58   10  3  14  1  3
  14 21   4   .  4   3  1
  15 15   4   .  4   2  1
  16 8    2   .  4   3  .
  17 7    3   .  3   2  .
  18 1    .   .  .   1  .
> names(ds)
 [1] "projid"           "fu_year"          "cdx"              "final_dx"        
 [5] "dementia"         "age_bl"           "age_death"        "age_at_visit"    
 [9] "race"             "educ"             "sex"              "braaksc"         
[13] "ceradsc"          "niareagansc"      "ad_reagan"        "apoe_genotype"   
[17] "total_smell_test" "stroke"           "stroke_cum"       "vasc_risks_sum"  
[21] "cogn_ep"          "cogn_global"      "mmse"             "cts_ebmt"        
[25] "cts_ebdr"         "cts_story"        "cts_delay"        "time_yr"         
[29] "vital_status"     "dementia_status"  "stroke_status"    "path_status"     
[33] "group_smell"      "BSIT"             "apoe"             "id"              
> 
> keep_these_ids <-  ds %>% 
+   dplyr::select(id, fu_year, cdx) %>% 
+   dplyr::filter(fu_year==0, cdx == 1 ) %>% 
+   dplyr::select(id)
> keep_these_ids <- as.integer(keep_these_ids$id)
> length(unique(keep_these_ids)) # 1236
[1] 1236
> length(unique(ds$id)) # 1803
[1] 1803
> 
> ds_subset <- ds %>% 
+   dplyr::filter(id %in% keep_these_ids)
> 
> length(unique(ds_subset$id))
[1] 1236
> 
> ds_subset %>% 
+   # dplyr::slice(1:10)
+   dplyr::group_by(cdx) %>% 
+   dplyr::summarize(count = n())
Source: local data frame [7 x 2]

    cdx count
  (int) (int)
1     1  6648
2     2  1035
3     3    30
4     4   334
5     5    27
6     6    28
7    NA   204
> 
> t <- table(ds_subset$fu_year, ds_subset$cdx); t[t==0] <- "."; t

     1    2   3 4  5 6
  0  1236 .   . .  . .
  1  957  130 4 1  . .
  2  832  133 2 16 1 4
  3  736  129 3 20 1 1
  4  617  110 . 26 2 2
  5  491  107 3 26 1 2
  6  418  103 3 30 2 3
  7  337  73  4 41 4 2
  8  288  67  1 40 3 5
  9  231  61  1 39 3 1
  10 185  45  . 30 2 2
  11 130  31  3 22 . 2
  12 91   28  3 23 . .
  13 52   9   3 9  1 2
  14 19   3   . 3  2 1
  15 13   3   . 3  1 1
  16 7    1   . 3  2 .
  17 7    2   . 2  1 .
  18 1    .   . .  1 .
> names(ds)
 [1] "projid"           "fu_year"          "cdx"              "final_dx"        
 [5] "dementia"         "age_bl"           "age_death"        "age_at_visit"    
 [9] "race"             "educ"             "sex"              "braaksc"         
[13] "ceradsc"          "niareagansc"      "ad_reagan"        "apoe_genotype"   
[17] "total_smell_test" "stroke"           "stroke_cum"       "vasc_risks_sum"  
[21] "cogn_ep"          "cogn_global"      "mmse"             "cts_ebmt"        
[25] "cts_ebdr"         "cts_story"        "cts_delay"        "time_yr"         
[29] "vital_status"     "dementia_status"  "stroke_status"    "path_status"     
[33] "group_smell"      "BSIT"             "apoe"             "id"

andkov commented 8 years ago

Comparing tables before and after removing those ids that did not have a baseline score on total_smell_test during wave 0

> t <- table(ds_subset_1$fu_year, ds_subset_1$total_smell_test, useNA = "always"); t[t==0] <- "."; t

       1 2 3 4  5  6  7  8   9   10  11  12  <NA>
  0    2 6 4 21 14 50 64 114 186 251 232 110 182 
  1    . . 3 5  4  12 17 18  49  69  63  36  831 
  2    . 1 3 8  7  6  16 31  48  67  69  39  719 
  3    . 3 5 4  6  19 26 32  43  72  75  32  593 
  4    . . 2 3  3  12 8  21  31  47  36  20  593 
  5    . . . .  .  2  7  11  28  24  38  15  524 
  6    . . 3 2  4  8  12 15  22  39  39  9   422 
  7    . 1 5 6  11 10 7  17  19  40  24  18  314 
  8    . 5 2 5  16 16 15 30  32  38  31  20  200 
  9    2 3 4 14 13 24 22 23  38  32  38  13  114 
  10   1 2 6 5  10 15 25 20  19  31  32  5   104 
  11   1 1 7 5  8  15 14 18  19  20  20  8   81  
  12   . 2 2 6  7  7  8  11  16  21  7   2   63  
  13   . 1 3 5  4  5  8  8   6   7   4   5   34  
  14   . . . 1  5  3  .  3   2   4   4   1   10  
  15   . . . 1  9  .  1  .   3   .   3   1   3   
  16   . 1 4 .  1  2  2  2   .   2   1   .   .   
  17   . . . 1  2  2  1  1   .   .   1   .   4   
  18   . . . .  .  .  .  1   .   .   .   .   1   
  <NA> . . . .  .  .  .  .   .   .   .   .   .   
> t <- table(ds_subset_2$fu_year, ds_subset_2$total_smell_test, useNA = "always"); t[t==0] <- "."; t

       1 2 3 4  5  6  7  8   9   10  11  12  <NA>
  0    2 6 4 21 14 50 64 114 186 251 232 110 .   
  1    . . 3 4  3  11 16 17  47  66  61  32  683 
  2    . 1 2 7  6  5  16 30  42  62  66  37  585 
  3    . 1 3 2  3  11 23 22  36  54  50  23  549 
  4    . . 2 3  3  12 6  21  29  45  36  20  471 
  5    . . . .  .  1  7  11  27  23  37  15  415 
  6    . . 3 2  3  8  12 15  21  38  38  9   325 
  7    . 1 5 6  9  10 7  16  18  39  24  18  229 
  8    . 5 2 5  16 16 15 29  31  37  31  20  128 
  9    2 3 4 13 13 24 22 21  37  32  37  13  52  
  10   1 1 5 5  10 15 25 20  19  31  31  5   50  
  11   1 1 7 4  8  15 14 15  18  19  20  8   37  
  12   . 2 2 5  7  7  8  11  16  19  5   2   22  
  13   . 1 3 2  2  3  6  7   2   4   2   3   19  
  14   . . . .  1  1  .  1   .   2   .   .   4   
  <NA> . . . .  .  .  .  .   .   .   .   .   .

andkov commented 8 years ago

@ampiccinin

Removing respondents who have fewer than three waves of total_smell_test observations

> t <- table(ds_subset_2$fu_year, ds_subset_2$total_smell_test, useNA = "always"); t[t==0] <- "."; t

       1 2 3 4  5  6  7  8   9   10  11  12  <NA>
  0    2 6 4 21 14 50 64 114 186 251 232 110 .   
  1    . . 3 4  3  11 16 17  47  66  61  32  683 
  2    . 1 2 7  6  5  16 30  42  62  66  37  585 
  3    . 1 3 2  3  11 23 22  36  54  50  23  549 
  4    . . 2 3  3  12 6  21  29  45  36  20  471 
  5    . . . .  .  1  7  11  27  23  37  15  415 
  6    . . 3 2  3  8  12 15  21  38  38  9   325 
  7    . 1 5 6  9  10 7  16  18  39  24  18  229 
  8    . 5 2 5  16 16 15 29  31  37  31  20  128 
  9    2 3 4 13 13 24 22 21  37  32  37  13  52  
  10   1 1 5 5  10 15 25 20  19  31  31  5   50  
  11   1 1 7 4  8  15 14 15  18  19  20  8   37  
  12   . 2 2 5  7  7  8  11  16  19  5   2   22  
  13   . 1 3 2  2  3  6  7   2   4   2   3   19  
  14   . . . .  1  1  .  1   .   2   .   .   4   
  <NA> . . . .  .  .  .  .   .   .   .   .   .   
> t <- table(ds_subset_3$fu_year, ds_subset_3$tst_count, useNA = "always"); t[t==0] <- "."; t

       3  4   5   6  7  8  9  10 11 12 13 14 15 <NA>
  0    95 137 106 94 83 54 56 56 63 55 50 32 9  .   
  1    88 131 104 93 82 51 54 56 63 55 50 32 9  .   
  2    85 129 102 90 79 50 56 54 63 55 50 32 9  .   
  3    7  128 100 91 83 52 55 53 61 55 50 32 9  .   
  4    6  18  99  87 77 50 54 53 61 54 48 32 9  .   
  5    4  3   8   80 80 49 55 51 61 54 50 32 9  .   
  6    .  2   6   21 79 53 53 53 63 54 49 32 9  .   
  7    .  .   3   6  13 46 53 54 61 55 50 32 9  .   
  8    .  .   1   2  4  14 53 55 60 55 50 32 9  .   
  9    .  .   1   .  1  8  11 49 60 53 49 32 9  .   
  10   .  .   .   .  .  4  3  16 55 51 48 32 9  .   
  11   .  .   .   .  .  1  .  8  15 52 50 32 9  .   
  12   .  .   .   .  .  .  .  1  5  10 49 32 9  .   
  13   .  .   .   .  .  .  1  1  2  2  7  32 9  .   
  14   .  .   .   .  .  .  .  .  .  .  .  .  9  .   
  <NA> .  .   .   .  .  .  .  .  .  .  .  .  .  .

andkov commented 8 years ago

At the end of the session we decided to spend the next meeting (Friday, July 15) to wrap up our progress on longitudinal-response-pattern, create a stopping point, prepare a presentation about what we've done so far, and get feedback from @ampiccinin about where to go next (what needs to be resolved/clarified/specified in order to move on).

IALSA / longitudinal-response-pattern

2016-07-11 #6