ajdamico / convey

Variance estimation of distribution measures for survey data
GNU General Public License v3.0
17 stars 7 forks source link

svyatk missingness handling #392

Closed ajdamico closed 1 year ago

ajdamico commented 1 year ago
library(survey)
library( convey )
library( laeken )

# Load the eusilc example data (presumably shipped with laeken -- confirm)
# and lower-case its column names so the variables below can be referenced
# in lower case.
data(eusilc)
names(eusilc) <- tolower(names(eusilc))

# Build the survey design object, then derive a bootstrap replicate-weights
# design from it. The replicate design is created before convey_prep()
# reassigns des_eusilc, so this ordering matters.
des_eusilc <- svydesign(
  ids = ~rb030,
  strata = ~db040,
  weights = ~rb050,
  data = eusilc
)
des_eusilc_rep <- as.svrepdesign(
  des_eusilc,
  type = "bootstrap",
  replicates = 50
)

# Run convey_prep() on both designs before calling any convey estimator.
des_eusilc <- convey_prep(des_eusilc)
des_eusilc_rep <- convey_prep(des_eusilc_rep)

# # # # # #

# Case 1: keep only strictly positive py010n.
# The `> 0` condition drops zeroes, and rows where py010n is missing are
# dropped as well (see the original note: "remove both missings and zeroes").
des_eusilc_positive <- subset(des_eusilc, py010n > 0)
des_eusilc_positive_rep <- subset(des_eusilc_rep, py010n > 0)

# Both designs work without issue.
svyatk(~py010n, design = des_eusilc_positive)
svyatk(~py010n, design = des_eusilc_positive_rep)

# # # # # #

# Case 2: drop missing py010n but keep the zeroes.
des_eusilc_nomiss <- subset(des_eusilc, !is.na(py010n))
des_eusilc_nomiss_rep <- subset(des_eusilc_rep, !is.na(py010n))

# Expected behaviour: both calls raise an error, because the subsets still
# contain zero values of py010n.
svyatk(~py010n, design = des_eusilc_nomiss)
svyatk(~py010n, design = des_eusilc_nomiss_rep)

# # # # # #

# Case 3: drop the zeroes but keep missing py010n.
des_eusilc_nozero <- subset(des_eusilc, py010n > 0 | is.na(py010n))
des_eusilc_nozero_rep <- subset(des_eusilc_rep, py010n > 0 | is.na(py010n))

# Reported problem: with the default missing-value handling, both calls give
# an error instead of returning NA in the coef() and SE().
svyatk(~py010n, design = des_eusilc_nozero)
svyatk(~py010n, design = des_eusilc_nozero_rep)

# Reported problem: with na.rm = TRUE, the first design incorrectly blanks
# out the SE ...
svyatk(~py010n, design = des_eusilc_nozero, na.rm = TRUE)
# ... while the replicate-weights design works correctly.
svyatk(~py010n, design = des_eusilc_nozero_rep, na.rm = TRUE)
ajdamico commented 1 year ago

fixed by https://github.com/ajdamico/convey/pull/397