Closed ajdamico closed 1 year ago
rewrite of sipp without monetdblite could look something like this:
# download the zipped ASCII file to a temporary location
tf <- tempfile()
lodown::cachaca( "http://thedataweb.rm.census.gov/pub/sipp/2014/pu2014w1_dat.zip" , tf , mode = 'wb' )

# extract into the session temp directory; `unzip` returns the extracted paths
fn <- unzip( tf , exdir = tempdir() )

# `fn` is opened below as a single file connection, so fail right here with
# a clear message if the archive did not yield exactly one file
if ( length( fn ) != 1 ) {
    stop(
        "expected exactly one file inside the sipp zip archive but found " ,
        length( fn ) ,
        call. = FALSE
    )
}
# split the big ASCII file into two smaller files so that each piece can be
# parsed on its own: one file receives every record whose columns 24-25 read
# " 1" and the other every record whose columns 24-25 read "12"
# (presumably the month field, going by the output file names - confirm
# against the SAS input statements)
line.num <- 0
month_one <- tempfile()
month_twelve <- tempfile()

# number of lines pulled into memory per read.  this must divide 10,000
# evenly so the running total lands on every multiple of 10,000 and the
# progress message below fires at the same points as a line-by-line read
chunk.size <- 1000

# one read-only file connection "r" - pointing to the ASCII file
incon <- file( fn , "r" )

# two write-only file connections "w" - one per output file
outcon.month_one <- file( month_one , "w" )
outcon.month_twelve <- file( month_twelve , "w" )

tryCatch( {

    # keep reading until the input connection has nothing left to give
    while( length( line <- readLines( incon , chunk.size ) ) > 0 ){

        # pull columns 24-25 out of every line in this chunk at once
        month <- substr( line , 24 , 25 )

        # write the matching lines to the month one file
        writeLines( line[ month == " 1" ] , outcon.month_one )

        # write the matching lines to the month twelve file
        writeLines( line[ month == "12" ] , outcon.month_twelve )

        # add this chunk to the line counter #
        line.num <- line.num + length( line )

        # every 10k records..
        if ( line.num %% 10000 == 0 ) {

            # print current progress to the screen #
            cat( " " , prettyNum( line.num , big.mark = "," ) , "of approximately 870,000 sipp lines processed" , "\r" )

        }

    }

} , finally = {

    # release all three connections even when the loop stops on an error
    close( outcon.month_one )
    close( outcon.month_twelve )
    close( incon )

} )
# location of the SAS input statements passed through the lodown fix helpers
sas_url <- "http://thedataweb.rm.census.gov/pub/sipp/2014/pu2014w1.sas"

# the two ASCII extracts to import, keyed by name
dat_files <- c( month_one = month_one , month_twelve = month_twelve )

# one .rds output path per extract.  these are kept in a named vector so the
# saved files can still be located after the loop, for example with
# readRDS( rds_files[[ "month_one" ]] )
rds_files <-
    c(
        month_one = tempfile( fileext = ".rds" ) ,
        month_twelve = tempfile( fileext = ".rds" )
    )

# import and save one extract at a time so that only a single data.frame
# is held in memory at any moment
for ( this_month in names( dat_files ) ) {

    x <-
        lodown:::read_SAScii(
            dat_files[[ this_month ]] ,
            lodown:::fix.ahiehi( lodown:::fix.ct( sas_url ) ) ,
            beginline = 5
        )

    saveRDS( x , file = rds_files[[ this_month ]] )

    # drop the object before the next extract gets read
    rm( x )

}
might need a build matrix here, with one job per panel; even then, the 2014 panel still may not fit
read the release notes
06/30/17 SSA Supplement Release Notes 14,932
06/30/17 SSA Supplement ASCII Data File 44,633,754 3,024,796 3,024,670 N/A 35,937
06/30/17 SSA Supplement SAS Data File 35,258,368 N/A 8,652,384 N/A 35,937
06/30/17 SSA Supplement SAS Input Statements 15,231
06/30/17 SSA Supplement Metadata Dictionary (pdf) 466,854