Closed nguinasso closed 9 years ago
Test of readr Norman 2015-04-14
cltype <-'DcdcccccDcddcddc'
cltype1<-'cccccccccc'
cltype2<-'cccccccddc'
cltype3<-'cccccddccc'
cltype4<-'cccdDD'
cltyp<-paste(cltype,cltype1,cltype2,cltype3,cltype4,sep="")
str_length(cltyp)
## [1] 52
#(http://gulfsciencedata.bp.com/go/doctype/6145/179482/)
ocfile <- '../WaterChemistry_W-01v02-01.csv'
cltyp
## [1] "DcdcccccDcddcddccccccccccccccccccddccccccddccccccdDD"
ocfile
## [1] "../WaterChemistry_W-01v02-01.csv"
rm(bpwc,bpwc.h,bpwc.h1)
## Warning in rm(bpwc, bpwc.h, bpwc.h1): object 'bpwc' not found
## Warning in rm(bpwc, bpwc.h, bpwc.h1): object 'bpwc.h' not found
## Warning in rm(bpwc, bpwc.h, bpwc.h1): object 'bpwc.h1' not found
system.time(bpwc <-read.csv(ocfile))
## user system elapsed
## 91.67 1.94 93.61
library(readr)
system.time(bpwc.h <-read_csv(ocfile,col_types=cltyp,prog=FALSE))
## |================================================================================| 100% 1565 MB
## user system elapsed
## 25.63 0.55 26.18
head(problems(bpwc.h))
## [1] row col expected actual
## <0 rows> (or 0-length row.names)
system.time(bpwc.h1 <-read_csv(ocfile,prog=FALSE))
## |================================================================================| 100% 1565 MB
## Warning: 11742041 problems parsing '../WaterChemistry_W-01v02-01.csv'. See
## problems(...) for more details.
## user system elapsed ## 31.37 0.75 32.12 head(problems(bpwc.h1)) ## row col expected actual ## 1 12324 25 an integer K1009141-005 ## 2 12324 28 T/F/TRUE/FALSE Water ## 3 12324 29 T/F/TRUE/FALSE Water ## 4 12324 49 T/F/TRUE/FALSE SMP ## 5 12325 25 an integer K1009141-005 ## 6 12325 28 T/F/TRUE/FALSE Water sessionInfo() ## R version 3.1.3 Patched (2015-03-16 r68170) ## Platform: x86_64-w64-mingw32/x64 (64-bit) ## Running under: Windows 7 x64 (build 7601) Service Pack 1 ## ## locale: ## [1] LC_COLLATE=English_United States.1252 ## [2] LC_CTYPE=English_United States.1252 ## [3] LC_MONETARY=English_United States.1252 ## [4] LC_NUMERIC=C ## [5] LC_TIME=English_United States.1252 ## ## attached base packages: ## [1] stats graphics grDevices utils datasets methods base ## ## other attached packages: ## [1] readr_0.1.0.9000 stringr_0.6.2 ## ## loaded via a namespace (and not attached): ## [1] digest_0.6.4 evaluate_0.5.5 formatR_1.0 htmltools_0.2.6 ## [5] knitr_1.9 Rcpp_0.11.5 rmarkdown_0.5.1 tools_3.1.3 ## [9] yaml_2.1.13 str(bpwc) ## 'data.frame': 2479957 obs. of 52 variables: ## $ Data.Publication.Date : Factor w/ 1 level "2014-05-30": 1 1 1 1 1 1 1 1 1 1 ... ## $ Data.Publication.Reference : Factor w/ 1 level "Water Column Chemistry W-01v02-01": 1 1 1 1 1 1 1 1 1 1 ... ## $ Study.Reference.Number : num 1022 1022 1022 1022 1022 ... ## $ Study.Name : Factor w/ 97 levels "","American Diver Cruise 01 JUL 17-AUG 6 2010",..: 18 18 18 18 18 18 18 18 18 18 ... ## $ Harmonized.Study.Name : Factor w/ 67 levels "1-meter MOCNESS Plankton: September 2010 (Walton Smith I)",..: 21 21 21 21 21 21 21 21 21 21 ... ## $ Harmonized.Cruise.ID : Factor w/ 138 levels "American Diver 01 (07-21-10 to 08-05-10)",..: 83 83 83 83 83 83 83 83 83 83 ... ## $ Location.or.Station.ID : Factor w/ 1347 levels "","000-009","000-018",..: 1 1 1 1 1 1 1 1 1 1 ... ## $ Interpretive.Sample.ID : Factor w/ 22772 levels "0V010011","0V010021",..: 17710 17710 17710 17710 17710 17710 17710 17710 17710 17710 ... 
## $ Sample.Date : Factor w/ 374 levels "2010-05-05","2010-05-06",..: 71 71 71 71 71 71 71 71 71 71 ... ## $ Sample.Time : Factor w/ 1401 levels "00:00:00.0000000",..: 406 406 406 406 406 406 406 406 406 406 ... ## $ Latitude : num 28.7 28.7 28.7 28.7 28.7 ... ## $ Longitude : num -88.5 -88.5 -88.5 -88.5 -88.5 ... ## $ Spatial.Zone : Factor w/ 7 levels "AL","Federal",..: 7 7 7 7 7 7 7 7 7 7 ... ## $ Upper.Depth : num 1467 1467 1467 1467 1467 ... ## $ Lower.Depth : num 1467 1467 1467 1467 1467 ... ## $ Depth.Unit : Factor w/ 2 levels "","m": 2 2 2 2 2 2 2 2 2 2 ... ## $ Field.Fraction : Factor w/ 3 levels "Dissolved","Suspended particulate",..: 3 3 3 3 3 3 3 3 3 3 ... ## $ Sample.Type : Factor w/ 6 levels "Equipment blank",..: 4 4 4 4 4 4 4 4 4 4 ... ## $ Field.Matrix : Factor w/ 2 levels "Solid (non-specific)",..: 2 2 2 2 2 2 2 2 2 2 ... ## $ Field.Sample.Material : Factor w/ 4 levels "Filter from the Payne filtration method, 0.7um fiber glass",..: 3 3 3 3 3 3 3 3 3 3 ... ## $ Field.Data.Verification.Status: Factor w/ 3 levels "Complete","Not started",..: 3 3 3 3 3 3 3 3 3 3 ... ## $ Field.Data.Verification.Result: Factor w/ 9 levels "1 of 15 DQOs could not be verified: Coordinates correctness",..: 9 9 9 9 9 9 9 9 9 9 ... ## $ Analytical.Sample.ID : Factor w/ 26737 levels "0V010011","0V010021",..: 21145 21145 21145 21145 21145 21145 21145 21145 21145 21145 ... ## $ Lab : Factor w/ 9 levels "Alpha Analytical",..: 6 6 6 6 6 6 6 6 6 6 ... ## $ Laboratory.Sample.ID : Factor w/ 34753 levels "1005011-01","1005011-01D",..: 14840 14840 14840 14840 14840 14840 14840 14840 14840 14840 ... ## $ ASR.Number : Factor w/ 23 levels "","ARF 007-0",..: 20 20 20 20 20 20 20 20 20 20 ... ## $ SDG : Factor w/ 2642 levels "10-0059","10-0061",..: 1265 1265 1265 1265 1265 1265 1265 1265 1265 1265 ... ## $ Lab.Matrix : Factor w/ 3 levels "","Solid (non-specific)",..: 1 1 1 1 1 1 1 1 1 1 ... ## $ Lab.Material : Factor w/ 5 levels "","Filter (liquid)",..: 1 1 1 1 1 1 1 1 1 1 ... 
## $ Parameter.Type : Factor w/ 8 levels "Biomarker","BTEX/PIANO",..: 4 4 4 4 4 4 4 4 4 4 ... ## $ Chemical.Name : Factor w/ 330 levels "1-Decene","1-Heptene/1,2-DMCP (trans)",..: 122 124 125 126 129 131 136 137 138 142 ... ## $ Chemical.Code : Factor w/ 330 levels "100-41-4","100-42-5",..: 217 82 32 144 124 73 66 65 80 84 ... ## $ Chemical.Type : Factor w/ 3 levels "Surrogate","Target analyte",..: 3 3 3 3 3 3 3 3 3 3 ... ## $ Concentration...NDs.at.MDL : num 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 ... ## $ Concentration...NDs.at.zero : num 0 0 0 0 0 0 0 0 0 0 ... ## $ Unit : Factor w/ 3 levels "pct","ug/L","ug/Samp": 2 2 2 2 2 2 2 2 2 2 ... ## $ Final.Qualifiers : Factor w/ 11 levels "","F","J","JF",..: 7 7 7 7 7 7 7 7 7 7 ... ## $ Validation.Qualifiers : Factor w/ 15 levels "","F","FJ","j",..: 10 10 10 10 10 10 10 10 10 10 ... ## $ Lab.Qualifiers : Factor w/ 52 levels "","*","B","B,J",..: 44 44 44 44 44 44 44 44 44 44 ... ## $ Nondetect.Flag : Factor w/ 2 levels "N","Y": 2 2 2 2 2 2 2 2 2 2 ... ## $ Validation.Level : Factor w/ 7 levels "Not Validated",..: 7 7 7 7 7 7 7 7 7 7 ... ## $ Reporting.Limit : num 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 ... ## $ Method.Detection.Limit : num 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 ... ## $ Measurement.Basis : Factor w/ 1 level "Wet": 1 1 1 1 1 1 1 1 1 1 ... ## $ Lab.Fraction : Factor w/ 2 levels "Suspended particulate",..: 2 2 2 2 2 2 2 2 2 2 ... ## $ Preparation.Method : Factor w/ 17 levels "","3510C - Separatory funel liquid-liquid extraction",..: 15 15 15 15 15 15 15 15 15 15 ... ## $ Analytical.Method : Factor w/ 20 levels "8015B - Nonhalogenated Organics Using GC/FID",..: 20 20 20 20 20 20 20 20 20 20 ... ## $ Base.Analytical.Method : int 8270 8270 8270 8270 8270 8270 8270 8270 8270 8270 ... ## $ Lab.Replicate : Factor w/ 4 levels "","1","LD","SMP": 1 1 1 1 1 1 1 1 1 1 ... ## $ Dilution.Factor : num 1 1 1 1 1 1 1 1 1 1 ... 
## $ Date.Extracted : Factor w/ 358 levels "","2010-05-11",..: 70 70 70 70 70 70 70 70 70 70 ... ## $ Date.Analyzed : Factor w/ 536 levels "","2010-05-17",..: 67 67 67 67 67 67 67 67 67 67 ... str(bpwc.h) ## Classes 'tbl_df', 'tbl' and 'data.frame': 2479957 obs. of 52 variables: ## $ Data Publication Date : Date, format: "2014-05-30" "2014-05-30" ... ## $ Data Publication Reference : chr "Water Column Chemistry W-01v02-01" "Water Column Chemistry W-01v02-01" "Water Column Chemistry W-01v02-01" "Water Column Chemistry W-01v02-01" ... ## $ Study Reference Number : num 1022 1022 1022 1022 1022 ... ## $ Study Name : chr "Deepwater Dispersant Sampling Program" "Deepwater Dispersant Sampling Program" "Deepwater Dispersant Sampling Program" "Deepwater Dispersant Sampling Program" ... ## $ Harmonized Study Name : chr "Deepwater Dispersant Sampling Program" "Deepwater Dispersant Sampling Program" "Deepwater Dispersant Sampling Program" "Deepwater Dispersant Sampling Program" ... ## $ Harmonized Cruise ID : chr "Ocean Veritas 09 (07-13-10 to 07-17-10)" "Ocean Veritas 09 (07-13-10 to 07-17-10)" "Ocean Veritas 09 (07-13-10 to 07-17-10)" "Ocean Veritas 09 (07-13-10 to 07-17-10)" ... ## $ Location or Station ID : chr "" "" "" "" ... ## $ Interpretive Sample ID : chr "SW-20100715-OV09-001" "SW-20100715-OV09-001" "SW-20100715-OV09-001" "SW-20100715-OV09-001" ... ## $ Sample Date : Date, format: "2010-07-15" "2010-07-15" ... ## $ Sample Time : chr "07:22:00.0000000" "07:22:00.0000000" "07:22:00.0000000" "07:22:00.0000000" ... ## $ Latitude : num 28.7 28.7 28.7 28.7 28.7 ... ## $ Longitude : num -88.5 -88.5 -88.5 -88.5 -88.5 ... ## $ Spatial Zone : chr "Wellhead" "Wellhead" "Wellhead" "Wellhead" ... ## $ Upper Depth : num 1467 1467 1467 1467 1467 ... ## $ Lower Depth : num 1467 1467 1467 1467 1467 ... ## $ Depth Unit : chr "m" "m" "m" "m" ... ## $ Field Fraction : chr "Total" "Total" "Total" "Total" ... 
## $ Sample Type : chr "Natural sample" "Natural sample" "Natural sample" "Natural sample" ... ## $ Field Matrix : chr "Water" "Water" "Water" "Water" ... ## $ Field Sample Material : chr "Surface Water" "Surface Water" "Surface Water" "Surface Water" ... ## $ Field Data Verification Status: chr "Underway" "Underway" "Underway" "Underway" ... ## $ Field Data Verification Result: chr "Unassigned" "Unassigned" "Unassigned" "Unassigned" ... ## $ Analytical Sample ID : chr "SW-20100715-OV09-001" "SW-20100715-OV09-001" "SW-20100715-OV09-001" "SW-20100715-OV09-001" ... ## $ Lab : chr "LLI" "LLI" "LLI" "LLI" ... ## $ Laboratory Sample ID : chr "6035886" "6035886" "6035886" "6035886" ... ## $ ASR Number : chr "ASR 052-draft-0" "ASR 052-draft-0" "ASR 052-draft-0" "ASR 052-draft-0" ... ## $ SDG : chr "BMM26" "BMM26" "BMM26" "BMM26" ... ## $ Lab Matrix : chr "" "" "" "" ... ## $ Lab Material : chr "" "" "" "" ... ## $ Parameter Type : chr "PAH/aPAH" "PAH/aPAH" "PAH/aPAH" "PAH/aPAH" ... ## $ Chemical Name : chr "Acenaphthene" "Acenaphthylene" "Anthracene" "Benz(a)anthracene" ... ## $ Chemical Code : chr "83-32-9" "208-96-8" "120-12-7" "56-55-3" ... ## $ Chemical Type : chr "Target Analyte" "Target Analyte" "Target Analyte" "Target Analyte" ... ## $ Concentration - NDs at MDL : num 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 ... ## $ Concentration - NDs at zero : num 0 0 0 0 0 0 0 0 0 0 ... ## $ Unit : chr "ug/L" "ug/L" "ug/L" "ug/L" ... ## $ Final Qualifiers : chr "U" "U" "U" "U" ... ## $ Validation Qualifiers : chr "U" "U" "U" "U" ... ## $ Lab Qualifiers : chr "U" "U" "U" "U" ... ## $ Nondetect Flag : chr "Y" "Y" "Y" "Y" ... ## $ Validation Level : chr "Validated Level4" "Validated Level4" "Validated Level4" "Validated Level4" ... ## $ Reporting Limit : num 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 ... ## $ Method Detection Limit : num 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 0.0053 ... 
## $ Measurement Basis : chr "Wet" "Wet" "Wet" "Wet" ... ## $ Lab Fraction : chr "Total" "Total" "Total" "Total" ... ## $ Preparation Method : chr "SW3510C" "SW3510C" "SW3510C" "SW3510C" ... ## $ Analytical Method : chr "SW8270C_MOD" "SW8270C_MOD" "SW8270C_MOD" "SW8270C_MOD" ... ## $ Base Analytical Method : chr "8270" "8270" "8270" "8270" ... ## $ Lab Replicate : chr "" "" "" "" ... ## $ Dilution Factor : num 1 1 1 1 1 1 1 1 1 1 ... ## $ Date Extracted : Date, format: "2010-07-20" "2010-07-20" ... ## $ Date Analyzed : Date, format: "2010-07-21" "2010-07-21" ...
Can you please supply a minimal reproducible example?
e.g. this works as I expect:
read_csv("a b,c.d\n1, 2")
The period and space are preserved.
Test of readr Norman 2015-04-14