Closed smgogarten closed 7 years ago
"$dosage" seems to return incorrect values when the number of alternate alleles is large. Here is an example:
> library(SeqVarTools) > gdsfile <- system.file("extdata", "hapmap_exome_chr22.gds", package="SeqVarTools") > gds <- seqOpen(gdsfile) > x <- refDosage(gds, use.names=FALSE) > y <- seqGetData(gds, "$dosage") > all.equal(x,y) [1] "'is.NA' value mismatch: 264 in current 266 in target" > mm <- logical(ncol(x)) > for (i in seq_along(mm)) if (any(is.na(x[,i]) != is.na(y[,i]) | (!is.na(x[,i]) & !is.na(y[,i]) & x[,i] != y[,i]))) mm[i] <- TRUE > seqSetFilter(gds, variant.sel=mm) # of selected variants: 8 > seqNumAllele(gds) [1] 5 4 6 7 6 7 4 5 > refDosage(gds, use.names=FALSE) variant sample [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [1,] 2 2 1 1 2 1 1 1 [2,] 2 2 0 2 1 0 1 2 [3,] 2 2 1 2 2 NA 1 2 [4,] 2 2 1 2 2 2 1 NA [5,] 2 2 1 2 2 2 1 1 [6,] 2 2 0 2 1 1 0 2 [7,] 2 2 2 2 1 1 1 NA [8,] 2 2 1 2 1 2 0 NA [9,] 2 2 1 2 2 0 1 1 [10,] 2 1 1 2 2 2 1 2 [11,] 2 2 2 NA 2 2 1 1 [12,] 2 2 2 1 1 1 1 1 [13,] NA 2 1 2 NA 1 1 1 [14,] 2 NA 0 2 1 2 1 2 [15,] 2 2 0 2 0 1 1 1 [16,] 2 2 1 2 2 2 0 1 [17,] 2 1 1 1 1 2 1 1 [18,] NA 2 0 2 0 1 0 1 [19,] 2 1 1 2 2 1 1 1 [20,] 0 2 1 2 1 1 1 1 [21,] 2 2 1 2 NA 2 0 2 [22,] 2 2 1 2 1 2 1 2 > seqGetData(gds, "$dosage") variant sample [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [1,] 2 2 2 2 2 0 2 1 [2,] 2 2 2 2 1 0 2 1 [3,] 2 2 2 1 1 0 2 2 [4,] 1 1 0 2 1 2 2 0 [5,] 1 2 0 2 2 1 2 2 [6,] 2 2 1 2 0 1 1 2 [7,] 2 2 2 2 1 0 1 NA [8,] 2 2 1 2 1 2 0 NA [9,] 2 2 1 2 2 0 1 1 [10,] 2 1 1 2 2 2 1 2 [11,] 2 2 2 NA 2 1 1 1 [12,] 2 2 2 1 1 1 1 1 [13,] NA 2 1 2 NA 1 1 0 [14,] 2 NA 0 2 1 2 1 2 [15,] 2 2 0 2 0 1 1 0 [16,] 2 2 1 2 2 2 0 1 [17,] 2 1 1 1 1 1 1 1 [18,] NA 2 0 2 0 1 0 1 [19,] 2 1 1 2 2 1 1 1 [20,] 0 2 1 2 1 1 1 1 [21,] 2 2 1 2 NA 2 0 2 [22,] 2 2 1 2 1 2 1 2 > getGenotype(gds, use.names=FALSE) variant sample [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [1,] "0/0" "0/0" "0/2" "0/1" "0/0" "0/1" "0/1" "0/1" [2,] "0/0" "0/0" "2/2" "0/0" "0/3" "4/4" "0/1" "0/0" [3,] "0/0" "0/0" "0/2" "0/0" "0/0" NA "0/1" "0/0" [4,] "0/0" "0/0" "0/2" "0/0" "0/0" "0/0" "0/1" NA [5,] "0/0" "0/0" "0/2" "0/0" "0/0" "0/0" "0/1" "0/1" [6,] "0/0" "0/0" "2/2" "0/0" "0/1" "0/1" "1/1" "0/0" [7,] "0/0" "0/0" "0/0" "0/0" "0/3" "0/5" "0/1" NA [8,] "0/0" "0/0" "0/2" "0/0" "0/4" "0/0" "1/1" NA [9,] "0/0" "0/0" "0/2" "0/0" "0/0" "4/4" "0/1" "0/1" [10,] "0/0" "0/3" "0/2" "0/0" "0/0" "0/0" "0/1" "0/0" [11,] "0/0" "0/0" "0/0" NA "0/0" "0/0" "0/1" "0/1" [12,] "0/0" "0/0" "0/0" "0/2" "0/4" "0/1" "0/1" "0/1" [13,] NA "0/0" "0/2" "0/0" NA "0/2" "0/1" "0/1" [14,] "0/0" NA "4/4" "0/0" "0/1" "0/0" "0/1" "0/0" [15,] "0/0" "0/0" "4/4" "0/0" "1/1" "0/1" "0/1" "0/1" [16,] "0/0" "0/0" "0/2" "0/0" "0/0" "0/0" "1/3" "0/1" [17,] "0/0" "0/2" "0/4" "0/1" "0/1" "0/0" "0/1" "0/1" [18,] NA "0/0" "4/5" "0/0" "1/1" "0/2" "1/1" "0/1" [19,] "0/0" "0/1" "0/4" "0/0" "0/0" "0/1" "0/1" "0/1" [20,] "1/1" "0/0" "0/4" "0/0" "0/1" "0/2" "0/1" "0/1" [21,] "0/0" "0/0" "0/2" "0/0" NA "0/0" "1/1" "0/0" [22,] "0/0" "0/0" "0/2" "0/0" "0/1" "0/0" "0/1" "0/0"
"$dosage" seems to return incorrect values when the number of alternate alleles is large. Here is an example: