RGLab / flowCore

Core flow cytometry infrastructure
43 stars 25 forks source link

Subsetting does not remove all keywords corresponding to the removed channels #128

Closed SamGG closed 6 years ago

SamGG commented 6 years ago

Describe the bug: Subsetting does not remove all keywords corresponding to the removed channels.

To Reproduce

# Setup environment
# Load flowCore, remember the current working directory in `cur.dir`, and
# switch to `wrk.dir` so that the FCS files written below are created there.
# NOTE(review): "C:/demo" is a machine-specific path and must already exist
# for setwd() to succeed; adjust it when re-running this reproduction.
library(flowCore)
wrk.dir = "C:/demo" ; cur.dir = getwd() ; setwd(wrk.dir)

# Create a flowframe from scratch
# Expression matrix: 10 rows (events) x 5 columns (channels) of random
# normal values, with the columns named FL-1 ... FL-5.
# NOTE: rnorm() is not seeded here, so the values differ between runs.
ff.exprs = matrix(rnorm(50), 10, 5)
colnames(ff.exprs) = paste0("FL-", 1:5)
# The outer parentheses make R print the object in addition to assigning it.
(ff = flowFrame(
  exprs = ff.exprs
))
#> flowFrame object 'anonymous'
#> with 10 cells and 5 observables:
#>     name desc     range  minRange  maxRange
#> $P1 FL-1 FL-1 0.5090667 -1.790688 0.5090667
#> $P2 FL-2 FL-2 1.8893867 -1.354387 1.8893867
#> $P3 FL-3 FL-3 1.2112230 -2.368423 1.2112230
#> $P4 FL-4 FL-4 1.6061092 -2.563638 1.6061092
#> $P5 FL-5 FL-5 1.3979243 -2.257122 1.3979243
#> 0 keywords are stored in the 'description' slot
# Write
# Round-trip the frame through disk: write it to "ff0.fcs" and read it back
# as `ff0`. The printed summaries show 0 keywords before the round trip and
# 51 keywords afterwards.
write.FCS(ff, "ff0.fcs")
#> [1] "ff0.fcs"
(ff0 = read.FCS("ff0.fcs"))
#> flowFrame object 'ff0.fcs'
#> with 10 cells and 5 observables:
#>     name desc     range  minRange   maxRange
#> $P1 FL-1 FL-1 0.5090667 -1.790688 -0.4909333
#> $P2 FL-2 FL-2 1.8893867 -1.354387  0.8893867
#> $P3 FL-3 FL-3 1.2112230 -2.368423  0.2112230
#> $P4 FL-4 FL-4 1.6061092 -2.563638  0.6061092
#> $P5 FL-5 FL-5 1.3979243 -2.257122  0.3979243
#> 51 keywords are stored in the 'description' slot

##### Subsetting

# Work on a copy so that `ff0` stays as read from disk.
ff5 = ff0

# Add keywords
# Append eight extra per-channel keywords for channel 5 ($P5D, $P5CALIBRATION,
# $P5G, $P5F, $P5L, $P5O, $P5V, $P5T) to the existing keyword list and store
# the combined list back on the frame.
ff5.kwds = keyword(ff5)
ff5.kwds = c(ff5.kwds,
             `$P5D` = "Logarithmic,4,0.1",
             `$P5CALIBRATION` = "1,log4",
             `$P5G` = "1",
             `$P5F` = "655-730 nm",
             `$P5L` = "488nm",
             `$P5O` = "30mW",
             `$P5V` = "518",
             `$P5T` = "-20")
keyword(ff5) = ff5.kwds
# List every keyword whose name contains the literal text "$P5".
# fixed = TRUE makes "$" a plain character rather than a regex anchor, and the
# substring match also picks up names such as "flowCore_$P5Rmax".
keyword(ff5)[grep("$P5", names(keyword(ff5)), fixed = TRUE)]
#> $`$P5B`
#> [1] "32"
#> 
#> $`$P5E`
#> [1] "0,0"
#> 
#> $`$P5N`
#> [1] "FL-5"
#> 
#> $`$P5R`
#> [1] "1.39792433069503"
#> 
#> $`$P5S`
#> [1] "FL-5"
#> 
#> $`flowCore_$P5Rmax`
#> [1] "1.39792433069503"
#> 
#> $`flowCore_$P5Rmin`
#> [1] "-2.25712180137634"
#> 
#> $`$P5D`
#> [1] "Logarithmic,4,0.1"
#> 
#> $`$P5CALIBRATION`
#> [1] "1,log4"
#> 
#> $`$P5G`
#> [1] "1"
#> 
#> $`$P5F`
#> [1] "655-730 nm"
#> 
#> $`$P5L`
#> [1] "488nm"
#> 
#> $`$P5O`
#> [1] "30mW"
#> 
#> $`$P5V`
#> [1] "518"
#> 
#> $`$P5T`
#> [1] "-20"

# Keep only columns 3 and 4 (FL-3 and FL-4), dropping FL-1, FL-2 and FL-5.
(ff6 = ff5[,3:4])
#> flowFrame object 'ff0.fcs'
#> with 10 cells and 2 observables:
#>     name desc    range  minRange  maxRange
#> $P3 FL-3 FL-3 1.211223 -2.368423 0.2112230
#> $P4 FL-4 FL-4 1.606109 -2.563638 0.6061092
#> 37 keywords are stored in the 'description' slot
# Keywords present in the subset but not in the original: none were added.
setdiff(names(keyword(ff6)), names(keyword(ff5)))
#> character(0)
# Keywords dropped by the subsetting. Note that no "flowCore_$P..." name and
# no "$P5CALIBRATION" appears in this list, i.e. those were not removed.
setdiff(names(keyword(ff5)), names(keyword(ff6)))
#>  [1] "$P1B" "$P1E" "$P1N" "$P1R" "$P1S" "$P2B" "$P2E" "$P2N" "$P2R" "$P2S"
#> [11] "$P5B" "$P5E" "$P5N" "$P5R" "$P5S" "$P5D" "$P5G" "$P5F" "$P5L" "$P5O"
#> [21] "$P5V" "$P5T"

# Keywords of the subset that still mention channel 5.
keyword(ff6)[grep("$P5", names(keyword(ff6)), fixed = TRUE)]
#> $`flowCore_$P5Rmax`
#> [1] "1.39792433069503"
#> 
#> $`flowCore_$P5Rmin`
#> [1] "-2.25712180137634"
#> 
#> $`$P5CALIBRATION`
#> [1] "1,log4"

# Some keywords have not been removed: flowCore_$P5Rmax, flowCore_$P5Rmin and
# $P5CALIBRATION still refer to the dropped channel FL-5.

# Write
# Write the subset to "ff6.fcs" (passing delimiter = "/") and read it back to
# check whether the leftover keywords survive the round trip through disk.
write.FCS(ff6, "ff6.fcs", delimiter = "/")
#> [1] "ff6.fcs"

(ff66 = read.FCS("ff6.fcs"))
#> flowFrame object 'ff0.fcs'
#> with 10 cells and 2 observables:
#>     name desc    range  minRange  maxRange
#> $P1 FL-3 FL-3 1.211223 -2.368423 0.5090667
#> $P2 FL-4 FL-4 1.606109 -2.563638 1.8893867
#> 38 keywords are stored in the 'description' slot
# After re-reading, FL-3 and FL-4 are numbered $P1 and $P2, and the printed
# maxRange values (0.5090667, 1.8893867) equal those shown for the original
# FL-1 and FL-2 in the very first printout.
# NOTE(review): presumably these come from leftover flowCore_$P1Rmax /
# flowCore_$P2Rmax keywords of the dropped channels - confirm.

keyword(ff66)[grep("$P5", names(keyword(ff66)), fixed = TRUE)]
#> $`$P5CALIBRATION`
#> [1] "1,log4"
#> 
#> $`flowCore_$P5Rmax`
#> [1] "1.39792433069503"
#> 
#> $`flowCore_$P5Rmin`
#> [1] "-2.25712180137634"

# Not removed when writing file: the same three channel-5 keywords are still
# present after the write/read round trip.

# Back to current dir
# Restore the working directory saved in `cur.dir` during setup, then report
# the flowCore version this reproduction was run with.
setwd(cur.dir)
packageVersion("flowCore")
#> [1] '1.47.7'

Created on 2018-09-01 by the reprex package (v0.2.0).

Expected behavior: No reference to the removed channels remains in the keywords.

mikejiang commented 6 years ago

Similar to #127: many keywords (e.g. flowCore_$P here) are not standard, and thus we don't want to deal with them unless it is really critical.

SamGG commented 6 years ago

I think the flowCore_$P keywords should be in agreement with the data. Naively, if these keywords are not really useful, why not discard them before writing?