s4hts / HTStream

A high throughput sequence read toolset using a streaming approach facilitated by Linux pipes
https://s4hts.github.io/HTStream/
Apache License 2.0
49 stars 9 forks source link

Is "percentage-hits" calculated properly for SeqScreener? #237

Closed samhunter closed 4 years ago

samhunter commented 4 years ago

For example, when screening for PhiX, "percentage-hits" is reported as 0.25, but there are only 15 hits out of 5,000,000 reads (0.0003%).

"Program_details": {
    "program": "hts_SeqScreener",
    "version": "v1.3.2",
    "options": {
        "append-stats-file": "01-HTS_Preproc/mouse_110_WT_C/mouse_110_WT_C.json",
        "check-read-2": false,
        "force": false,
        "inverse": false,
        "kmer": 12,
        "notes": "screen phix",
        "percentage-hits": 0.25,
        "record": false,
        "uncompressed": false
    },
    "screen_info": {
        "screenBP": 5386,
        "lookupKmers": 5372
    }
},
"Fragment": {
    "in": 5000000,
    "out": 4999985,
    "basepairs_in": 1010000000,
    "basepairs_out": 1009996970
},
"Single_end": {
    "in": 0,
    "out": 0,
    "basepairs_in": 0,
    "basepairs_out": 0,
    "hits": 0
},
"Paired_end": {
    "in": 5000000,
    "out": 4999985,
    "hits": 15,
    "Read1": {
        "basepairs_in": 505000000,
        "basepairs_out": 504998485
    },
    "Read2": {
        "basepairs_in": 505000000,
        "basepairs_out": 504998485
    }
}

}

samhunter commented 4 years ago

Never mind... that value is in the "options": {} section, so it is the configured setting rather than a computed statistic.