ytchen0323 / cloud-scale-bwamem

Apache License 2.0
15 stars 9 forks source link

Why does it output so many ADAM AlignmentRecords? #13

Open xubo245 opened 7 years ago

xubo245 commented 7 years ago

I aligned 10000000 single-end reads to GRCh38 chr1, and the result is 99592633 AlignmentRecords...

Shouldn't it output about 10000000 AlignmentRecords? Why are there 99592633 AlignmentRecords?

input:

BWAMEMCommand(fastaInputPath=/home/hadoop/disk2/xubo/ref/GRCH38L1Index/GRCH38chr1L3556522.fasta, fastqHDFSInputPath=/xubo/project/alignment/cs-bwamem/input/fastq/newg38L50c10000000Nhs20SingleP64bn200000000.fastq, isPairEnd=false, fastqInputFolderNum=1, batchedFolderNum=1, isPSWBatched=true, subBatchSize=10, isPSWJNI=false, jniLibPath=/home/hadoop/disk2/xubo/tools/cloud-scale-bwamem-0.2.2/target/jniNative.so, outputChoice=2, outputPath=/xubo/project/alignment/cs-bwamem/input/fastq/newg38L50c10000000Nhs20SingleP64bn200000000t10k1.adam, localRef=1, headerLine=@RG ID:foo SM:bar, isSWExtBatched=true, swExtBatchSize=1024, isFPGAAccSWExtend=false, fpgaSWExtThreshold=128, jniSWExtendLibPath=./target/jniSWExtend.so)

take(10):


{"readNum": 0, "contig": {"contigName": "chr1", "contigLength": 248956422, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 113456428, "oldPosition": null, "end": 113456478, "mapq": 60, "readName": "chr1_113456059_113456478_0:0:0_1:0:0_0", "sequence": "AGCTATCCAGTTCATTTTATTTCAATCTCAAATTTGATAAAGTACCAATA", "qual": "22222222222222222222222222222222222222222222222222", "cigar": "50M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": true, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": "48A1", "origQual": null, "attributes": "NM:i:1\tAS:i:48\tXS:i:19\tRG:Z:foo", "recordGroupName": "foo", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": "bar", "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": {"contigName": "chr1", "contigLength": 248956422, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 147866406, "oldPosition": null, "end": 147866456, "mapq": 16, "readName": "chr1_147866015_147866456_1:0:0_3:0:0_1", "sequence": "ATGGTACAGAACTGAGGCCTCAGAAATAACACCTCACATCTGCAACTGTC", "qual": "22222222222222222222222222222222222222222222222222", "cigar": "50M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": true, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": "4A7A20A16", "origQual": null, "attributes": "NM:i:3\tAS:i:35\tXS:i:31\tRG:Z:foo", "recordGroupName": "foo", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": "bar", "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": {"contigName": "chr1", "contigLength": 248956422, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 93466506, "oldPosition": null, "end": 93466552, "mapq": 0, "readName": "chr1_147866015_147866456_1:0:0_3:0:0_1", "sequence": "GTTGCAGATGTGAGGTGTTATTTCTGAGGCCTCAGTTCTGTACCAT", "qual": "2222222222222222222222222222222222222222222222", "cigar": "4H46M", "oldCigar": null, "basesTrimmedFromStart": 4, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": true, "supplementaryAlignment": false, "mismatchingPositions": "12T20T7T4", "origQual": null, "attributes": "NM:i:3\tAS:i:31\tRG:Z:foo", "recordGroupName": "foo", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": "bar", "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": {"contigName": "chr1", "contigLength": 248956422, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 37623647, "oldPosition": null, "end": 37623697, "mapq": 60, "readName": "chr1_37623648_37624206_0:0:0_2:0:0_2", "sequence": "AGGCAGGATCCCTTGAGCCTAGGAGTTGCAGACCAGCTTGGGCAACATAG", "qual": "22222222222222222222222222222222222222222222222222", "cigar": "50M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": "50", "origQual": null, "attributes": "NM:i:0\tAS:i:50\tXS:i:0\tRG:Z:foo", "recordGroupName": "foo", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": "bar", "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": {"contigName": "chr1", "contigLength": 248956422, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 102868139, "oldPosition": null, "end": 102868189, "mapq": 60, "readName": "chr1_102867607_102868189_2:0:0_0:0:0_3", "sequence": "ATATGGCTGTGGAGGCCTCACAATCATGGTGGAAAACAAAGTGGGAGCAA", "qual": "22222222222222222222222222222222222222222222222222", "cigar": "50M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": true, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": "50", "origQual": null, "attributes": "NM:i:0\tAS:i:50\tXS:i:32\tRG:Z:foo", "recordGroupName": "foo", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": "bar", "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": {"contigName": "chr1", "contigLength": 248956422, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 29956235, "oldPosition": null, "end": 29956269, "mapq": 0, "readName": "chr1_102867607_102868189_2:0:0_0:0:0_3", "sequence": "TTCCACCATGATTGTGAGGCCTCCACAGCCATAT", "qual": "2222222222222222222222222222222222", "cigar": "16H34M", "oldCigar": null, "basesTrimmedFromStart": 16, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": true, "supplementaryAlignment": false, "mismatchingPositions": "32G1", "origQual": null, "attributes": "NM:i:1\tAS:i:32\tRG:Z:foo", "recordGroupName": "foo", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": "bar", "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": {"contigName": "chr1", "contigLength": 248956422, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 149368766, "oldPosition": null, "end": 149368816, "mapq": 0, "readName": "chr1_146252067_146252539_1:0:0_0:0:0_4", "sequence": "AGAGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTTGGCT", "qual": "22222222222222222222222222222222222222222222222222", "cigar": "50M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": true, "secondaryAlignment": false, "supplementaryAlignment": false, "mismatchingPositions": "50", "origQual": null, "attributes": "NM:i:0\tAS:i:50\tXS:i:50\tRG:Z:foo", "recordGroupName": "foo", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": "bar", "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": {"contigName": "chr1", "contigLength": 248956422, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 146252489, "oldPosition": null, "end": 146252539, "mapq": 0, "readName": "chr1_146252067_146252539_1:0:0_0:0:0_4", "sequence": "AGCCAAGATCACGCCACTGCACTCCAGCCTGGGCAACAGAGCAAGACTCT", "qual": "22222222222222222222222222222222222222222222222222", "cigar": "50M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": true, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": true, "supplementaryAlignment": false, "mismatchingPositions": "50", "origQual": null, "attributes": "NM:i:0\tAS:i:50\tRG:Z:foo", "recordGroupName": "foo", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": "bar", "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": {"contigName": "chr1", "contigLength": 248956422, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 43631407, "oldPosition": null, "end": 43631457, "mapq": 0, "readName": "chr1_146252067_146252539_1:0:0_0:0:0_4", "sequence": "AGAGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTGGCGTGATCTTGGCT", "qual": "22222222222222222222222222222222222222222222222222", "cigar": "50M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": false, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": true, "supplementaryAlignment": false, "mismatchingPositions": "50", "origQual": null, "attributes": "NM:i:0\tAS:i:50\tRG:Z:foo", "recordGroupName": "foo", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": "bar", "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
{"readNum": 0, "contig": {"contigName": "chr1", "contigLength": 248956422, "contigMD5": null, "referenceURL": null, "assembly": null, "species": null, "referenceIndex": 0}, "start": 148703234, "oldPosition": null, "end": 148703284, "mapq": 0, "readName": "chr1_146252067_146252539_1:0:0_0:0:0_4", "sequence": "AGCCAAGATCACGCCACTGCACTCCAGCCTGGGCAACAGAGCAAGACTCT", "qual": "22222222222222222222222222222222222222222222222222", "cigar": "50M", "oldCigar": null, "basesTrimmedFromStart": 0, "basesTrimmedFromEnd": 0, "readPaired": false, "properPair": false, "readMapped": true, "mateMapped": false, "failedVendorQualityChecks": false, "duplicateRead": false, "readNegativeStrand": true, "mateNegativeStrand": false, "primaryAlignment": false, "secondaryAlignment": true, "supplementaryAlignment": false, "mismatchingPositions": "50", "origQual": null, "attributes": "NM:i:0\tAS:i:50\tRG:Z:foo", "recordGroupName": "foo", "recordGroupSequencingCenter": null, "recordGroupDescription": null, "recordGroupRunDateEpoch": null, "recordGroupFlowOrder": null, "recordGroupKeySequence": null, "recordGroupLibrary": null, "recordGroupPredictedMedianInsertSize": null, "recordGroupPlatform": null, "recordGroupPlatformUnit": null, "recordGroupSample": "bar", "mateAlignmentStart": null, "mateAlignmentEnd": null, "mateContig": null, "inferredInsertSize": null}
xubo245 commented 7 years ago

Is the output not filtered?

The read chr1_146252067_146252539_1:0:0_0:0:0_4 has many AlignmentRecords (four in the sample above)...