Open yctuga opened 3 years ago
Q1: shell script titled as file hw2
output
387828 pairs aligned concordantly 0 times; of these:
4888 (1.26%) aligned discordantly 1 time
----
382940 pairs aligned 0 times concordantly or discordantly; of these:
765880 mates make up the pairs; of these:
522696 (68.25%) aligned 0 times
162176 (21.18%) aligned exactly 1 time
81008 (10.58%) aligned >1 times
95.74% overall alignment rate
Q2:
$ cat PE_2_hg38.sam |grep -v ^@ |awk 'BEGIN{FS="\t"}{print $1, $2%16}' |uniq |awk '{print $2}' |sort |uniq -c
output
13194 1 148062 13 5753427 3 226572 5 226572 9
based on the flag information, 11759814 reads are mapped 131942 + 57534272 + 226572 = 11759814 522696 reads are not mapped 148062*2 + 226572 = 522696
count split-mapped reads $ cat PE_2_hg38.sam |grep -v ^@ |awk 'BEGIN{FS="\t";OFS="\t"}{if ($6 ~ /[0-9]*N/){print $1}}' |uniq |sort |uniq |wc -l
1740403 are split-mapped reads
!/bin/bash
SBATCH --job-name=hw2_RNAseqMapping
SBATCH --partition=batch
SBATCH --ntasks=1
SBATCH --cpus-per-task=4
SBATCH --time=24:00:00
SBATCH --mem=20G
SBATCH --output=log.%j
SBATCH --mail-user=yct@uga.edu
SBATCH --mail-type=ALL
define directory
data='/home/yct/BINF8211/HW2/data' result='/home/yct/BINF8211/HW2/result' hisat_source='/home/yct/BINF8211/HW2/source'
module load
module load HISAT2/2.1.0-foss-2019b module load SAMtools/1.10-GCC-8.3.0
build index
hisat2-build -p 16 $hisat_souce/hg38.fa $hisat_source/hg38
HISAT2
hisat2 -p 4 -x $hisat_source/hg38 \ -1 $data/PE1.fastq.gz \ -2 $data/PE2.fastq.gz \ -S $result/PE_2_hg38.sam \ --rna-strandness RF -q
sort bam file
samtools view -bS $restult/PE_2_hg38.sam > $result/PE_2_hg38.bam
'
samtools sort $result/PE_2_hg38.bam $result/PE_2hg38_sorted
mkdir $result/tmp
samtools sort -T $result/tmp/ -o $result/PE_2_hg38_sorted.bam $result/PE_2_hg38.bam