#####################################################################################
(1) Prerequisites and S3norm installation
(3) How to run S3norm pipeline
(5) How to run specific steps in S3norm pipeline
#####################################################################################
cd /where_user_clone_the_S3norm_GitHub/
git clone https://github.com/guanjue/S3norm.git
###### Python dependencies can be installed with the following commands
pip install --upgrade pip --user
pip install --upgrade numpy --user
pip install --upgrade scipy --user
###### Installing gawk
### Installing brew
/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
### On some Macs, the following command needs to be run before installing gawk with brew
sudo chown -R "$USER":admin $(brew --prefix)/*
### Installing gawk
brew install gawk
#####################################################################################
head file_list.txt
sig1.sorted.bedgraph sig1.ctrl.sorted.bedgraph
sig2.sorted.bedgraph sig2.ctrl.sorted.bedgraph
sig3.sorted.bedgraph sig3.ctrl.sorted.bedgraph
head sig1.UNsorted.bedgraph
chr8 65127400 65127600 77.25
chr21 40481600 40481800 72.84
chr17 19170200 19170400 63.21
chr14 32630400 32630600 51.64
chr6 118552200 118552400 129.82
chr13 93149400 93149600 295.07
chr10 117806400 117806600 142.97
chr19 1370200 1370400 223.43
chr14 28469600 28469800 167.98
chr2 181514400 181514600 220.7
sort -k1,1 -k2,2n sig1.UNsorted.bedgraph > sig1.sorted.bedgraph
sort -k1,1 -k2,2n sig2.UNsorted.bedgraph > sig2.sorted.bedgraph
sort -k1,1 -k2,2n sig3.UNsorted.bedgraph > sig3.sorted.bedgraph
sort -k1,1 -k2,2n sig1.ctrl.UNsorted.bedgraph > sig1.ctrl.sorted.bedgraph
sort -k1,1 -k2,2n sig2.ctrl.UNsorted.bedgraph > sig2.ctrl.sorted.bedgraph
sort -k1,1 -k2,2n sig3.ctrl.UNsorted.bedgraph > sig3.ctrl.sorted.bedgraph
###### The head of the bedgraph files after sorting.
###### The first three columns of the bedgraph files are exactly the same.
###### Only the 4th columns are different.
head sig1.sorted.bedgraph
chr1 7000 7200 0
chr1 18800 19000 0
chr1 62400 62600 5.02
chr1 63800 64000 188.21
chr1 95600 95800 16.41
chr1 136000 136200 0
chr1 156000 156200 0
chr1 158800 159000 0
chr1 206400 206600 51.87
chr1 217000 217200 0
head sig2.sorted.bedgraph
chr1 7000 7200 0
chr1 18800 19000 0
chr1 62400 62600 0
chr1 63800 64000 2.66
chr1 95600 95800 0
chr1 136000 136200 50.26
chr1 156000 156200 0
chr1 158800 159000 0
chr1 206400 206600 0
chr1 217000 217200 0
head sig3.sorted.bedgraph
chr1 7000 7200 0
chr1 18800 19000 0
chr1 62400 62600 0
chr1 63800 64000 0
chr1 95600 95800 0
chr1 136000 136200 0
chr1 156000 156200 0
chr1 158800 159000 0
chr1 206400 206600 0
chr1 217000 217200 0
#####################################################################################
### Setting script directory
script_directory='/where_user_clone_the_S3norm_GitHub/S3norm/'
### Setting working directory
working_directory='/where_user_clone_the_S3norm_GitHub/S3norm/example_file/'
### Entering working directory
cd $working_directory
### Run S3norm
time python $script_directory'/src/s3norm_pipeline.py' -s $script_directory'/src/' -t file_list.txt
bash run_pipeline.sh
ls -ltrh example_file/
total 91464
-rw-r--r-- 1 universe staff 2.8M Jul 29 00:46 sig1.UNsorted.bedgraph
-rw-r--r-- 1 universe staff 3.9M Jul 29 00:47 sig1.ctrl.UNsorted.bedgraph
-rw-r--r-- 1 universe staff 2.7M Jul 29 00:48 sig2.UNsorted.bedgraph
-rw-r--r-- 1 universe staff 3.9M Jul 29 00:48 sig2.ctrl.UNsorted.bedgraph
-rw-r--r-- 1 universe staff 2.7M Jul 29 00:49 sig3.UNsorted.bedgraph
-rw-r--r-- 1 universe staff 2.5M Jul 29 00:49 sig3.ctrl.UNsorted.bedgraph
-rw-r--r-- 1 universe staff 2.8M Jul 29 00:50 sig1.sorted.bedgraph
-rw-r--r-- 1 universe staff 2.7M Jul 29 00:50 sig2.sorted.bedgraph
-rw-r--r-- 1 universe staff 2.7M Jul 29 00:50 sig3.sorted.bedgraph
-rw-r--r-- 1 universe staff 3.9M Jul 29 00:50 sig1.ctrl.sorted.bedgraph
-rw-r--r-- 1 universe staff 3.9M Jul 29 00:50 sig2.ctrl.sorted.bedgraph
-rw-r--r-- 1 universe staff 2.5M Jul 29 00:51 sig3.ctrl.sorted.bedgraph
-rw-r--r-- 1 universe staff 141B Jul 29 00:51 file_list.txt
drwxr-xr-x 4 universe staff 136B Jul 29 00:52 average_ref_bedgraph
drwxr-xr-x 8 universe staff 272B Jul 29 00:52 S3norm_rc_bedgraph
drwxr-xr-x 8 universe staff 272B Jul 29 00:52 S3norm_NBP_bedgraph
drwxr-xr-x 5 universe staff 170B Jul 29 00:52 NBP_bedgraph
#####################################################################################
ls -ltrh S3norm_rc_bedgraph/
total 22176
-rw-r--r-- 1 universe staff 3.9M Jul 29 00:51 sig1.sorted.bedgraph.s3norm.bedgraph
-rw-r--r-- 1 universe staff 86B Jul 29 00:51 sig1.sorted.bedgraph.info.txt
-rw-r--r-- 1 universe staff 3.5M Jul 29 00:51 sig2.sorted.bedgraph.s3norm.bedgraph
-rw-r--r-- 1 universe staff 86B Jul 29 00:51 sig2.sorted.bedgraph.info.txt
-rw-r--r-- 1 universe staff 3.4M Jul 29 00:51 sig3.sorted.bedgraph.s3norm.bedgraph
-rw-r--r-- 1 universe staff 71B Jul 29 00:51 sig3.sorted.bedgraph.info.txt
ls -ltrh NBP_bedgraph
total 21480
-rw-r--r-- 1 universe staff 3.8M Jul 29 00:51 sig1.sorted.bedgraph.s3norm.NB.neglog10p.bedgraph
-rw-r--r-- 1 universe staff 3.4M Jul 29 00:52 sig2.sorted.bedgraph.s3norm.NB.neglog10p.bedgraph
-rw-r--r-- 1 universe staff 3.3M Jul 29 00:52 sig3.sorted.bedgraph.s3norm.NB.neglog10p.bedgraph
ls -ltrh S3norm_NBP_bedgraph/
total 22480
-rw-r--r-- 1 universe staff 4.0M Jul 29 00:52 sig1.sorted.bedgraph.NBP.s3norm.bedgraph
-rw-r--r-- 1 universe staff 71B Jul 29 00:52 sig1.sorted.bedgraph.NBP.info.txt
-rw-r--r-- 1 universe staff 3.6M Jul 29 00:52 sig2.sorted.bedgraph.NBP.s3norm.bedgraph
-rw-r--r-- 1 universe staff 86B Jul 29 00:52 sig2.sorted.bedgraph.NBP.info.txt
-rw-r--r-- 1 universe staff 3.4M Jul 29 00:52 sig3.sorted.bedgraph.NBP.s3norm.bedgraph
-rw-r--r-- 1 universe staff 86B Jul 29 00:52 sig3.sorted.bedgraph.NBP.info.txt
time python $script_directory'/src/s3norm_pipeline.py' -s $script_directory'/src/' -t file_list.txt -r median
ls -ltrh average_ref_bedgraph/
total 13296
-rw-r--r-- 1 universe staff 2.7M Jul 29 00:51 average_ref.bedgraph
-rw-r--r-- 1 universe staff 3.8M Jul 29 00:52 average_ref.bedgraph.NBP.bedgraph
#####################################################################################
script_directory='/Users/universe/Documents/2018_BG/S3norm/'
python $script_directory'/src/s3norm_pipeline.py' -s $script_directory'/src/' -t file_list.txt -r max1 -m non0mean -i 2.0 -f 0.05 -l 0.001 -a 100000 -b 0 -p z -k 0 -g 0
(1) -r : The method for generating the reference signal track. Options: max1 (default: select the dataset with the highest FRiP score as reference), median (generate signal track by using the median signal of each bin), mean (generate signal track by using the mean signal of each bin)
(2) -m : The method for matching peaks and background. Options: non0mean (default), non0median, mean, median
(1) -r (continued) : filelist_row_number (If the user wants to select one sample as the reference, use "-r filelist_row_number", where filelist_row_number is the row number (starting from 1) of the reference sample in file_list.txt)
(3) -i : The initial value for the power parameter in the non-linear transformation. Default: 2.0
(4) -f : The FDR threshold for identifying common peaks. Default: 0.05 . The range is 0.0 < x < 1.0
(5) -l : The minimum proportion of bins used as peaks for S3norm. Default: 0.001 . The range is 0.0 < x < 1.0
(6) -a : The upper limit for signal. This is to reduce the bias caused by extreme signals in the data. Default: 100000
(7) -b : The lower limit for signal. S3norm requires all signals to be x >= 0 . Default: 0
(8) -p : The method used to identify common peaks. Options: z (Default) and neglog10p (negative log10 p-value from background model)
(9) -k : The user-given common peak regions. Options: 0 (Default, the common peaks will be identified by S3norm) or filename (a file indicating which bins are the common peaks. The rows in this file match the rows in the bedgraph files. It should contain only 1 column. If the bin in a row is a common peak, the column should be 1 for that row. Otherwise, it should be 0 )
(10) -g : The user-given common background regions. Options: 0 (Default, the common background will be identified by S3norm) or filename (a file indicating which bins are the common background. The rows in this file match the rows in the bedgraph files. It should contain only 1 column. If the bin in a row is common background, the column should be 1 for that row. Otherwise, it should be 0 )
(11) -c : Whether to use cross feature mode. (T: use cross mark mode; F: do NOT use cross mark mode)
#####################################################################################
time python $script_directory'/src/s3norm.py' -r $working_directory'average_ref_bedgraph/average_ref.bedgraph' -t sig1.sorted.bedgraph -o sig1.output
time python $script_directory'/src/s3norm.py' -r $working_directory'average_ref_bedgraph/average_ref.bedgraph' -t sig1.sorted.bedgraph -o sig1.runseparately.output -m non0mean -i 2.0 -f 0.05 -l 0.001 -a 100000 -b 0 -p z -k 0 -g 0 -c F
(1) -m : The method for matching peaks and background. Options: non0mean (default), non0median, mean, median
(2) -i : The initial value for the power parameter in the non-linear transformation. Default: 2.0
(3) -f : The FDR threshold for identifying common peaks. Default: 0.05 . The range is 0.0 < x < 1.0
(4) -l : The minimum proportion of bins used as peaks for S3norm. Default: 0.001 . The range is 0.0 < x < 1.0
(5) -a : The upper limit for signal. This is to reduce the bias caused by extreme signals in the data. Default: 100000
(6) -b : The lower limit for signal. S3norm requires all signals to be x >= 0 . Default: 0
(7) -p : The method used to identify common peaks. Options: z (Default) and neglog10p (negative log10 p-value from background model)
(8) -k : The user-given common peak regions. Options: 0 (Default, the common peaks will be identified by S3norm) or filename (a file indicating which bins are the common peaks. The rows in this file match the rows in the bedgraph files. It should contain only 1 column. If the bin in a row is a common peak, the column should be 1 for that row. Otherwise, it should be 0 )
(9) -g : The user-given common background regions. Options: 0 (Default, the common background will be identified by S3norm) or filename (a file indicating which bins are the common background. The rows in this file match the rows in the bedgraph files. It should contain only 1 column. If the bin in a row is common background, the column should be 1 for that row. Otherwise, it should be 0 )
(10) -c : Whether to use cross feature mode. (T: use cross mark mode; F: do not use cross mark mode)
Rscript $script_directory'/src/negative_binomial_neglog10p.R' $working_directory'S3norm_rc_bedgraph/sig1.sorted.bedgraph.s3norm.bedgraph' sig1.ctrl.sorted.bedgraph sig1.sorted.bedgraph.s3norm.NB.neglog10p.bedgraph
#####################################################################################
Xiang, Guanjue, et al. "S3norm: simultaneous normalization of sequencing depth and signal-to-noise ratio in epigenomic data." bioRxiv (2018): 506634.