git clone https://github.com/guanjue/IDEAS_2018.git
https://www.gnu.org/software/gsl/manual/gsl-ref.html
http://hgdownload.soe.ucsc.edu/admin/exe/
run_IDEAS.parafile
>>> head -100 run_IDEAS.parafile
id= test_IDEAS #job id, also used as output file names
email= giardine@bx.psu.edu
thread= 32 #number of threads to be used for parallelization
prepmat= 0 #1: preprocess data, 0: for data already processed for ideas
build= mm10 #hg19, hg38, mm9, mm10, not used if bedfile is specified
prenorm= 0 #1: normalize data (assumed 100Million reads in total), 0: do not normalize
bed= mm10.noblack_list.bin #user specified windows
sig= mean #mean: mean signal per window, max: max signal per window
ideas= 1 #1: run ideas, 0: not run ideas
train= 50 #number of random starts, used to select states, 0: no training
trainsz= 500000
log2= 0 #take log2(x+num), 0: do not take log2
cap= 16 #maximum signal is capped at 16
norm= 0 #1: standardize by mean and std, 0: no normalization
num_state= 0 #specify number of states for the model, 0: let program determine
num_start= 100 #specify number of states at the initialization stage
minerr= 0.5 #minimum standard deviation in each state, usually between (0,1]
#otherpara= /gpfs/group/yzz2/default/scratch/roadmap_analysis/impute/bin_12mark_1e-4.para0
smooth= 0 #make states more homogeneous along genome, 0: original ideas
burnin= 20 #number of burnins, include both sampling and maximization
sample= 5 #number of steps for maximization, 1 may be fine
#split= mm10.noblack_list.bin.inv #specify an interval file, ideas will run on different intervals separately. The name of interval file is $bed'.inv'
impute= None #specify which marks to be imputed; or All or None
maketrack= 1 #1: make custom tracks for browser visual, 0: no tracks
#statefiles= /storage/home/gzx103/scratch/gtex_encode/bams/entex_data_output_0_16lim_ideas_01/ideas_state_filelist.txt #only needed if ideas was not run; separate file names by ","
#hubURL= "http://bx.psu.edu/~yuzhang/tmp/" #URL where the custom tracks will be stored
#mycolor= 255,0,0;255,255,0;0,255,0;0,0,255;50,50,50 #rgb color for each mark, semicolon delimited
#statecolor= /gpfs/group/yzz2/default/scratch/roadmap_analysis/impute/statecolort.txt #rgb color of each state
#statename= statename.txt #state names
#cellinfo= cellinfo.txt #cell type information, order of cell types will be the same in browser, 4 columns: cell type id as shown in state files, cell type short label to be shown in browser, cell type long label, cell type text color
thread= 32 #number of threads to be used for parallelization
build= mm10 #hg19, hg38, mm9, mm10, not used if bedfile is specified
bed= mm10.noblack_list.bin #user specified windows
split= mm10.noblack_list.bin.inv #specify an interval file, ideas will run on different intervals separately
cap= 16 #maximum signal is capped at 16
impute= None #specify which marks to be imputed; or All or None; If user wants to keep the imputed signal, set it as 'All'
mm10.noblack_list.bin
>>> head mm10.noblack_list.bin
chr1 0 200 R1
chr1 200 400 R2
chr1 400 600 R3
chr1 600 800 R4
chr1 800 1000 R5
chr1 1000 1200 R6
chr1 1200 1400 R7
chr1 1400 1600 R8
chr1 1600 1800 R9
chr1 1800 2000 R10
......
run_IDEAS.input
>>> head run_IDEAS.input
ERY_ad atac /storage/home/gzx103/group/software/IDEAS/IDEAS_2018/test_data/run_IDEAS_input/ERY_ad.atac.1M.txt
MEP atac /storage/home/gzx103/group/software/IDEAS/IDEAS_2018/test_data/run_IDEAS_input/MEP.atac.1M.txt
ERY_ad h3k27ac /storage/home/gzx103/group/software/IDEAS/IDEAS_2018/test_data/run_IDEAS_input/ERY_ad.h3k27ac.1M.txt
MEP h3k27ac /storage/home/gzx103/group/software/IDEAS/IDEAS_2018/test_data/run_IDEAS_input/MEP.h3k27ac.1M.txt
......
cell_mark=NK_atac
bigWigAverageOverBed $cell_mark'.bw' whole_genome_bin.bed $cell_mark'.bw.tab'
sort -k1,1 $cell_mark'.bw.tab' | cut -f5 > $cell_mark'.bw.tab.sig'
>>> head /storage/home/gzx103/group/software/IDEAS/IDEAS_2018/test_data/run_IDEAS_input/ERY_ad.atac.1M.txt
0
0
0
0
0
......
cp ~/group/software/IDEAS/IDEAS_2018/run_IDEAS.sh working_dir/
cp ~/group/software/IDEAS/IDEAS_2018/run_IDEAS.parafile working_dir/
cp ~/group/software/IDEAS/IDEAS_2018/run_IDEAS.input working_dir/
# Also make sure all the paths in "run_IDEAS.input" are absolute paths.
run_IDEAS.input
>>> head run_IDEAS.input
ERY_ad atac /storage/home/gzx103/group/software/IDEAS/IDEAS_2018/test_data/run_IDEAS_input/ERY_ad.atac.1M.txt
MEP atac /storage/home/gzx103/group/software/IDEAS/IDEAS_2018/test_data/run_IDEAS_input/MEP.atac.1M.txt
ERY_ad h3k27ac /storage/home/gzx103/group/software/IDEAS/IDEAS_2018/test_data/run_IDEAS_input/ERY_ad.h3k27ac.1M.txt
MEP h3k27ac /storage/home/gzx103/group/software/IDEAS/IDEAS_2018/test_data/run_IDEAS_input/MEP.h3k27ac.1M.txt
......
>>> head -100 run_IDEAS.sh
###### run IDEAS
######
### cp script in the directory
IDEAS_job_name=run_IDEAS
script_dir=/storage/home/gzx103/group/software/IDEAS/IDEAS_2018/
output_dir=/storage/home/gzx103/group/software/IDEAS/IDEAS_2018/test_data/run_IDEAS_result/
binfile=mm10.noblack_list.bin #(Absolute path is required if file isn't under the working directory)
### make output directory
mkdir -p $output_dir
### cp scripts to the working directory
cp -r $script_dir'bin' ./
cp -r $script_dir'data' ./
### get genome inv file
time python $script_dir'bin/bed2inv.py' -i $binfile -o $binfile'.inv'
### run IDEAS
time Rscript bin/runme.R run_IDEAS.input run_IDEAS.parafile $output_dir
### rm tmp files
rm $output_dir*tmp*
### get heatmap
time Rscript bin/get_heatmap.R $output_dir$IDEAS_job_name'.para0' FALSE ./bin/createGenomeTracks.R
thread= 32 #number of threads to be used for parallelization
build= mm10 #hg19, hg38, mm9, mm10, not used if bedfile is specified
bed= mm10.noblack_list.bin #user specified windows. (Absolute path is required if file isn't under the working directory)
time bash run_IDEAS.sh
output_dir=/storage/home/gzx103/group/software/IDEAS/IDEAS_2018/test_data/run_IDEAS_result/
track_dir=/storage/home/gzx103/group/software/IDEAS/IDEAS_2018/test_data/run_IDEAS_result/Tracks/