library(withr)
setRepositories(ind=1:3)
devtools::install_github("Durenlab/RegNMF",ref="main")
Check these path that will be used in our program.
#Using "which" to check the path
which macs2
which awk
#On linux you can use these commands to unzip
#Unzip Filtered feature barcode matrix MEX
tar -zxvf [XXX]filtered_feature_bc_matrix.tar.gz
#There will be a folder named "filtered_feature_bc_matrix/" contain "barcodes.tsv.gz", "matrix.mtx.gz", "features.tsv.gz". Unzip them
gunzip filtered_feature_bc_matrix/*.gz
#Unzip ATAC Per fragment information file
gunzip [XXX]atac_fragments.tsv.gz
We will use these data in our program.
detectCores()
function to check how many core you can use in R.You can use demo function for clustering.
See the function detail at ./man/demo.Rd
demo(in_foldername,out_foldername,fragment,macs2path,awktoolspath,core)
Ensure there are files named "matrix.mtx", "features.tsv", "barcodes.tsv" in the input folder.
callpeak()
in the fourth step requiers MACS2 for peak calling, so you may not able to run the fourth step if you have not installed MACS2.
Use "read_ATAC_GEX" for loading data.
element=read_ATAC_GEX(in_foldername)
Use "RegNMF" for cross-modalities dimension reduction; use "clustering" for subpopulation identification.
W123H=RegNMF(E=element$E,
O=element$O,
Symbol=element$Symbol,
PeakName=element$PeakName,
Symbol_location=element$Symbol_location,
Peak_location=element$Peak_location)
ans=clustering(W123H$H)
ans$plot is a figure of tsne.
Use "SplitGroup" to predict raw subpoplation-specific cis-regulatory networks. These networks could be further refined in the next step.
groupName=SplitGroup(foldername=out_foldername,
barcord=element$barcode[,1],
W3=W123H$W3,
H=W123H$H,
Reg_symbol_name=W123H$Reg_gene_name,
Reg_peak_name=W123H$Reg_peak_name,
cluster=ans$S[1,])
In this function we'll make a file shows pair of barcords and clusters and a folder contains predicted regulations in each clusters.
Use "callpeak" to call peak in each clusters and refine the subpopulation-specific cis-regulatory netyworks.
visual_need=callpeak(outfolder=out_foldername,
fragment=fragment,
barcord_cluster_whole=groupName["barcordFileName"],
oldRegFolder=groupName["RegFolderName"],
macs2path=macs2path,
awkpath=awkpath,
cluster=unique(ans$S[1,]),
clusterL=length(ans$S[1,]))
In this function we'll make three folders, which named "barcord_cluster", "peak_cluster"and "RE_cluster" , contain barcords, peaks, regulations infomation by each clusters.
Use "Visualization" to run interactive visualization function that takes the genomics region as input and plots the genes, peaks, and interactions in the given range. It includes the genes, the raw peaks from all cells (before clustering), peaks of each cluster from MACS2, and the predicted (refined) peak-gene association in each cluster . The result will be output as "result.pdf"
Visualization(wholef=in_foldername,
peakf=visual_need["peak_clusterF"],
regf=visual_need["RE_clusterF"],
chr=chr,
from=from,
to=to,
clusterlist=clusterlist,
width=width,
height=height)