import pandas as pd
from arboreto.algo import grnboost2
from arboreto.utils import load_tf_names
from distributed import Client, LocalCluster
# Script entry point: infer a gene regulatory network with GRNBoost2 and write
# it to a TSV file.
#
# The guard must compare the dunder names (__name__ / '__main__'). It keeps the
# cluster start-up below from being re-executed if a worker process imports
# this module.
if __name__ == '__main__':
    in_file = '1.1_exprMatrix_filtered_t.txt'
    tf_file = '1.1_inputTFs.txt'
    out_file = 'grn_output.tsv'

    # ex_matrix is a DataFrame with gene names as column names
    ex_matrix = pd.read_csv(in_file, sep='\t')

    # tf_names is read using a utility function included in Arboreto
    tf_names = load_tf_names(tf_file)

    # Instantiate a custom Dask distributed Client on a local cluster. The
    # context managers shut the client and the cluster down even if the
    # inference raises, so no worker processes are left behind in the job.
    with LocalCluster() as cluster, Client(cluster) as client:
        # compute the GRN
        network = grnboost2(expression_data=ex_matrix,
                            tf_names=tf_names,
                            client_or_address=client)

        # write the GRN to file (tab-separated, no index column, no header row)
        network.to_csv(out_file, sep='\t', index=False, header=False)
the sh file is:
#!/bin/bash
# Slurm batch script that runs grn.py (GRNBoost2) on a single node.
# Each directive must start with "#SBATCH" and must come before the first
# executable command, otherwise Slurm ignores it.
#SBATCH --job-name=GRNBoost2
#SBATCH -N 1
#SBATCH --cpus-per-task=4
#SBATCH --mail-user=meo104@pitt.edu
#SBATCH --mail-type=FAIL
#SBATCH --mem=32g
#SBATCH -t 3-00:00 # Runtime in D-HH:MM
#SBATCH --output=GRNBoost2.out

# load modules
module load arboreto/0.1.5

python grn.py
However, GRNBoost doesn't run after this warning:
FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.
expression_matrix = expression_data.as_matrix()
I'm trying to run GRNBoost as a batch job.
My grn.py file is:
import pandas as pd
from distributed import Client, LocalCluster
from arboreto.utils import load_tf_names
from arboreto.algo import grnboost2

# The guard must compare the dunder names (__name__ / '__main__').
if __name__ == '__main__':
    ...  # script body is not shown in this excerpt
the sh file is:
#!/bin/bash
# Slurm batch script that runs grn.py (GRNBoost2) on a single node.
# Each directive must start with "#SBATCH" and must come before the first
# executable command, otherwise Slurm ignores it.
#SBATCH --job-name=GRNBoost2
#SBATCH -N 1
#SBATCH --cpus-per-task=4
#SBATCH --mail-user=meo104@pitt.edu
#SBATCH --mail-type=FAIL
#SBATCH --mem=32g
#SBATCH -t 3-00:00 # Runtime in D-HH:MM
#SBATCH --output=GRNBoost2.out

# load modules
module load arboreto/0.1.5

python grn.py
However, GRNBoost doesn't run after this warning:
FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.
expression_matrix = expression_data.as_matrix()
How can I fix this?