Open Sum02dean opened 2 years ago
Note that you will need a separate Python environment to use this, because PyBigWig and Bambi require conflicting numpy versions. I recommend saving the data to CSV as follows:
# When True, the flattened train/test features and targets are written to CSV files.
SAVE_DATA = True
def spearman_scoring(y, y_true):
    """Return the Spearman rank correlation coefficient of two samples.

    Args:
        y: First sequence of observations.
        y_true: Second sequence of observations, same length as ``y``.

    Returns:
        The first element of the ``scipy.stats.spearmanr`` result, i.e. the
        correlation coefficient; the accompanying p-value is discarded.
    """
    return scipy.stats.spearmanr(y, y_true)[0]
def to_pandas(x):
    """Convert a torch tensor into a pandas DataFrame.

    Args:
        x: A ``torch.Tensor``.

    Returns:
        A ``pd.DataFrame`` built from the tensor's values.
    """
    # detach() and cpu() leave a plain CPU tensor unchanged, but let the
    # conversion succeed for tensors that track gradients or live on another
    # device, where a bare .numpy() call raises.
    return pd.DataFrame(x.detach().cpu().numpy())
# Get genes: the training set is taken from the cell_line=1 split and the
# test set from the cell_line=2 split.
train_genes, _ = chromosome_splits(cell_line=1, test_size=0.1)
_, test_genes = chromosome_splits(cell_line=2, test_size=0.1)
n_genes_train, _ = np.shape(train_genes)
n_genes_test, _ = np.shape(test_genes)

# Load train data. batch_size is the number of training genes so that a single
# batch is intended to hold the whole set (assumes one dataset item per gene
# row - TODO confirm against HistoneDataset).
train_dataloader = torch.utils.data.DataLoader(
    HistoneDataset(train_genes), shuffle=True, batch_size=n_genes_train)

# Load test data (same single-batch approach, order preserved)
test_dataloader = torch.utils.data.DataLoader(
    HistoneDataset(test_genes), shuffle=False, batch_size=n_genes_test)

# Run train loader and flatten (genes, features, bins) -> (genes, features * bins).
# The row count is read from the tensor itself, matching the test branch below,
# so the reshape cannot disagree with the batch that was actually loaded.
(x_train, y_train) = next(iter(train_dataloader))
n_genes_train, n_features, n_bins = x_train.shape
x_train = x_train.reshape(n_genes_train, n_features * n_bins)

# Run test loader; n_features and n_bins from the train batch are reused, so
# the test tensors must share the same trailing dimensions.
(x_test, y_test) = next(iter(test_dataloader))
n_genes_test, _, _ = x_test.shape
x_test = x_test.reshape(n_genes_test, n_features * n_bins)

# Save csv
if SAVE_DATA:
    to_pandas(x_train).to_csv('x_train.csv')
    to_pandas(y_train).to_csv('y_train.csv')
    to_pandas(x_test).to_csv('x_test.csv')
    to_pandas(y_test).to_csv('y_test.csv')
    print("saving complete")
Then you can load it into the bami_model.py file in utils.
build Bayesian regressor using pymc3