QIFEIDKN / STAGATE_Tutorials

4 stars 9 forks source link

Erro with T8_Batch.ipynb #2

Open uuup111 opened 1 year ago

uuup111 commented 1 year ago

Hello, When I run the T8_Batch.ipynb, I found a problem as following:

` After flitering: (20139, 11750) adata = adata[:, adata.var[highly_variable]]: (20139, 3000)

STAGATE_slideseqv2mob_batch.py:73: ImplicitModificationWarning: Trying to modify attribute .obs of view, initializing view as actual. adata.obs['X'] = adata.obsm['spatial'][:, 0] ------Calculating spatial graph... Traceback (most recent call last): File "STAGATE_slideseqv2mob_batch.py", line 84, in STAGATE_pyG.Cal_Spatial_Net(temp_adata, rad_cutoff=50) File "/hwfssz5/ST_BIOINTEL/P20Z10200N0039/06.groups/01.Bio_info_algorithm/renyating/software/python/lib/python3.8/site-packages/STAGATE_pyG-1.0.0-py3.8.egg/STAGATE_pyG/utils.py", line 82, in Cal_Spatial_Net nbrs = sklearn.neighbors.NearestNeighbors(radius=rad_cutoff).fit(coor) File "/hwfssz5/ST_BIOINTEL/P20Z10200N0039/06.groups/01.Bio_info_algorithm/renyating/software/python/lib/python3.8/site-packages/sklearn/neighbors/_unsupervised.py", line 175, in fit return self._fit(X) File "/hwfssz5/ST_BIOINTEL/P20Z10200N0039/06.groups/01.Bio_info_algorithm/renyating/software/python/lib/python3.8/site-packages/sklearn/neighbors/_base.py", line 444, in _fit X = self._validate_data(X, accept_sparse="csr", order="C") File "/hwfssz5/ST_BIOINTEL/P20Z10200N0039/06.groups/01.Bio_info_algorithm/renyating/software/python/lib/python3.8/site-packages/sklearn/base.py", line 577, in _validate_data X = check_array(X, input_name="X", **check_params) File "/hwfssz5/ST_BIOINTEL/P20Z10200N0039/06.groups/01.Bio_info_algorithm/renyating/software/python/lib/python3.8/site-packages/sklearn/utils/validation.py", line 909, in check_array raise ValueError( ValueError: Found array with 0 sample(s) (shape=(0, 2)) while a minimum of 1 is required by NearestNeighbors. `

uuup111 commented 1 year ago

Here is my code:

` import os import torch from tqdm import tqdm os.environ['CUDA_VISIBLE_DEVICES'] = '3' import numpy as np import scanpy as sc import matplotlib.pyplot as plt import STAGATE_pyG import pandas as pd import torch.nn.functional as F

def get_slideseq_v2_adata(file_fold): import scanpy as sc

data file

counts_file = file_fold + 'Puck_200127_15.digital_expression.txt'
coor_file = file_fold + 'Puck_200127_15_bead_locations.csv'
used_barcode_file = file_fold + 'used_barcodes.txt'

# data
counts = pd.read_csv(counts_file, sep='\t', index_col=0)
coor_df = pd.read_csv(coor_file, index_col=3)
coor_df.drop(coor_df.columns[coor_df.columns.str.contains('unnamed', case=False)], axis=1, inplace=True)
adata = sc.AnnData(counts.T)
adata.var_names_make_unique()
coor_df = coor_df.loc[adata.obs_names, ['xcoord', 'ycoord']]
adata.obsm["spatial"] = coor_df.to_numpy()
print('adata.shape:')
print(adata.shape)

sc.pp.calculate_qc_metrics(adata, inplace=True)
used_barcode = pd.read_csv(used_barcode_file, sep='\t', header=None)
used_barcode = used_barcode[0]
adata = adata[used_barcode,]
print('used_barcode shape:')
print(adata.shape)
return adata

data_name = 'slideseqv2_MOB' data_path = 'data_cell_clustering/slideSeqV2_MOB/' res_dir = 'res/' + data_name + '/' adata = get_slideseq_v2_adata(data_path) adata.var_names_make_unique()

sc.pp.filter_genes(adata, min_cells=50) print('After flitering: ', adata.shape)

Normalization

sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=3000) sc.pp.normalize_total(adata, target_sum=1e4) sc.pp.log1p(adata)

nClusters = 20 adata = adata[:, adata.var['highly_variable']] print('adata = adata[:, adata.var[highly_variable]]:') print(adata.shape) adata.obs['X'] = adata.obsm['spatial'][:, 0] adata.obs['Y'] = adata.obsm['spatial'][:, 1]

grid setting

num_batch_x = 3 num_batch_y = 2 Batch_list = STAGATE_pyG.Batch_Data(adata, num_batch_x=num_batch_x, num_batch_y=num_batch_y, spatial_key=['X', 'Y'], plot_Stats=True)

Consturcting network for each batch

for temp_adata in Batch_list: STAGATE_pyG.Cal_Spatial_Net(temp_adata, rad_cutoff=50) STAGATE_pyG.Stats_Spatial_Net(temp_adata)

data_list = [STAGATE_pyG.Transfer_pytorch_Data(adata) for adata in Batch_list] for temp in data_list: temp.cuda() STAGATE_pyG.Cal_Spatial_Net(adata, rad_cutoff=50) data = STAGATE_pyG.Transfer_pytorch_Data(adata)

from torch_geometric.loader import DataLoader

batch_size=1 or 2

loader = DataLoader(data_list, batch_size=1, shuffle=True)

hyper-parameters

num_epoch = 1000 lr=0.001 weight_decay=1e-4 hidden_dims = [512, 30] model = STAGATE_pyG.STAGATE(hidden_dims = [data_list[0].x.shape[1]]+hidden_dims).cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay) for epoch in tqdm(range(1, num_epoch+1)): for batch in loader: model.train() optimizer.zero_grad() z, out = model(batch.x, batch.edge_index) loss = F.mse_loss(batch.x, out) #F.nll_loss(out[data.train_mask], data.y[data.train_mask]) loss.backward() torch.nn.utils.clip_gradnorm(model.parameters(), 5.) optimizer.step()

The total network

data.cuda() data(x=[20139, 3000], edge_index=[2, 248439]) model.eval() z, out = model(data.x, data.edge_index)

STAGATE_rep = z.to('cpu').detach().numpy() adata.obsm['STAGATE'] = STAGATE_rep

sc.pp.neighbors(adata, use_rep='STAGATE') sc.tl.umap(adata) sc.tl.louvain(adata, resolution=0.5) plt.rcParams["figure.figsize"] = (3, 3) sc.pl.embedding(adata, basis="spatial", color="louvain",s=6, show=False)#, legend_loc=False) plt.title('') plt.axis('off') plt.savefig(res_dir + data_name +'_STAGATE.pdf')

preds = np.array(adata.obs['louvain'],dtype=int) np.savetxt(res_dir+data_name+'_STAGATE_pred_types.txt', preds, fmt='%d',delimiter='\t') `