vanvalenlab / deepcell-tf

Deep Learning Library for Single Cell Analysis
https://deepcell.readthedocs.io

QUERY: Segmentation Issue #698

Closed DattaSujayAyyagari closed 1 year ago

DattaSujayAyyagari commented 1 year ago

In the code below, I am trying to segment a TIFF image converted to NPZ. The code runs without errors, but the segmentation output is not as desired. A screenshot of the final output is attached for reference.

#!/usr/bin/env python
# coding: utf-8

# In[1]:

import numpy as np
import matplotlib.pyplot as plt
import tifffile as tiff
import os
import pandas as pd

# In[2]:

img = tiff.imread('A9D035F9-5A78-47A6-8C41-A1A35487CEF9_codex-ome copy.tif')
img_array = np.array(img)

# In[3]:

split_ratio = 0.5  # You can adjust this ratio based on your requirements
split_index = int(len(img_array) * split_ratio)
train_data1 = img_array[:split_index]
test_data1 = img_array[split_index:]

# In[4]:

# Save the train and test data in separate NPZ files
np.savez('tissuenet_v1.0_train.npz', data=train_data1)
np.savez('tissuenet_v1.0_test.npz', data=test_data1)

# In[5]:

train_data = np.load('tissuenet_v1.0_train.npz')
test_data = np.load('tissuenet_v1.0_test.npz')

# In[6]:

print(train_data['data'].shape)
print(test_data['data'].shape)

# In[7]:

new_shape = (2601, 512, 512, 2)
data_1 = train_data['data']
train_data_reshaped = data_1[:2601, :512, :512]
train_data_reshaped = np.stack((train_data_reshaped, train_data_reshaped), axis=-1)

# In[8]:

data_2 = test_data['data']
test_data_reshaped = data_2[:2601, :512, :512]
test_data_reshaped = np.stack((test_data_reshaped, test_data_reshaped), axis=-1)

# Save the reshaped arrays so they can be reloaded in In[9]
np.savez('train_data_reshaped.npz', data=train_data_reshaped)
np.savez('test_data_reshaped.npz', data=test_data_reshaped)

# In[9]:

train_data_reshaped = np.load('train_data_reshaped.npz')
test_data_reshaped = np.load('test_data_reshaped.npz')

# In[10]:

train_data_reshaped = train_data_reshaped['data']

X_train = train_data_reshaped[:, :, :, :-1]  # Features, all channels except the last one
y_train = train_data_reshaped[:, :, :, -1:]  # Labels, the last channel

# In[11]:

test_data_reshaped = test_data_reshaped['data']

X_test = test_data_reshaped[:, :, :, :-1]  # Features, all channels except the last one
y_test = test_data_reshaped[:, :, :, -1:]  # Labels, the last channel

# In[12]:

print(X_train.shape)
print(y_train.shape)

# In[13]:

print(X_test.shape)
print(y_test.shape)

# In[14]:

from skimage.transform import resize

# Resize X_train and y_train to the desired shape
desired_shape = (256, 256, 1)
X_train_resized = np.zeros((X_train.shape[0],) + desired_shape)
y_train_resized = np.zeros((y_train.shape[0],) + desired_shape)

for i in range(X_train.shape[0]):
    X_train_resized[i] = resize(X_train[i, :, :, 0], desired_shape, anti_aliasing=True, mode='reflect')
    y_train_resized[i] = resize(y_train[i, :, :, 0], desired_shape, anti_aliasing=True, mode='reflect')

# Now X_train_resized and y_train_resized each have shape (N, 256, 256, 1)

# In[15]:

# Resize X_test and y_test to the desired shape
desired_shape_test = (256, 256, 1)
X_test_resized = np.zeros((X_test.shape[0],) + desired_shape_test)
y_test_resized = np.zeros((y_test.shape[0],) + desired_shape_test)

for i in range(X_test.shape[0]):
    X_test_resized[i] = resize(X_test[i, :, :, 0], desired_shape_test, anti_aliasing=True, mode='reflect')
    y_test_resized[i] = resize(y_test[i, :, :, 0], desired_shape_test, anti_aliasing=True, mode='reflect')

# Now X_test_resized and y_test_resized each have shape (N, 256, 256, 1)

# In[16]:

test_size = 0.5  # fraction of data saved as test
seed = 0  # seed for random train-test split

# In[17]:

from deepcell.utils.data_utils import reshape_matrix

size = 256

X_train, y_train = reshape_matrix(X_train_resized, y_train_resized, reshape_size=size)
print('X.shape: {}\ny.shape: {}'.format(X_train.shape, y_train.shape))

# In[18]:

from deepcell.utils.data_utils import reshape_matrix

size = 256

X_test, y_test = reshape_matrix(X_test_resized, y_test_resized, reshape_size=size)
print('X.shape: {}\ny.shape: {}'.format(X_test.shape, y_test.shape))

# In[19]:

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

from deepcell.model_zoo.panopticnet import PanopticNet

classes = {
    'inner_distance': 1,  # inner distance
    'outer_distance': 1,  # outer distance
    'fgbg': 2,  # foreground/background separation
}

model = PanopticNet(
    backbone='resnet50',
    input_shape=X_train_resized.shape[1:],
    norm_method='std',
    num_semantic_classes=classes)

# In[30]:

from tensorflow.keras.optimizers import SGD, Adam
from deepcell.utils.train_utils import rate_scheduler

model_name = 'watershed_centroid_nuclear_general_std'

n_epoch = 5  # Number of training epochs
test_size = .20  # fraction of data saved as test
norm_method = 'whole_image'  # data normalization

lr = 1e-5
optimizer = Adam(learning_rate=lr, clipnorm=0.001)
lr_sched = rate_scheduler(lr=lr, decay=0.99)

batch_size = 1

min_objects = 1  # throw out images with fewer than this many objects

# In[31]:

from deepcell import image_generators
from deepcell.utils import train_utils

transforms = list(classes.keys())
transforms_kwargs = {'outer-distance': {'erosion_width': 0}}

# use augmentation for training but not validation
datagen = image_generators.SemanticDataGenerator(
    rotation_range=180,
    shear_range=0,
    zoom_range=(0.75, 1.25),
    horizontal_flip=True,
    vertical_flip=True)

datagen_val = image_generators.SemanticDataGenerator(
    rotation_range=0,
    shear_range=0,
    zoom_range=0,
    horizontal_flip=0,
    vertical_flip=0)

train_data = datagen.flow(
    {'X': X_train, 'y': y_train},
    seed=seed,
    transforms=transforms,
    transforms_kwargs=transforms_kwargs,
    min_objects=min_objects,
    batch_size=batch_size)

val_data = datagen_val.flow(
    {'X': X_test, 'y': y_test},
    seed=seed,
    transforms=transforms,
    transforms_kwargs=transforms_kwargs,
    min_objects=min_objects,
    batch_size=batch_size)

# In[32]:

from deepcell.model_zoo.panopticnet import PanopticNet

classes = {
    'inner_distance': 1,
    'outer_distance': 1,
}

prediction_model = PanopticNet(
    backbone='resnet50',
    input_shape=X_train.shape[1:],
    norm_method='std',
    num_semantic_heads=2,
    num_semantic_classes=classes,
    location=True,  # should always be true
    include_top=True)

# In[33]:

from timeit import default_timer

start = default_timer()
test_images = prediction_model.predict(X_test)
watershed_time = default_timer() - start

print('Watershed segmentation of shape', test_images[0].shape, 'in', watershed_time, 'seconds.')

# In[34]:

import time

from matplotlib import pyplot as plt
import numpy as np

from skimage.feature import peak_local_max

from deepcell_toolbox.deep_watershed import deep_watershed

index = 1000

fig, axes = plt.subplots(1, 4, figsize=(20, 20))

masks = deep_watershed(
    test_images,
    min_distance=10,
    detection_threshold=0.1,
    distance_threshold=0.01,
    exclude_border=False,
    small_objects_threshold=0)

# calculated in the postprocessing above, but useful for visualizing
inner_distance = test_images[0]
outer_distance = test_images[1]

coords = peak_local_max(
    inner_distance[index],
    min_distance=10,
    threshold_abs=0.1,
    exclude_border=False)

# raw image with centroids
axes[0].imshow(X_test[index, ..., 0])
axes[0].scatter(coords[..., 1], coords[..., 0], color='r', marker='.', s=10)

axes[1].imshow(inner_distance[index, ..., 0], cmap='jet')
axes[2].imshow(outer_distance[index, ..., 0], cmap='jet')
axes[3].imshow(masks[index, ...], cmap='jet')

plt.show()

[Screenshot 2023-10-25 191331: the segmentation output described above]

rossbar commented 1 year ago

Without more specific information it's difficult to say definitively where the problem lies, but given the info above it almost certainly lies in the data and problem setup. Generally speaking, the issue tracker is for reporting issues with the library, not user code, so I will go ahead and close this as there isn't anything actionable here.
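For example, one quick check of the setup above (paths taken from the posted code) is to confirm that the labels being trained on are annotation masks rather than a copy of the raw image:

import numpy as np

# Illustrative check: instance labels should be integer masks (0 = background,
# 1..n = object IDs). In the posted code, y is sliced from the same duplicated
# raw-image array as X, so no annotation masks are ever supplied.
y = np.load('train_data_reshaped.npz')['data'][..., -1:]
print(y.dtype)            # raw intensity values here, not small label IDs
print(np.unique(y).size)  # a label mask has few unique values, not thousands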

If you end up with a minimal reproducible example of something that you believe stems from a bug in the library, please feel free to reopen.

ngreenwald commented 1 year ago

Hey @DattaSujayAyyagari, if you just want to generate predictions, I would use the pre-trained Mesmer model in this notebook: https://github.com/vanvalenlab/deepcell-tf/blob/master/notebooks/applications/Mesmer-Application.ipynb
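A minimal sketch of that approach, assuming the CODEX TIFF from the issue (the channel indices and image_mpp value are illustrative placeholders that depend on the acquisition):

import numpy as np
import tifffile as tiff
from deepcell.applications import Mesmer

# Mesmer expects a batch of images with shape (batch, x, y, 2):
# channel 0 = nuclear stain, channel 1 = membrane/cytoplasm stain.
img = tiff.imread('A9D035F9-5A78-47A6-8C41-A1A35487CEF9_codex-ome copy.tif')
nuclear, membrane = img[0], img[1]  # illustrative channel picks for a CODEX stack
X = np.stack([nuclear, membrane], axis=-1)[np.newaxis, ...]

app = Mesmer()
# image_mpp is the image resolution in microns per pixel; 0.5 is a placeholder.
masks = app.predict(X, image_mpp=0.5, compartment='whole-cell')
print(masks.shape)  # (1, x, y, 1) integer instance masks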