Open Seojii opened 3 years ago
+1
+1
Hey @Seojii, @chen-chunling and @mydesign-star,
You can use this code here with a slight modification to meet your goals. I hope this can help you.
Cheers
Hey @Seojii, @chen-chunling and @mydesign-star,
You can use this code here with a slight modification to meet your goals. I hope this can help you.
Cheers
Hi. The code is extremely helpful. However, I'm still not sure about one thing: how do we select or pass in a selected point? Is it related to `void_pixels`?
@fadamsyah The third point is automatically calculated. It is that point inside the box which is farthest away from the box boundaries (according to euclidean distance). The relevant code is here - https://github.com/shiyinzhang/Inside-Outside-Guidance/blob/696ec2ddae2e994541cc9d81bb3c41984e233c64/dataloaders/helpers.py#L170-L177
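@fadamsyah 第三点是自动计算的。它是盒子内离盒子边界最远的那个点(根据欧几里得距离)。相关代码在这里 - https://github.com/shiyinzhang/Inside-Outside-Guidance/blob/696ec2ddae2e994541cc9d81bb3c41984e233c64/dataloaders/helpers.py#L170-L177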
Hello, I think the third point is computed automatically only during training, where it imitates a person's click. When testing a single image, we can pass in the third point ourselves.
Hey @Seojii, @chen-chunling and @mydesign-star,
You can use this code here with a slight modification to meet your goals. I hope this can help you.
Cheers
Hi, I can't access the repo because it requires me to sign in, and it seems users can only be added by that community (indatalabs). Could you share the code here or provide a publicly accessible link? Thanks.
Here is a simple script (you need to download the pretrained model first, though):
from datetime import datetime
import scipy.misc as sm
from collections import OrderedDict
import glob
import numpy as np
import socket
# PyTorch includes
import torch
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader
# Custom includes
from dataloaders.combine_dbs import CombineDBs as combine_dbs
import dataloaders.pascal as pascal
import dataloaders.sbd as sbd
from dataloaders import custom_transforms as tr
from networks.loss import class_cross_entropy_loss
from dataloaders.helpers import *
from networks.mainnetwork import *
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import argparse
def process(image_name, gpu_id=0, weights_path='IOG_PASCAL_SBD.pth'):
    """Interactively segment one object in a single image with a pretrained IOG network.

    The image is loaded and resized to 1700x850, the user draws a box around
    the target with ``cv2.selectROI``, the box is pushed through the IOG
    transform pipeline and the network, and the predicted mask is shown twice:
    blended over the image and as a standalone grayscale mask.

    Args:
        image_name: Path of the image to segment.
        gpu_id: Index of the CUDA device to run on. A negative value, or the
            absence of a CUDA device, selects the CPU.
        weights_path: Path of the checkpoint handed to ``torch.load``.

    Raises:
        ValueError: If the selected box has zero width or height.
    """
    # Set gpu_id to -1 to run in CPU mode, otherwise set the id of the corresponding gpu
    use_cuda = gpu_id >= 0 and torch.cuda.is_available()
    device = torch.device('cuda:' + str(gpu_id) if use_cuda else 'cpu')
    if use_cuda:
        print('Using GPU: {} '.format(gpu_id))

    # Number of input channels (RGB + heatmap of IOG points)
    nInputChannels = 5

    # Network definition
    net = Network(nInputChannels=nInputChannels,
                  num_classes=1,
                  backbone='resnet101',
                  output_stride=16,
                  sync_bn=None,
                  freeze_bn=False)

    # map_location lets a checkpoint that was saved from a GPU load on a
    # CPU-only machine instead of failing while deserialising CUDA tensors.
    pretrain_dict = torch.load(weights_path, map_location=device)
    net.load_state_dict(pretrain_dict)
    net.to(device)
    net.eval()

    image = np.array(Image.open(image_name).convert('RGB').resize((1700, 850)))

    # PIL produced RGB data; OpenCV windows expect BGR.
    im_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    roi = cv2.selectROI(im_bgr)
    x, y, w, h = (int(v) for v in roi)
    if w <= 0 or h <= 0:
        # An empty selection yields an all-zero box mask, leaving nothing to crop.
        raise ValueError('No region selected: the box must have a non-zero width and height')

    image = image.astype(np.float32)

    # The user-drawn box stands in for the ground-truth mask; everything
    # outside it is marked as void.
    bbox = np.zeros_like(image[..., 0])
    bbox[y:y + h, x:x + w] = 1
    void_pixels = 1 - bbox
    sample = {'image': image, 'gt': bbox, 'void_pixels': void_pixels}

    trns = transforms.Compose([
        tr.CropFromMask(crop_elems=('image', 'gt', 'void_pixels'), relax=30, zero_pad=True),
        tr.FixedResize(
            resolutions={'gt': None,
                         'crop_image': (512, 512),
                         'crop_gt': (512, 512),
                         'crop_void_pixels': (512, 512)},
            flagvals={'gt': cv2.INTER_LINEAR,
                      'crop_image': cv2.INTER_LINEAR,
                      'crop_gt': cv2.INTER_LINEAR,
                      'crop_void_pixels': cv2.INTER_LINEAR}),
        tr.IOGPoints(sigma=10, elem='crop_gt', pad_pixel=10),
        tr.ToImage(norm_elem='IOG_points'),
        tr.ConcatInputs(elems=('crop_image', 'IOG_points')),
        tr.ToTensor()])
    tr_sample = trns(sample)

    # Add the batch dimension and run the forward pass without autograd
    # bookkeeping; only the last network output is used.
    inputs = tr_sample['concat'][None].to(device)
    with torch.no_grad():
        outputs = net.forward(inputs)[-1]

    pred = np.transpose(outputs.cpu().numpy()[0, :, :, :], (1, 2, 0))
    pred = 1 / (1 + np.exp(-pred))  # logits -> probabilities
    pred = np.squeeze(pred)

    # Paste the cropped prediction back into a full-size mask.
    gt = tens2image(tr_sample['gt'])
    bbox = get_bbox(gt, pad=30, zero_pad=True)
    result = crop2fullmask(pred, bbox, gt, zero_pad=True, relax=0, mask_relax=False)

    # Blend a solid overlay (channel 2 of the RGB image) into the masked area.
    light = np.zeros_like(image)
    light[:, :, 2] = 255.
    alpha = 0.5
    mask = result[..., None]
    blending = (alpha * light + (1 - alpha) * image) * mask + (1 - mask) * image
    # Clamp on both sides so the uint8 cast below cannot wrap around.
    blending = np.clip(blending, 0, 255)
    print('sahpe image: ', blending.shape)

    cv2.imshow('resulting ', cv2.cvtColor(blending.astype(np.uint8), cv2.COLOR_RGB2BGR))
    cv2.waitKey(0)

    # `result` is used as a 2-D mask above (result[..., None]); an RGB->BGR
    # conversion rejects single-channel input, so show it directly as grayscale.
    # NOTE(review): values are presumably in [0, 1] because they derive from the
    # sigmoid output -- confirm against crop2fullmask.
    mask_vis = (np.clip(result, 0, 1) * 255).astype(np.uint8)
    cv2.imshow('resulting segmentation', mask_vis)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
if __name__ == '__main__':
    # Command-line entry point: read the image path and segment that image.
    cli = argparse.ArgumentParser(description='Run class agnostic segmentation')
    cli.add_argument(
        '--image_name',
        default='test360.png',
        type=str,
        help='path to target image',
    )
    process(cli.parse_args().image_name)
Here is a simple script (you need to download the pretrained model first, though):
from datetime import datetime
import scipy.misc as sm
from collections import OrderedDict
import glob
import numpy as np
import socket

# PyTorch includes
import torch
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader

# Custom includes
from dataloaders.combine_dbs import CombineDBs as combine_dbs
import dataloaders.pascal as pascal
import dataloaders.sbd as sbd
from dataloaders import custom_transforms as tr
from networks.loss import class_cross_entropy_loss
from dataloaders.helpers import *
from networks.mainnetwork import *
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import argparse


def process(image_name, gpu_id=0, weights_path='IOG_PASCAL_SBD.pth'):
    """Interactively segment one object in a single image with a pretrained IOG network.

    The image is loaded and resized to 1700x850, the user draws a box around
    the target with ``cv2.selectROI``, the box is pushed through the IOG
    transform pipeline and the network, and the predicted mask is shown twice:
    blended over the image and as a standalone grayscale mask.

    Args:
        image_name: Path of the image to segment.
        gpu_id: Index of the CUDA device to run on. A negative value, or the
            absence of a CUDA device, selects the CPU.
        weights_path: Path of the checkpoint handed to ``torch.load``.

    Raises:
        ValueError: If the selected box has zero width or height.
    """
    # Set gpu_id to -1 to run in CPU mode, otherwise set the id of the corresponding gpu
    use_cuda = gpu_id >= 0 and torch.cuda.is_available()
    device = torch.device('cuda:' + str(gpu_id) if use_cuda else 'cpu')
    if use_cuda:
        print('Using GPU: {} '.format(gpu_id))

    # Number of input channels (RGB + heatmap of IOG points)
    nInputChannels = 5

    # Network definition
    net = Network(nInputChannels=nInputChannels,
                  num_classes=1,
                  backbone='resnet101',
                  output_stride=16,
                  sync_bn=None,
                  freeze_bn=False)

    # map_location lets a checkpoint that was saved from a GPU load on a
    # CPU-only machine instead of failing while deserialising CUDA tensors.
    pretrain_dict = torch.load(weights_path, map_location=device)
    net.load_state_dict(pretrain_dict)
    net.to(device)
    net.eval()

    image = np.array(Image.open(image_name).convert('RGB').resize((1700, 850)))

    # PIL produced RGB data; OpenCV windows expect BGR.
    im_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    roi = cv2.selectROI(im_bgr)
    x, y, w, h = (int(v) for v in roi)
    if w <= 0 or h <= 0:
        # An empty selection yields an all-zero box mask, leaving nothing to crop.
        raise ValueError('No region selected: the box must have a non-zero width and height')

    image = image.astype(np.float32)

    # The user-drawn box stands in for the ground-truth mask; everything
    # outside it is marked as void.
    bbox = np.zeros_like(image[..., 0])
    bbox[y:y + h, x:x + w] = 1
    void_pixels = 1 - bbox
    sample = {'image': image, 'gt': bbox, 'void_pixels': void_pixels}

    trns = transforms.Compose([
        tr.CropFromMask(crop_elems=('image', 'gt', 'void_pixels'), relax=30, zero_pad=True),
        tr.FixedResize(
            resolutions={'gt': None,
                         'crop_image': (512, 512),
                         'crop_gt': (512, 512),
                         'crop_void_pixels': (512, 512)},
            flagvals={'gt': cv2.INTER_LINEAR,
                      'crop_image': cv2.INTER_LINEAR,
                      'crop_gt': cv2.INTER_LINEAR,
                      'crop_void_pixels': cv2.INTER_LINEAR}),
        tr.IOGPoints(sigma=10, elem='crop_gt', pad_pixel=10),
        tr.ToImage(norm_elem='IOG_points'),
        tr.ConcatInputs(elems=('crop_image', 'IOG_points')),
        tr.ToTensor()])
    tr_sample = trns(sample)

    # Add the batch dimension and run the forward pass without autograd
    # bookkeeping; only the last network output is used.
    inputs = tr_sample['concat'][None].to(device)
    with torch.no_grad():
        outputs = net.forward(inputs)[-1]

    pred = np.transpose(outputs.cpu().numpy()[0, :, :, :], (1, 2, 0))
    pred = 1 / (1 + np.exp(-pred))  # logits -> probabilities
    pred = np.squeeze(pred)

    # Paste the cropped prediction back into a full-size mask.
    gt = tens2image(tr_sample['gt'])
    bbox = get_bbox(gt, pad=30, zero_pad=True)
    result = crop2fullmask(pred, bbox, gt, zero_pad=True, relax=0, mask_relax=False)

    # Blend a solid overlay (channel 2 of the RGB image) into the masked area.
    light = np.zeros_like(image)
    light[:, :, 2] = 255.
    alpha = 0.5
    mask = result[..., None]
    blending = (alpha * light + (1 - alpha) * image) * mask + (1 - mask) * image
    # Clamp on both sides so the uint8 cast below cannot wrap around.
    blending = np.clip(blending, 0, 255)
    print('sahpe image: ', blending.shape)

    cv2.imshow('resulting ', cv2.cvtColor(blending.astype(np.uint8), cv2.COLOR_RGB2BGR))
    cv2.waitKey(0)

    # `result` is used as a 2-D mask above (result[..., None]); an RGB->BGR
    # conversion rejects single-channel input, so show it directly as grayscale.
    # NOTE(review): values are presumably in [0, 1] because they derive from the
    # sigmoid output -- confirm against crop2fullmask.
    mask_vis = (np.clip(result, 0, 1) * 255).astype(np.uint8)
    cv2.imshow('resulting segmentation', mask_vis)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run class agnostic segmentation')
    parser.add_argument('--image_name', type=str, default='test360.png',
                        help='path to target image')
    args = parser.parse_args()
    process(args.image_name)
Hi, thanks for your code. I have a question: you only pass the background box and the foreground point, but you don't use refinementnetwork.py. How do I modify the object mask with positive and negative clicks after the object has been selected? Could you please share the code?
Hi, Thank you for your great work!
I want to test your model on a single image (one that is not in a benchmark dataset), but it seems that the current code doesn't have this function. Could you provide a guide for testing a single image?