shiyinzhang / Inside-Outside-Guidance

Interactive Object Segmentation with Inside-Outside Guidance

Test on a single image #2

Open Seojii opened 3 years ago

Seojii commented 3 years ago

Hi, Thank you for your great work!

I want to test your model on a single image (not from a benchmark dataset), but it seems the current code doesn't support this. Could you provide a guide for testing on a single image?

chen-chunling commented 3 years ago

+1

ZJuanJuan commented 3 years ago

+1

gost-sniper commented 3 years ago

Hey @Seojii, @chen-chunling and @mydesign-star,

You can use this code here with a slight modification to meet your goals. I hope this can help you.

Cheers

fadamsyah commented 2 years ago

Hey @Seojii, @chen-chunling and @mydesign-star,

You can use this code here with a slight modification to meet your goals. I hope this can help you.

Cheers

Hi. The code is extremely helpful. However, I'm still not sure about one thing: how do we select or pass a chosen point? Is it related to void_pixels?

Gateway2745 commented 2 years ago

@fadamsyah The third point is automatically calculated. It is the point inside the box that is farthest from the box boundaries (according to Euclidean distance). The relevant code is here - https://github.com/shiyinzhang/Inside-Outside-Guidance/blob/696ec2ddae2e994541cc9d81bb3c41984e233c64/dataloaders/helpers.py#L170-L177
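
For anyone who wants to see the idea outside the repository, here is a rough sketch of how such a point can be found with a Euclidean distance transform (this is only an illustration, not the repository's exact helper; the function name farthest_inside_point is made up):

import numpy as np
from scipy.ndimage import distance_transform_edt

def farthest_inside_point(mask):
    # Distance from every foreground pixel to the nearest background pixel (Euclidean)
    dist = distance_transform_edt(mask.astype(np.uint8))
    # The pixel with the largest distance is the one deepest inside the region
    return np.unravel_index(np.argmax(dist), dist.shape)

# Example: for a plain box mask this lands near the centre of the box
box = np.zeros((100, 100), dtype=np.uint8)
box[20:60, 30:90] = 1
print(farthest_inside_point(box))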

Caooc commented 2 years ago

@fadamsyah The third point is automatically calculated. It is the point inside the box that is farthest from the box boundaries (according to Euclidean distance). The relevant code is here -

https://github.com/shiyinzhang/Inside-Outside-Guidance/blob/696ec2ddae2e994541cc9d81bb3c41984e233c64/dataloaders/helpers.py#L170-L177

Hello, I think the third point is indeed computed automatically, but it is meant to imitate a user's click during training. When testing on a single image, we can pass in the third point ourselves.
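
If you do want to supply the inside click yourself when testing, one possible approach (only a sketch; make_point_heatmap and the channel order are my own assumptions and would need to be checked against tr.IOGPoints) is to build the foreground/background Gaussian heatmaps directly from your clicked coordinates and feed them in place of the automatically generated 'IOG_points' element:

import numpy as np

def make_point_heatmap(shape, points, sigma=10):
    # Gaussian bump of width sigma at each (row, col) in points
    h, w = shape
    heatmap = np.zeros((h, w), dtype=np.float32)
    ys, xs = np.mgrid[0:h, 0:w]
    for r, c in points:
        heatmap = np.maximum(heatmap, np.exp(-((ys - r) ** 2 + (xs - c) ** 2) / (2.0 * sigma ** 2)))
    return heatmap

# Foreground channel from your own click, background channel from e.g. the box corners
inside = make_point_heatmap((512, 512), [(256, 240)], sigma=10)
outside = make_point_heatmap((512, 512), [(10, 10), (10, 501), (501, 10), (501, 501)], sigma=10)
iog_points = np.stack([inside, outside], axis=-1)  # candidate replacement for the 'IOG_points' element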

kaiyoo commented 1 year ago

Hey @Seojii, @chen-chunling and @mydesign-star,

You can use this code here with a slight modification to meet your goals. I hope this can help you.

Cheers

Hi, I can't access the repo because it requires me to sign in, and it looks like users can only be added by that organization (indatalabs). Could you share the code here or provide a publicly accessible link? Thanks.

gost-sniper commented 1 year ago

Here is a simple script (you need to download the pretrained model, IOG_PASCAL_SBD.pth, first though):

import numpy as np

# PyTorch includes
import torch
from torchvision import transforms

# Custom includes (from the Inside-Outside-Guidance repository)
from dataloaders import custom_transforms as tr
from dataloaders.helpers import *
from networks.mainnetwork import *

from PIL import Image
import cv2
import argparse

def process(image_name):

    # Select the device: set gpu_id to -1 to force CPU mode
    gpu_id = 0
    device = torch.device("cuda:" + str(gpu_id) if (torch.cuda.is_available() and gpu_id >= 0) else "cpu")
    if device.type == 'cuda':
        print('Using GPU: {}'.format(gpu_id))

    # Number of input channels: RGB + the two IOG point heatmaps (inside and outside guidance)
    nInputChannels = 5

    # Network definition (ResNet-101 backbone, matching the released IOG_PASCAL_SBD checkpoint)
    net = Network(nInputChannels=nInputChannels,
                  num_classes=1,
                  backbone='resnet101',
                  output_stride=16,
                  sync_bn=None,
                  freeze_bn=False)

    # Load the pretrained weights (download IOG_PASCAL_SBD.pth from the repository first)
    pretrain_dict = torch.load('IOG_PASCAL_SBD.pth', map_location=device)
    net.load_state_dict(pretrain_dict)
    net.to(device)

    # Run the network in evaluation mode
    net.eval()

    # PIL loads the image as RGB; convert a copy to BGR only for the OpenCV ROI-selection window
    image = np.array(Image.open(image_name).convert('RGB').resize((1700, 850)))
    im_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    roi = cv2.selectROI(im_bgr)
    image = image.astype(np.float32)

    # Turn the selected box into a binary mask; everything outside the box is treated as void
    bbox = np.zeros_like(image[..., 0])
    bbox[int(roi[1]):int(roi[1] + roi[3]), int(roi[0]):int(roi[0] + roi[2])] = 1
    void_pixels = 1 - bbox
    sample = {'image': image, 'gt': bbox, 'void_pixels': void_pixels}

    # Same test-time preprocessing the repository uses: crop around the box, resize to 512x512,
    # add the IOG point heatmaps, then concatenate image and heatmaps into a 5-channel input
    trns = transforms.Compose([
        tr.CropFromMask(crop_elems=('image', 'gt', 'void_pixels'), relax=30, zero_pad=True),
        tr.FixedResize(resolutions={'gt': None, 'crop_image': (512, 512), 'crop_gt': (512, 512), 'crop_void_pixels': (512, 512)},
                       flagvals={'gt': cv2.INTER_LINEAR, 'crop_image': cv2.INTER_LINEAR, 'crop_gt': cv2.INTER_LINEAR, 'crop_void_pixels': cv2.INTER_LINEAR}),
        tr.IOGPoints(sigma=10, elem='crop_gt', pad_pixel=10),
        tr.ToImage(norm_elem='IOG_points'),
        tr.ConcatInputs(elems=('crop_image', 'IOG_points')),
        tr.ToTensor()])

    tr_sample = trns(sample)

    # Add a batch dimension and run the forward pass; the last output is the fine prediction
    inputs = tr_sample['concat'][None]
    inputs = inputs.to(device)
    with torch.no_grad():
        outputs = net(inputs)[-1]

    # Apply a sigmoid and paste the cropped prediction back into the full-resolution image
    pred = np.transpose(outputs.detach().cpu().numpy()[0, :, :, :], (1, 2, 0))
    pred = 1 / (1 + np.exp(-pred))
    pred = np.squeeze(pred)
    gt = tens2image(tr_sample['gt'])
    bbox = get_bbox(gt, pad=30, zero_pad=True)
    result = crop2fullmask(pred, bbox, gt, zero_pad=True, relax=0, mask_relax=False)

    # Build a solid-colour layer and alpha-blend it over the predicted region
    light = np.zeros_like(image)
    light[:, :, 2] = 255.

    alpha = 0.5
    blending = (alpha * light + (1 - alpha) * image) * result[..., None] + (1 - result[..., None]) * image
    blending[blending > 255.] = 255

    print('shape of blended image: ', blending.shape)
    cv2.imshow('result overlay', cv2.cvtColor(blending.astype(np.uint8), cv2.COLOR_RGB2BGR))
    cv2.waitKey(0)
    # The predicted mask is a single-channel probability map in [0, 1]; scale it for display
    cv2.imshow('resulting segmentation', (result * 255).astype(np.uint8))
    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run class agnostic segmentation')
    parser.add_argument('--image_name', type=str, default='test360.png',
        help='path to target image')

    args = parser.parse_args()

    process(args.image_name)
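
Assuming you save this as, say, test_single.py in the root of the repository (so that the dataloaders and networks packages are importable) and put IOG_PASCAL_SBD.pth next to it, run python test_single.py --image_name my_image.png, drag a box in the ROI window, and press Enter or Space to confirm the selection.
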
duxuan11 commented 1 year ago

Hi, thanks for your code. I have one question: you only pass in the background box and the foreground point, but you don't use refinementnetwork.py. After the object has been selected, how do I refine the predicted mask with additional positive and negative clicks? Could you share that code, please?