cuiaiyu / dressing-in-order

(ICCV'21) Official code of "Dressing in Order: Recurrent Person Image Generation for Pose Transfer, Virtual Try-on and Outfit Editing" by Aiyu Cui, Daniel McKee and Svetlana Lazebnik
https://cuiaiyu.github.io/dressing-in-order

While trying custom images, my output skeleton is tilted -90 degrees #91

Open pamanimaran07 opened 10 months ago

pamanimaran07 commented 10 months ago

Hi @cuiaiyu, thank you so much for your work. I am trying to get output for custom images, so for the pose points I used a code snippet from issue #12 (https://github.com/cuiaiyu/dressing-in-order/issues/12). After running the code, my pose skeleton seems to be tilted -90 degrees.

(attached screenshot: output_1)

The pose points are: img = [(80, 33), (88, 72), (61, 77), (45, 139), (34, 189), (118, 66), (153, 111), (110, 139), (65, 183), (-1, -1), (-1, -1), (-1, -1), (-1, -1), (-1, -1), (72, 27), (84, 27), (68, 33), (95, 27)]

Am I missing any steps in this process? It would be great if you could guide me through it. Thank you.

cuiaiyu commented 10 months ago

Maybe you can swap the x coordinates and y coordinates?
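
For example, a minimal sketch of that swap (swap_xy is just an illustrative helper; the (-1, -1) entries mark undetected joints and map to themselves):

def swap_xy(points):
    # Swap each (x, y) tuple to (y, x).
    return [(y, x) for (x, y) in points]

img = [(80, 33), (88, 72), (61, 77)]  # first three of the 18 points above
print(swap_xy(img))  # [(33, 80), (72, 88), (77, 61)]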

pamanimaran07 commented 10 months ago

Hi @cuiaiyu, thank you for the response; swapping the points worked for me. I then tried pose transfer, and I am not satisfied with the output I am getting. I'll share the code that I used for your reference. Thanks in advance.

import json
import numpy as np
import torch
from utils import pose_utils  # provides load_pose_cords_from_strings and cords_to_map

def pose_points(name):
    # Create a dictionary to map name to the corresponding list of points
    print("pose_points",name)

    point_lists = {

        "fashionMENTees_Tanksid0000481201_1front": [(91, 44),(84, 83), (53, 77), (26, 139), (38, 183), (118, 83),(126, 144),(133, 200),(61, 211),(-1, -1),(-1, -1),(99, 200),(-1, -1),(-1, -1),(84, 33),(99, 38),(72, 33),(103, 44)],

        "fashionMENTees_Tanksid0000595506_2side": [(91, 38),(114, 77),(122, 77),(-1, -1),(-1, -1),(107, 77),(110, 144),(91, 200),(-1, -1),(-1, -1),(-1, -1),(110, 205),(-1, -1),(-1, -1),(84, 27),(99, 27),(-1, -1),(114, 33)],

        "fashionMENTees_Tanksid0000599401_1front": [(89, 44), (91, 86), (54, 88), (44, 145), (44, 199), (128, 86), (137, 146), (137, 200), (70, 204), (-1, -1), (-1, -1), (111, 204), (-1, -1), (-1, -1), (82, 37), (96, 37), (73, 39), (106, 39)],

    }

    if name in point_lists:
        # Points are stored as (x, y); pose_utils expects y first, so swap.
        x_points, y_points = zip(*point_lists[name])
        return _load_kpt(list(y_points), list(x_points))
    else:
        return None  # 'name' is not in point_lists

def _load_kpt(keypoints_y, keypoints_x):
    # pose_utils.load_pose_cords_from_strings expects JSON-encoded lists.
    y_str = json.dumps(keypoints_y)
    x_str = json.dumps(keypoints_x)
    array = pose_utils.load_pose_cords_from_strings(y_str, x_str)
    # Rasterize the keypoints into a (256, 176) heatmap, squeezing them
    # from the original (256, 256) detection space.
    pose = pose_utils.cords_to_map(array, (256, 176), (256, 256))
    pose = np.transpose(pose, (2, 0, 1))  # HWC -> CHW
    return torch.Tensor(pose)
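
As a quick sanity check (assuming pose_utils follows the usual convention of one heatmap channel per keypoint), the returned tensor for the 18-point lists above should be shaped (18, 256, 176):

pose = pose_points("fashionMENTees_Tanksid0000481201_1front")
print(pose.shape)  # expected: torch.Size([18, 256, 176])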

import torch.utils.data as data
from PIL import Image
import numpy as np
import cv2
import torchvision.transforms as transforms
import torch
import copy, os, collections
import json
import matplotlib.pyplot as plt  # needed by plot_img below
from datasets.human_parse_labels import get_label_map, DF_LABEL, YF_LABEL
import pandas as pd
from utils import pose_utils

resize = transforms.Resize((256, 176))
# Nearest-neighbor for label masks so class indices are not blended.
resize_mask = transforms.Resize((256, 176), interpolation=Image.NEAREST)
toTensor = transforms.ToTensor()
toPIL = transforms.ToPILImage()
normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
n_human_part = 8
img_dir = 'D:\\dior_aug_1_2023\\imgs\\'  # avoid shadowing the builtin 'dir'

def load_img(path):
  img = Image.open(path).convert("RGB")
  img = resize(img)
  img = toTensor(img)
  img = normalize(img)  # map to [-1, 1]
  return img

aiyu2atr, atr2aiyu = get_label_map(n_human_part)

def load_mask(path):
  mask = Image.open(path)
  mask = resize_mask(mask)  # nearest-neighbor keeps label values intact
  mask = torch.from_numpy(np.array(mask))

  # Remap the parser's labels to the model's label space.
  texture_mask = copy.deepcopy(mask)
  for atr in atr2aiyu:
      aiyu = atr2aiyu[atr]
      texture_mask[mask == atr] = aiyu
  return texture_mask

def load_data(name):
    print(img_dir + name)
    parse = load_mask(img_dir + name + "_parsed.png")
    pimg = load_img(img_dir + name + ".jpg")
    pose = pose_points(name)
    return pimg.cuda().squeeze(), parse.cuda().squeeze(), pose.cuda().squeeze()

def plot_img(pimg=[], gimgs=[], oimgs=[], gen_img=[], pose=None):
    if pose is not None:
        print(pose.size())
        # Draw the skeleton from the 18-channel pose heatmap.
        kpt = pose_utils.draw_pose_from_map(pose.cpu().numpy().transpose(1, 2, 0), radius=6)
        kpt = kpt[0]
    if not isinstance(pimg, list):
        pimg = [pimg]
    if not isinstance(gen_img, list):
        gen_img = [gen_img]
    out = pimg + gimgs + oimgs + gen_img
    if out:
        out = torch.cat(out, 2).float().cpu().detach().numpy()
        out = (out + 1) / 2  # denormalize from [-1, 1] to [0, 1]
        out = np.transpose(out, [1, 2, 0])
        if pose is not None:
            out = np.concatenate((kpt, out), 1)
    else:
        out = kpt
    fig = plt.figure(figsize=(10, 20), dpi=200, facecolor='w', edgecolor='k')
    plt.axis('off')
    plt.imshow(out)

# define the dressing-in-order function (the pipeline)
def dress_in_order(model, pid, pose_id=None, gids=[], ogids=[], order=[5, 1, 3, 2]):
    PID = [0, 4, 6, 7]
    GID = [2, 5, 1, 3]

    # encode person
    pimg, parse, from_pose = load_data(pid)
    if not pose_id:
        to_pose = from_pose
    else:
        to_img, _, to_pose = load_data(pose_id)
    psegs = model.encode_attr(pimg[None], parse[None], from_pose[None], to_pose[None], PID)

    # encode base garments
    gsegs = model.encode_attr(pimg[None], parse[None], from_pose[None], to_pose[None])

    # swap base garments, if any
    gimgs = []
    for gid in gids:
        # Assumes gid is a tuple whose first element is the garment image
        # name and whose last element is the part index k.
        gname, _, k = gid
        gimg, gparse, pose = load_data(gname)
        seg = model.encode_single_attr(gimg[None], gparse[None], pose[None], to_pose[None], i=k)
        gsegs[k] = seg
        gimgs += [gimg * (gparse == k)]

    # encode overlay garments
    over_gsegs = []
    oimgs = []
    for gid in ogids:
        oname, _, k = gid
        oimg, oparse, pose = load_data(oname)
        oimgs += [oimg * (oparse == k)]
        seg = model.encode_single_attr(oimg[None], oparse[None], pose[None], to_pose[None], i=k)
        over_gsegs += [seg]

    gsegs = [gsegs[i] for i in order] + over_gsegs
    gen_img = model.netG(to_pose[None], psegs, gsegs)

    return pimg, gimgs, oimgs, gen_img[0], to_pose

Pose transfer code:
%matplotlib inline
pimg, gimgs, oimgs, gen_img, pose = dress_in_order(model, pid='fashionMENTees_Tanksid0000481201_1front',pose_id='fashionMENTees_Tanksid0000599401_1front')
plot_img(pimg, gimgs, oimgs, gen_img, pose)

Output image

(attached screenshot: output4)

cuiaiyu commented 7 months ago

Your skeleton looks like it was originally detected at (256, 256), but you are squeezing it to (256, 176).
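
If it helps, a minimal sketch of rescaling the keypoints yourself instead of letting the map squeeze them (scale_keypoints is a hypothetical helper; points are assumed to be (x, y) tuples with (-1, -1) marking undetected joints):

def scale_keypoints(points, src_hw=(256, 256), dst_hw=(256, 176)):
    # Rescale from the detection resolution to the model's (256, 176) space.
    # (-1, -1) sentinels must be preserved, not scaled.
    sy = dst_hw[0] / src_hw[0]
    sx = dst_hw[1] / src_hw[1]
    return [(x, y) if (x, y) == (-1, -1) else (round(x * sx), round(y * sy))
            for (x, y) in points]

If you pre-scale like this, the two size arguments passed to pose_utils.cords_to_map should then match (both (256, 176)), so no further squeeze is applied. Alternatively, run the keypoint detector on the image after resizing it to (256, 176), so detection and rendering happen in the same coordinate space.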