zhangboshen / A2J

Code for paper "A2J: Anchor-to-Joint Regression Network for 3D Articulated Pose Estimation from a Single Depth Image". ICCV2019
MIT License

Can this model work when I don't give the bndbox? #35

Open zeroXscorpion7 opened 4 years ago

zeroXscorpion7 commented 4 years ago

I am trying to use this model to identify my own pictures. Is the bndbox needed?

zhangboshen commented 4 years ago

Hi, @zeroXscorpion7. You can try inference without the bndbox, but the performance cannot be guaranteed; some skeleton flex is very likely to happen because of the mean/std shift.
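If no detector box is available at all, one workaround (a sketch of my own, not part of this repo) is to estimate a coarse box from the depth frame itself by thresholding the valid depth range, and then feed that box into the same crop/normalization path. The near/far thresholds are assumptions you would tune for your sensor:

import numpy as np

def rough_bndbox_from_depth(depth, near=0.5, far=4.0):
    """Return [x_min, y_min, x_max, y_max] around pixels whose depth
    (assumed meters) falls inside [near, far]; fall back to the full frame."""
    mask = (depth > near) & (depth < far)
    if not mask.any():
        h, w = depth.shape
        return np.array([0, 0, w, h])
    ys, xs = np.nonzero(mask)
    return np.array([xs.min(), ys.min(), xs.max() + 1, ys.max() + 1])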

zeroXscorpion7 commented 4 years ago

I want to test the performance in real time. How do I run inference on one picture at a time?

zeroXscorpion7 commented 4 years ago

def main():
    net = model.A2J_model(num_classes=keypointsNumber)
    net.load_state_dict(torch.load(model_dir))
    net = net.cuda()
    net.eval()

    post_precess = anchor.post_process(shape=[cropHeight//16, cropWidth//16], stride=16, P_h=None, P_w=None)

    output = torch.FloatTensor()

    data4DTemp = scio.loadmat(testingImageDir + str(1) + '.mat')['DepthNormal']
    depthTemp = data4DTemp[:, :, 3]

    img = depthTemp  # NOTE: raw H*W numpy array; the network expects an N*C*H*W float tensor
    heads = net(img)
    pred_keypoints = post_precess(heads, voting=False)
    output = torch.cat([output, pred_keypoints.data.cpu()], 0)

    result = output.cpu().data.numpy()
    assert np.shape(result) == np.shape(result), "source has different shape with target"  # NOTE: compares result with itself, so it can never fire
    Test1_ = np.zeros(result.shape)
    Test1_[:, 0] = result[:, 1]
    Test1_[:, 1] = result[:, 0]
    Test1_[:, 2] = result[:, 2]
    Test1 = Test1_
    # NOTE: Bndbox and i are undefined here when running without a detector box
    Test1[:, 0] = Test1_[:, 0]*(Bndbox[i, 2]-Bndbox[i, 0])/cropWidth + Bndbox[i, 0]   # x
    Test1[:, 1] = Test1_[:, 1]*(Bndbox[i, 3]-Bndbox[i, 1])/cropHeight + Bndbox[i, 1]  # y
    Test1[:, 2] = Test1_[:, 2]/depthFactor
    TestWorld = np.ones((len(Test1), keypointsNumber, 3))
    TestWorld_tuple = pixel2world(Test1[:, 0], Test1[:, 1], Test1[:, 2])  # NOTE: TestWorld is never filled from this tuple, so world2pixel below runs on ones
    X = np.zeros((15), np.uint8)  # NOTE: uint8 overflows for pixel coordinates > 255
    Y = np.zeros((15), np.uint8)
    for j in range(keypointsNumber):
        X[j], Y[j] = world2pixel(TestWorld[0, j, 0], TestWorld[0, j, 1], TestWorld[0, j, 2])
    IMGX = np.zeros((240, 320, 3), np.uint8)
    # draw the 15-joint skeleton
    skeleton = [(0, 1), (1, 2), (1, 3), (1, 8), (2, 4), (4, 6), (3, 5), (5, 7),
                (8, 9), (8, 10), (9, 11), (11, 13), (10, 12), (12, 14)]
    for a, b in skeleton:
        cv2.line(IMGX, (X[a], Y[a]), (X[b], Y[b]), (0, 0, 255), 2)
    for i in range(keypointsNumber):
        cv2.circle(IMGX, (X[i], Y[i]), 4, (255, 255, 255), -1)
    cv2.imshow('img', IMGX)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

This is the code I edited, but it has some problems. How should I fix it?

mstc-xqp commented 4 years ago

> [quotes the full main() code from the comment above] This is the code I edited, but it has some problems. How should I fix it?

Did you solve it?

zeroXscorpion7 commented 4 years ago

I removed my_dataloader and fed depthTemp into dataPreprocess, then used torch.from_numpy to convert it to a tensor:

def dataPreprocess(img, depth_thres=0.4):
    imageOutputs = np.ones((cropHeight, cropWidth, 1), dtype='float32')

    imCrop = img.copy()[:, :]
    imgResize = cv2.resize(imCrop, (cropWidth, cropHeight), interpolation=cv2.INTER_NEAREST)
    imgResize = np.asarray(imgResize, dtype='float32')  # H*W*C
    imgResize = imgResize / 5

    imageOutputs[:, :, 0] = imgResize

    imageOutputs = np.asarray(imageOutputs)
    imageNCHWOut = imageOutputs.transpose(2, 0, 1)  # [H, W, C] --->>> [C, H, W]
    imageNCHWOut = np.asarray(imageNCHWOut)

    data = torch.from_numpy(imageNCHWOut)

    return data

img = np.zeros((1, 1, 288, 288), np.float32)
img[0, :, :, :] = dataPreprocess(depth_map, 0.4)
img = torch.from_numpy(img)
post_precess = anchor.post_process(shape=[cropHeight//16, cropWidth//16], stride=16, P_h=None, P_w=None)
output = torch.FloatTensor()

Like this.
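Putting the pieces in this thread together, a minimal single-frame inference helper might look like the sketch below. It reuses the dataPreprocess defined above; model_dir is a placeholder checkpoint path, and the output joint order follows the repo's post-processing (indices 0 and 1 are swapped relative to x/y, as in errorCompute):

import torch
import model as model
import anchor as anchor

keypointsNumber = 15
cropWidth, cropHeight = 288, 288
model_dir = './a2j_checkpoint.pth'  # hypothetical path to a trained model

net = model.A2J_model(num_classes=keypointsNumber)
net.load_state_dict(torch.load(model_dir))
net = net.cuda().eval()
post_precess = anchor.post_process(shape=[cropHeight//16, cropWidth//16], stride=16, P_h=None, P_w=None)

def predict_one(depth_map):
    # dataPreprocess returns a (1, 288, 288) C*H*W tensor; add the batch dim
    img = dataPreprocess(depth_map, 0.4).unsqueeze(0).cuda()
    with torch.no_grad():
        heads = net(img)
        pred_keypoints = post_precess(heads, voting=False)
    return pred_keypoints.data.cpu().numpy()  # shape (1, 15, 3), crop coordinates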

mstc-xqp commented 4 years ago

> I removed my_dataloader and fed depthTemp into dataPreprocess, then used torch.from_numpy to convert it to a tensor [quotes the dataPreprocess code above] Like this.

Thank you!! I am also trying to use this model to identify my own pictures. Is the bndbox needed? I saw you asked that. Did you try it? What was the performance?

zeroXscorpion7 commented 4 years ago

If you don't want to use the bndbox, you may train on your data yourself, or modify the training code.
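For the "modify the training code" route, the key change is something like the sketch below (my own reading, not an official patch): skip the detector-box crop and resize the full 320x240 depth frame instead, rescaling the 2D labels accordingly. This is also what the training script shared later in this thread does.

import cv2
import numpy as np

cropWidth, cropHeight = 288, 288  # network input size, as in the repo

def preprocess_no_bndbox(depth, keypoints_uv):
    """Resize the full 320x240 frame (no bndbox crop) and rescale the
    2D keypoint labels to the crop. A sketch, assuming ITOP-style data."""
    img = cv2.resize(depth.astype('float32'), (cropWidth, cropHeight),
                     interpolation=cv2.INTER_NEAREST) / 5  # same /5 scaling as the repo
    label_xy = keypoints_uv.astype('float32').copy()
    label_xy[:, 0] *= cropWidth / 320.0   # x
    label_xy[:, 1] *= cropHeight / 240.0  # y
    return img, label_xy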

mstc-xqp commented 4 years ago

> If you don't want to use the bndbox, you may train on your data yourself, or modify the training code.

I see. Can you share the training code? Did you train on ITOP or K2HPD?

I have seen some people use just the depth map to train an AlphaPose or OpenPose model.

zeroXscorpion7 commented 4 years ago

import cv2
import torch
import torch.utils.data
import torch.optim.lr_scheduler as lr_scheduler
import numpy as np
import scipy.io as scio
import os
from PIL import Image
from torch.autograd import Variable
import model as model
import anchor as anchor
from tqdm import tqdm
import random_erasing
import logging
import time
import datetime
import random

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# DataHyperParms
TrainImgFrames = 1000
TestImgFrames = 100
keypointsNumber = 15
cropWidth = 288
cropHeight = 288
batch_size = 64
learning_rate = 0.00035
Weight_Decay = 1e-4
nepoch = 35
RegLossFactor = 3
spatialFactor = 0.5
RandCropShift = 5
RandshiftDepth = 1
RandRotate = 180
RandScale = (1.0, 0.5)

randomseed = 12345
random.seed(randomseed)
np.random.seed(randomseed)
torch.manual_seed(randomseed)

save_dir = ''

try:
    os.makedirs(save_dir)
except OSError:
    pass

trainingImageDir = ''
testingImageDir = ''  # mat images
keypointsfileTest = ''
keypointsfileTrain = ''
model_dir = ''
result_file = 'result_test.txt'

def pixel2world(x):
    x[:, :, 0] = (x[:, :, 0] - 160.0) * x[:, :, 2] * 0.0035
    x[:, :, 1] = (120.0 - x[:, :, 1]) * x[:, :, 2] * 0.0035
    return x

def world2pixel(x):
    x[:, :, 0] = 160.0 + x[:, :, 0] / (x[:, :, 2] * 0.0035)
    x[:, :, 1] = 120.0 - x[:, :, 1] / (x[:, :, 2] * 0.0035)
    return x

joint_id_to_name = {
    0: 'Head', 1: 'Neck', 2: 'RShoulder', 3: 'LShoulder',
    4: 'RElbow', 5: 'LElbow', 6: 'RHand', 7: 'LHand',
    8: 'Torso', 9: 'RHip', 10: 'LHip', 11: 'RKnee',
    12: 'LKnee', 13: 'RFoot', 14: 'LFoot',
}

# loading GT keypoints and center points
keypointsWorldtest = scio.loadmat(keypointsfileTest)['keypoints3D'].astype(np.float32)
keypointsPixeltest = np.ones((len(keypointsWorldtest), 15, 2), dtype='float32')
keypointsPixeltest = world2pixel(keypointsWorldtest)

keypointsWorldtrain = scio.loadmat(keypointsfileTrain)['keypoints3D'].astype(np.float32)
keypointsPixeltrain = np.ones((len(keypointsWorldtrain), 15, 2), dtype='float32')
keypointsPixeltrain = world2pixel(keypointsWorldtrain)

def transform(img, label, matrix):
    '''
    img: [H, W]; label: [N, 2]
    '''
    img_out = cv2.warpAffine(img, matrix, (cropWidth, cropHeight))
    label_out = np.ones((keypointsNumber, 3))
    label_out[:, :2] = label[:, :2].copy()
    label_out = np.matmul(matrix, label_out.transpose())
    label_out = label_out.transpose()

    return img_out, label_out

def dataPreprocess(index, img, keypointsUVD, depth_thres=0.4, augment=True):

    imageOutputs = np.ones((cropHeight, cropWidth, 1), dtype='float32')
    labelOutputs = np.ones((keypointsNumber, 3), dtype='float32')

    if augment:
        RandomOffset_1 = np.random.randint(-1*RandCropShift, RandCropShift)
        RandomOffset_2 = np.random.randint(-1*RandCropShift, RandCropShift)
        RandomOffset_3 = np.random.randint(-1*RandCropShift, RandCropShift)
        RandomOffset_4 = np.random.randint(-1*RandCropShift, RandCropShift)
        RandomOffsetDepth = np.random.normal(0, RandshiftDepth, cropHeight*cropWidth).reshape(cropHeight, cropWidth)
        RandomOffsetDepth[np.where(RandomOffsetDepth < RandshiftDepth)] = 0
        RandomRotate = np.random.randint(-1*RandRotate, RandRotate)
        RandomScale = np.random.rand()*RandScale[0] + RandScale[1]
        matrix = cv2.getRotationMatrix2D((cropWidth/2, cropHeight/2), RandomRotate, RandomScale)
    else:
        RandomOffset_1, RandomOffset_2, RandomOffset_3, RandomOffset_4 = 0, 0, 0, 0
        RandomRotate = 0
        RandomScale = 1
        RandomOffsetDepth = 0
        matrix = cv2.getRotationMatrix2D((cropWidth/2, cropHeight/2), RandomRotate, RandomScale)

    imCrop = img[:, :].copy()

    imgResize = cv2.resize(imCrop, (cropWidth, cropHeight), interpolation=cv2.INTER_NEAREST)

    imgResize = np.asarray(imgResize, dtype='float32')  # H*W*C
    imgResize = imgResize / 5

    ## label
    label_xy = np.ones((keypointsNumber, 2), dtype='float32')
    label_xy[:, 0] = keypointsUVD[index, :, 0].copy()*cropWidth/320   # x
    label_xy[:, 1] = keypointsUVD[index, :, 1].copy()*cropHeight/240  # y

    if augment:
        imgResize, label_xy = transform(imgResize, label_xy, matrix)  ## rotation, scale

    imageOutputs[:, :, 0] = imgResize

    labelOutputs[:, 1] = label_xy[:, 0]
    labelOutputs[:, 0] = label_xy[:, 1]
    labelOutputs[:, 2] = (keypointsUVD[index, :, 2])*RandomScale  # Z

    imageOutputs = np.asarray(imageOutputs)
    imageNCHWOut = imageOutputs.transpose(2, 0, 1)  # [H, W, C] --->>> [C, H, W]
    imageNCHWOut = np.asarray(imageNCHWOut)
    labelOutputs = np.asarray(labelOutputs)

    data, label = torch.from_numpy(imageNCHWOut), torch.from_numpy(labelOutputs)

    return data, label

###################### Pytorch dataloader #################
class my_dataloader(torch.utils.data.Dataset):

    def __init__(self, ImgDir, keypointsUVD, num, augment=True):
        self.ImgDir = ImgDir
        self.keypointsUVD = keypointsUVD
        self.num = num
        self.augment = augment
        self.randomErase = random_erasing.RandomErasing(probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=[0])

    def __getitem__(self, index):
        data4D = scio.loadmat(self.ImgDir + str(index+1) + '.mat')['DepthNormal']
        depth = data4D[:, :]

        data, label = dataPreprocess(index, depth, self.keypointsUVD, self.augment)

        if self.augment:
            data = self.randomErase(data)

        return data, label

    def __len__(self):
        return self.num

train_image_datasets = my_dataloader(trainingImageDir, keypointsWorldtrain, TrainImgFrames, augment=True)
train_dataloaders = torch.utils.data.DataLoader(train_image_datasets, batch_size=batch_size, shuffle=True, num_workers=8)

test_image_datasets = my_dataloader(testingImageDir, keypointsWorldtest, TestImgFrames, augment=False)
test_dataloaders = torch.utils.data.DataLoader(test_image_datasets, batch_size=batch_size, shuffle=False, num_workers=8)

def train():

    net = model.A2J_model(num_classes=keypointsNumber)
    net = net.cuda()

    post_precess = anchor.post_process(shape=[cropHeight//16, cropWidth//16], stride=16, P_h=None, P_w=None)
    criterion = anchor.A2J_loss(shape=[cropHeight//16, cropWidth//16], thres=[16.0, 32.0], stride=16,\
        spatialFactor=spatialFactor, img_shape=[cropHeight, cropWidth], P_h=None, P_w=None)
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=Weight_Decay)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2)

    logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%Y/%m/%d %H:%M:%S',\
                        filename=os.path.join(save_dir, 'train.log'), level=logging.INFO)
    logging.info('======================================================')

    for epoch in range(nepoch):
        net = net.train()
        train_loss_add = 0.0
        Cls_loss_add = 0.0
        Reg_loss_add = 0.0
        timer = time.time()

        # Training loop
        for i, (img, label) in enumerate(train_dataloaders):

            torch.cuda.synchronize()

            img, label = img.cuda(), label.cuda()
            heads = net(img)
            optimizer.zero_grad()

            Cls_loss, Reg_loss = criterion(heads, label)

            loss = 1*Cls_loss + Reg_loss*RegLossFactor
            loss.backward()
            optimizer.step()

            torch.cuda.synchronize()

            train_loss_add = train_loss_add + (loss.item())*len(img)
            Cls_loss_add = Cls_loss_add + (Cls_loss.item())*len(img)
            Reg_loss_add = Reg_loss_add + (Reg_loss.item())*len(img)

            # printing loss info
            if i % 10 == 0:
                print('epoch: ', epoch, ' step: ', i, 'Cls_loss ', Cls_loss.item(), 'Reg_loss ', Reg_loss.item(), ' total loss ', loss.item())

        scheduler.step(epoch)

        # time taken
        torch.cuda.synchronize()
        timer = time.time() - timer
        timer = timer / TrainImgFrames
        print('==> time to learn 1 sample = %f (ms)' % (timer*1000))

        train_loss_add = train_loss_add / TrainImgFrames
        Cls_loss_add = Cls_loss_add / TrainImgFrames
        Reg_loss_add = Reg_loss_add / TrainImgFrames
        print('mean train_loss_add of 1 sample: %f, #train_indexes = %d' % (train_loss_add, TrainImgFrames))
        print('mean Cls_loss_add of 1 sample: %f, #train_indexes = %d' % (Cls_loss_add, TrainImgFrames))
        print('mean Reg_loss_add of 1 sample: %f, #train_indexes = %d' % (Reg_loss_add, TrainImgFrames))

        Error_test = 0
        Error_train = 0
        Error_test_wrist = 0

        if (epoch % 1 == 0):
            net = net.eval()
            output = torch.FloatTensor()
            outputTrain = torch.FloatTensor()

            for i, (img, label) in tqdm(enumerate(test_dataloaders)):
                with torch.no_grad():
                    img, label = img.cuda(), label.cuda()
                    heads = net(img)
                    pred_keypoints = post_precess(heads, voting=False)
                    output = torch.cat([output, pred_keypoints.data.cpu()], 0)

            result = output.cpu().data.numpy()
            Error_test = errorCompute(result, keypointsWorldtest)
            print('epoch: ', epoch, 'Test error:', Error_test)
            saveNamePrefix = '%s/net_%d_wetD_' % (save_dir, epoch) + str(Weight_Decay) + '_depFact_' + str(spatialFactor) + '_RegFact_' + str(RegLossFactor) + '_rndShft_' + str(RandCropShift)
            torch.save(net.state_dict(), saveNamePrefix + '.pth')

        # log
        logging.info('Epoch#%d: total loss=%.4f, Cls_loss=%.4f, Reg_loss=%.4f, Err_test=%.4f, lr = %.6f'
                     % (epoch, train_loss_add, Cls_loss_add, Reg_loss_add, Error_test, scheduler.get_lr()[0]))

def test():

    net = model.A2J_model(num_classes=keypointsNumber)
    net.load_state_dict(torch.load(model_dir))
    net = net.cuda()
    net.eval()

    post_precess = anchor.post_process(shape=[cropHeight//16, cropWidth//16], stride=16, P_h=None, P_w=None)

    output = torch.FloatTensor()
    torch.cuda.synchronize()
    for i, (img, label) in tqdm(enumerate(test_dataloaders)):
        with torch.no_grad():
            img, label = img.cuda(), label.cuda()
            heads = net(img)
            pred_keypoints = post_precess(heads, voting=False)
            output = torch.cat([output, pred_keypoints.data.cpu()], 0)

    torch.cuda.synchronize()

    result = output.cpu().data.numpy()
    writeTxt(result)
    error = errorCompute(result, keypointsWorldtest)
    print('Error:', error)

def errorCompute(source, target):
    assert np.shape(source) == np.shape(target), "source has different shape with target"

    Test1_ = source.copy()
    target_ = target.copy()
    Test1_[:, :, 0] = source[:, :, 1]
    Test1_[:, :, 1] = source[:, :, 0]
    Test1 = Test1_  # [x, y, z]

    for i in range(len(Test1_)):
        Test1[i, :, 0] = Test1_[i, :, 0]*320/cropWidth   # x
        Test1[i, :, 1] = Test1_[i, :, 1]*240/cropHeight  # y
        Test1[i, :, 2] = source[i, :, 2]

    labels = pixel2world(target_)
    outputs = pixel2world(Test1.copy())

    errors = np.sqrt(np.sum((labels - outputs) ** 2, axis=2))

    return np.mean(errors)

def writeTxt(result):

    resultUVD_ = result.copy()
    resultUVD_[:, :, 0] = result[:, :, 1]
    resultUVD_[:, :, 1] = result[:, :, 0]
    resultUVD = resultUVD_  # [x, y, z]

    for i in range(len(result)):
        resultUVD[i, :, 0] = resultUVD_[i, :, 0]*320/cropWidth   # x
        resultUVD[i, :, 1] = resultUVD_[i, :, 1]*240/cropHeight  # y
        resultUVD[i, :, 2] = result[i, :, 2]

    resultReshape = resultUVD.reshape(len(result), -1)

    with open(os.path.join(save_dir, result_file), 'w') as f:
        for i in range(len(resultReshape)):
            for j in range(keypointsNumber*3):
                f.write(str(resultReshape[i, j]) + ' ')
            f.write('\n')

if __name__ == '__main__':
    train()
    test()
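For context, the path variables at the top of this script are left empty. A hypothetical setup (the names and layout below are mine, adjust to your own data) could look like:

# Hypothetical values for the empty config variables above -- adjust to your data.
trainingImageDir = './data/train/'                 # folder with 1.mat ... 1000.mat (key 'DepthNormal')
testingImageDir = './data/test/'                   # folder with 1.mat ... 100.mat
keypointsfileTrain = './data/train_keypoints.mat'  # .mat with key 'keypoints3D', shape (N, 15, 3)
keypointsfileTest = './data/test_keypoints.mat'
model_dir = './model/a2j_checkpoint.pth'           # checkpoint loaded by test()
save_dir = './result'                              # where train.log and result_test.txt go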

mstc-xqp commented 4 years ago

> [quotes the full training script from the comment above]

Thank you very much, bro! Mind if I add your email or Skype? My email is 15619217958@163.com

mstc-xqp commented 4 years ago

> If you don't want to use the bndbox, you may train on your data yourself, or modify the training code.

Hi bro. Can you share the ITOP model or K2HPD model? Best regards!

Shreyas-NR commented 2 years ago

Hi @zeroXscorpion7 ,

  1. Were you able to use this model to predict the joints for a custom dataset?
  2. I'm also trying to pass a single depth frame along with the ITOP side dataset, shifting the mean value so that my input depth frame matches the ITOP_side depth frames (see the sketch after this list). Unfortunately, the results are very bad.
  3. Could you tell me if you were able to do something more on this?
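For reference, a rough sketch of what that mean shift could look like (my own assumption of the procedure, not code from this repo): shift the valid pixels of the custom frame so their mean depth matches a target ITOP-like mean before the usual /5 scaling.

import numpy as np

def match_depth_mean(depth, target_mean=2.0):
    """Shift valid (non-zero) depth pixels so their mean equals target_mean
    (meters). target_mean is a placeholder; measure it on real ITOP_side frames."""
    depth = depth.astype('float32').copy()
    valid = depth > 0
    if valid.any():
        depth[valid] += target_mean - depth[valid].mean()
    return depth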