Open zeroXscorpion7 opened 4 years ago
Hi, @zeroXscorpion7. You can try inference without the bndbox (bounding box), but the performance cannot be guaranteed; some skeleton flex (distortion) is very likely to happen because of the mean/std shift.
I want to test the efficacy in real time. How do I run inference on one picture at a time?
def main():
    """Run the A2J network on one depth frame and display the predicted skeleton.

    Loads the weights at ``model_dir``, reads frame ``1.mat`` from
    ``testingImageDir``, predicts ``keypointsNumber`` joints, maps them from
    crop coordinates back to the 320x240 frame with the frame's bounding box,
    and draws the 15-joint skeleton in an OpenCV window (blocks until a key
    is pressed). Requires a CUDA device.

    Relies on module-level names: model, anchor, keypointsNumber, cropWidth,
    cropHeight, model_dir, testingImageDir and Bndbox.
    """
    # Frame file names are 1-based ('1.mat'); Bndbox rows are indexed from 0.
    # The original indexed Bndbox with a loop variable that was not yet assigned.
    frame_index = 0

    net = model.A2J_model(num_classes=keypointsNumber)
    net.load_state_dict(torch.load(model_dir))
    net = net.cuda()
    net.eval()
    post_precess = anchor.post_process(shape=[cropHeight//16, cropWidth//16], stride=16, P_h=None, P_w=None)

    data4DTemp = scio.loadmat(testingImageDir + str(frame_index + 1) + '.mat')['DepthNormal']
    depthTemp = np.ascontiguousarray(data4DTemp[:, :, 3], dtype='float32')

    # The network cannot take a raw numpy image: build a (1, 1, cropHeight,
    # cropWidth) float tensor on the GPU.
    # NOTE(review): the depth normalisation used when the weights were trained
    # is not visible here; apply the same normalisation before trusting the
    # predictions -- TODO confirm.
    resized = cv2.resize(depthTemp, (cropWidth, cropHeight), interpolation=cv2.INTER_NEAREST)
    img = torch.from_numpy(resized[np.newaxis, np.newaxis, :, :]).cuda()

    with torch.no_grad():
        heads = net(img)
        pred_keypoints = post_precess(heads, voting=False)

    # result is indexed [frame, joint, channel]; channels are ordered (y, x, z),
    # so the coordinate swap has to happen on the last axis.
    result = pred_keypoints.data.cpu().numpy()
    joints = result[0]

    # Map crop coordinates back into the full frame using the bounding box.
    box = Bndbox[frame_index]
    u = joints[:, 1] * (box[2] - box[0]) / cropWidth + box[0]   # x
    v = joints[:, 0] * (box[3] - box[1]) / cropHeight + box[1]  # y

    # Converting pixel -> world -> pixel returns the same pixel position, so
    # the joints are drawn straight from (u, v). Plain Python ints are used
    # because uint8 storage would wrap for x > 255 in a 320-pixel-wide frame.
    points = [(int(round(float(x))), int(round(float(y)))) for x, y in zip(u, v)]

    # Joint index pairs that are connected by a line.
    bones = [
        (0, 1), (1, 2), (1, 3), (1, 8),
        (2, 4), (4, 6), (3, 5), (5, 7),
        (8, 9), (8, 10), (9, 11), (11, 13), (10, 12), (12, 14),
    ]

    IMGX = np.zeros((240, 320, 3), np.uint8)
    for start, end in bones:
        cv2.line(IMGX, points[start], points[end], (0, 0, 255), 2)
    for point in points:
        cv2.circle(IMGX, point, 4, (255, 255, 255), -1)

    cv2.imshow('img', IMGX)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
This is the code I edited, but it has some problems. How should I fix it?
def main():
net = model.A2J_model(num_classes = keypointsNumber) net.load_state_dict(torch.load(model_dir)) net = net.cuda() net.eval() post_precess = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None) output = torch.FloatTensor() data4DTemp = scio.loadmat(testingImageDir + str(1) + '.mat')['DepthNormal'] depthTemp = data4DTemp[:,:,3] img=depthTemp heads = net(img) pred_keypoints = post_precess(heads,voting=False) output = torch.cat([output,pred_keypoints.data.cpu()], 0) result = output.cpu().data.numpy() assert np.shape(result)==np.shape(result), "source has different shape with target" Test1_ = np.zeros(result.shape) Test1_[:, 0] = result[:,1] Test1_[:, 1] = result[:,0] Test1_[:, 2] = result[:,2] Test1 = Test1_ Test1[:,0] = Test1_[:,0]*(Bndbox[i,2]-Bndbox[i,0])/cropWidth + Bndbox[i,0] # x Test1[:,1] = Test1_[:,1]*(Bndbox[i,3]-Bndbox[i,1])/cropHeight + Bndbox[i,1] # y Test1[:,2] = Test1_[:,2]/depthFactor TestWorld = np.ones((len(Test1),keypointsNumber,3)) TestWorld_tuple = pixel2world(Test1[:,0],Test1[:,1],Test1[:,2]) X=np.zeros((15),np.uint8) Y=np.zeros((15),np.uint8) for j in range(keypointsNumber): X[j],Y[j]=world2pixel(TestWorld[0,j,0],TestWorld[0,j,1],TestWorld[0,j,2]) IMGX=np.zeros((240,320,3),np.uint8) cv2.line(IMGX,(X[0],Y[0]),(X[1],Y[1]),(0,0,255),2) cv2.line(IMGX,(X[1],Y[1]),(X[2],Y[2]),(0,0,255),2) cv2.line(IMGX,(X[1],Y[1]),(X[3],Y[3]),(0,0,255),2) cv2.line(IMGX,(X[1],Y[1]),(X[8],Y[8]),(0,0,255),2) cv2.line(IMGX,(X[2],Y[2]),(X[4],Y[4]),(0,0,255),2) cv2.line(IMGX,(X[4],Y[4]),(X[6],Y[6]),(0,0,255),2) cv2.line(IMGX,(X[3],Y[3]),(X[5],Y[5]),(0,0,255),2) cv2.line(IMGX,(X[5],Y[5]),(X[7],Y[7]),(0,0,255),2) cv2.line(IMGX,(X[8],Y[8]),(X[9],Y[9]),(0,0,255),2) cv2.line(IMGX,(X[8],Y[8]),(X[10],Y[10]),(0,0,255),2) cv2.line(IMGX,(X[9],Y[9]),(X[11],Y[11]),(0,0,255),2) cv2.line(IMGX,(X[11],Y[11]),(X[13],Y[13]),(0,0,255),2) cv2.line(IMGX,(X[10],Y[10]),(X[12],Y[12]),(0,0,255),2) cv2.line(IMGX,(X[12],Y[12]),(X[14],Y[14]),(0,0,255),2) for i in 
range(keypointsNumber): cv2.circle(IMGX,(X[i],Y[i]),4,(255,255,255),-1) cv2.imshow('img',IMGX) cv2.waitKey(0) cv2.destroyAllWindows()
This is the code I edited, but it has some problems. How should I fix it?
Did you solve it?
I removed my_dataloader and passed depthTemp into dataPreprocess, then used torch.from_numpy to convert the result to a tensor.
def dataPreprocess(img, depth_thres=0.4):
    """Resize a depth map to the crop size and return it as a [1, H, W] tensor.

    img: 2-D depth map (numpy array).
    depth_thres: accepted but not used by this function.
    Returns a float32 torch tensor of shape [1, cropHeight, cropWidth] holding
    the nearest-neighbour-resized depth values divided by 5.
    """
    resized = cv2.resize(img.copy()[:, :], (cropWidth, cropHeight), interpolation=cv2.INTER_NEAREST)
    scaled = np.asarray(resized, dtype='float32') / 5

    # Single-channel image in [H, W, C] layout, then moved to [C, H, W].
    hwc = np.ones((cropHeight, cropWidth, 1), dtype='float32')
    hwc[:, :, 0] = scaled
    chw = hwc.transpose(2, 0, 1)
    return torch.from_numpy(chw)
# Wrap the single preprocessed frame in a batch of one: [N=1, C=1, 288, 288].
img = np.zeros((1, 1, 288, 288), np.float32)
img[0, :, :, :] = dataPreprocess(depth_map, 0.4)
img = torch.from_numpy(img)

# Decoder that turns the network heads into keypoints, and the (initially
# empty) tensor the predictions are concatenated onto.
post_precess = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None)
output = torch.FloatTensor()
Like these.
I removed my_dataloader and passed depthTemp into dataPreprocess, then used torch.from_numpy to convert the result to a tensor.
def dataPreprocess(img, depth_thres=0.4):
imageOutputs = np.ones((cropHeight, cropWidth, 1), dtype='float32') imCrop = img.copy()[:, :] imgResize = cv2.resize(imCrop, (cropWidth, cropHeight), interpolation=cv2.INTER_NEAREST) imgResize = np.asarray(imgResize,dtype = 'float32') # H*W*C imgResize = imgResize /5 imageOutputs[:,:,0] = imgResize imageOutputs = np.asarray(imageOutputs) imageNCHWOut = imageOutputs.transpose(2, 0, 1) # [H, W, C] --->>> [C, H, W] imageNCHWOut = np.asarray(imageNCHWOut) data = torch.from_numpy(imageNCHWOut) return data
img=np.zeros((1,1,288,288),np.float32) img[0,:,:,:]= dataPreprocess(depth_map, 0.4) img=torch.from_numpy(img) post_precess = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None) output = torch.FloatTensor()
Like these.
Thank you!! I am trying to use this model on my own pictures. Is the bndbox needed? I see you asked about that. Did you try it? What is the performance?
If you don't want to use the bndbox, you may train on the data yourself, or modify the training code.
If you don't want to use the bndbox, you may train on the data yourself, or modify the training code.
I see. Can you share the training code? Did you train on ITOP or K2HPD?
I have seen some people use just the depth map to train an AlphaPose or OpenPose model.
import cv2 import torch import torch.utils.data import torch.optim.lr_scheduler as lr_scheduler import numpy as np import scipy.io as scio import os from PIL import Image from torch.autograd import Variable import model as model import anchor as anchor from tqdm import tqdm import random_erasing import logging import time import datetime import random
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# ---- data / training hyper-parameters ----
TrainImgFrames = 1000      # number of training frames used
TestImgFrames = 100        # number of test frames used
keypointsNumber = 15       # joints per skeleton
cropWidth = 288            # network input width
cropHeight = 288           # network input height
batch_size = 64
learning_rate = 0.00035
Weight_Decay = 1e-4
nepoch = 35
RegLossFactor = 3          # weight of the regression loss in the total loss
spatialFactor = 0.5
RandCropShift = 5
RandshiftDepth = 1
RandRotate = 180           # rotation is drawn from [-RandRotate, RandRotate)
RandScale = (1.0, 0.5)     # scale is drawn as rand() * RandScale[0] + RandScale[1]

# ---- fixed seeds for the three random number generators in use ----
randomseed = 12345
random.seed(randomseed)
np.random.seed(randomseed)
torch.manual_seed(randomseed)

# ---- output directory (creation is best effort) ----
save_dir = ''

try:
    os.makedirs(save_dir)
except OSError:
    pass

# ---- input / output locations, to be filled in by the user ----
trainingImageDir = ''
testingImageDir = ''  # mat images
keypointsfileTest = ''
keypointsfileTrain = ''
model_dir = ''
result_file = 'result_test.txt'
def pixel2world(x):
    """Convert pixel coordinates plus depth to world coordinates, in place.

    x: array indexed as x[:, :, c] with c = 0 the pixel column, c = 1 the
       pixel row and c = 2 the depth. Channels 0 and 1 are overwritten;
       channel 2 is left untouched.
    Returns the same array object that was passed in, so callers that need
    the pixel values afterwards must pass a copy.
    """
    # Inverse of world2pixel: offset from the (160, 120) image centre, scaled
    # by depth and the 0.0035 constant.
    x[:, :, 0] = (x[:, :, 0] - 160.0) * x[:, :, 2] * 0.0035
    x[:, :, 1] = (120.0 - x[:, :, 1]) * x[:, :, 2] * 0.0035
    return x
def world2pixel(x):
    """Convert world coordinates to pixel coordinates plus depth, in place.

    x: array indexed as x[:, :, c] with c = 0 world X, c = 1 world Y and
       c = 2 the depth. Channels 0 and 1 are overwritten with the pixel
       column and row; channel 2 is left untouched.
    Returns the same array object that was passed in, so the caller's array
    holds pixel coordinates after the call.
    """
    # Inverse of pixel2world: divide by depth and the 0.0035 constant, then
    # offset from the (160, 120) image centre.
    x[:, :, 0] = 160.0 + x[:, :, 0] / (x[:, :, 2] * 0.0035)
    x[:, :, 1] = 120.0 - x[:, :, 1] / (x[:, :, 2] * 0.0035)
    return x
# Joint index -> body part name for the 15-joint skeleton.
joint_id_to_name = {
    0: 'Head',
    1: 'Neck',
    2: 'RShoulder',
    3: 'LShoulder',
    4: 'RElbow',
    5: 'LElbow',
    6: 'RHand',
    7: 'LHand',
    8: 'Torso',
    9: 'RHip',
    10: 'LHip',
    11: 'RKnee',
    12: 'LKnee',
    13: 'RFoot',
    14: 'LFoot',
}

# world2pixel overwrites its argument and returns that same array. After these
# calls keypointsWorld* and keypointsPixel* are therefore the SAME array, and it
# holds pixel (x, y) plus depth -- not world coordinates.
keypointsWorldtest = scio.loadmat(keypointsfileTest)['keypoints3D'].astype(np.float32)
keypointsPixeltest = world2pixel(keypointsWorldtest)

keypointsWorldtrain = scio.loadmat(keypointsfileTrain)['keypoints3D'].astype(np.float32)
keypointsPixeltrain = world2pixel(keypointsWorldtrain)
def transform(img, label, matrix):
    """Apply the same affine matrix to an image and to its keypoints.

    img: [H, W] image.
    label: [N, 2] keypoint (x, y) positions; N must equal keypointsNumber.
    matrix: affine matrix, passed to cv2.warpAffine and multiplied with the
        homogeneous keypoint coordinates.
    Returns (warped image of size cropWidth x cropHeight, [N, 2] keypoints).
    """
    warped = cv2.warpAffine(img, matrix, (cropWidth, cropHeight))

    # Homogeneous coordinates: (x, y, 1) per keypoint.
    homogeneous = np.ones((keypointsNumber, 3))
    homogeneous[:, :2] = label[:, :2].copy()
    moved = np.matmul(matrix, homogeneous.transpose()).transpose()
    return warped, moved
def dataPreprocess(index, img, keypointsUVD, depth_thres=0.4, augment=True):
    """Build one (image, label) pair for the network.

    index: row of keypointsUVD belonging to this frame.
    img: 2-D depth map.
    keypointsUVD: array indexed [frame, joint, c] with c = 0 x, c = 1 y
        (in a 320x240 frame) and c = 2 depth.
    depth_thres: accepted but not used by this function.
    augment: when true, apply a random rotation and scale to image and label.
        It is the FIFTH parameter; pass it by keyword.
    Returns (data, label): data is a float32 tensor [1, cropHeight, cropWidth]
    holding the resized depth divided by 5; label is a float32 tensor
    [keypointsNumber, 3] ordered (y, x, z), with z multiplied by the scale.
    """
    if augment:
        # Four crop offsets and a per-pixel depth-noise field are drawn but
        # never applied; the draws stay so the seeded random stream (and thus
        # the rotation/scale sequence) is unchanged.
        for _ in range(4):
            np.random.randint(-1*RandCropShift, RandCropShift)
        np.random.normal(0, RandshiftDepth, cropHeight*cropWidth)
        angle = np.random.randint(-1*RandRotate, RandRotate)
        scale = np.random.rand()*RandScale[0]+RandScale[1]
    else:
        angle = 0
        scale = 1
    matrix = cv2.getRotationMatrix2D((cropWidth/2, cropHeight/2), angle, scale)

    # Image: resize to the crop size and divide the depth values by 5.
    resized = cv2.resize(img[:, :].copy(), (cropWidth, cropHeight), interpolation=cv2.INTER_NEAREST)
    depth = np.asarray(resized, dtype='float32') / 5

    # Label: rescale (x, y) from the 320x240 frame to the crop size.
    xy = np.ones((keypointsNumber, 2), dtype='float32')
    xy[:, 0] = keypointsUVD[index, :, 0].copy()*cropWidth/320   # x
    xy[:, 1] = keypointsUVD[index, :, 1].copy()*cropHeight/240  # y

    if augment:
        depth, xy = transform(depth, xy, matrix)  # rotation, scale

    image_hwc = np.ones((cropHeight, cropWidth, 1), dtype='float32')
    image_hwc[:, :, 0] = depth

    target = np.ones((keypointsNumber, 3), dtype='float32')
    target[:, 1] = xy[:, 0]
    target[:, 0] = xy[:, 1]
    target[:, 2] = (keypointsUVD[index, :, 2])*scale  # Z

    image_chw = image_hwc.transpose(2, 0, 1)  # [H, W, C] -> [C, H, W]
    return torch.from_numpy(image_chw), torch.from_numpy(target)
###################### Pytorch dataloader #################
class my_dataloader(torch.utils.data.Dataset):
    """Dataset that yields one preprocessed depth frame and its keypoint label."""

    def __init__(self, ImgDir, keypointsUVD, num, augment=True):
        """Store the data location and options.

        ImgDir: path prefix of the frame files; frame k is ImgDir + 'k.mat'.
        keypointsUVD: keypoint array handed to dataPreprocess.
        num: number of frames this dataset exposes.
        augment: whether to augment (random transform plus random erasing).
        """
        self.ImgDir = ImgDir
        self.keypointsUVD = keypointsUVD
        self.num = num
        self.augment = augment
        self.randomErase = random_erasing.RandomErasing(probability = 0.5, sl = 0.02, sh = 0.4, r1 = 0.3, mean=[0])

    def __getitem__(self, index):
        """Return (data, label) for the frame at the 0-based ``index``."""
        # Frame files are numbered from 1.
        data4D = scio.loadmat(self.ImgDir + str(index+1) + '.mat')['DepthNormal']
        depth = data4D[:,:]
        # augment must go by keyword: the fourth positional parameter of
        # dataPreprocess is depth_thres, so passing it positionally left
        # augment at its default (True) even for the test set.
        data, label = dataPreprocess(index, depth, self.keypointsUVD, augment=self.augment)
        if self.augment:
            data = self.randomErase(data)
        return data, label

    def __len__(self):
        """Return the number of frames given at construction."""
        return self.num
# Training set: augmented, shuffled every epoch.
train_image_datasets = my_dataloader(trainingImageDir, keypointsWorldtrain, TrainImgFrames, augment=True)
train_dataloaders = torch.utils.data.DataLoader(train_image_datasets, batch_size = batch_size,
                                                shuffle = True, num_workers = 8)

# Test set: no augmentation, fixed order so predictions line up with the
# ground-truth keypoints by index.
test_image_datasets = my_dataloader(testingImageDir, keypointsWorldtest, TestImgFrames, augment=False)
test_dataloaders = torch.utils.data.DataLoader(test_image_datasets, batch_size = batch_size,
                                               shuffle = False, num_workers = 8)
def train():
    """Train the A2J model, evaluating and checkpointing after every epoch.

    Uses the module-level hyper-parameters and data loaders. For each epoch
    it runs one pass over train_dataloaders, prints per-sample averages,
    evaluates on test_dataloaders with errorCompute, saves the state dict to
    save_dir and appends a summary line to save_dir/train.log.
    Requires a CUDA device.
    """
    net = model.A2J_model(num_classes = keypointsNumber)
    net = net.cuda()

    post_precess = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None)
    criterion = anchor.A2J_loss(shape=[cropHeight//16,cropWidth//16],thres = [16.0,32.0],stride=16,
                                spatialFactor=spatialFactor,img_shape=[cropHeight, cropWidth],P_h=None, P_w=None)
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=Weight_Decay)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2)

    logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%Y/%m/%d %H:%M:%S',
                        filename=os.path.join(save_dir, 'train.log'), level=logging.INFO)
    logging.info('======================================================')

    for epoch in range(nepoch):
        net = net.train()
        train_loss_add = 0.0
        Cls_loss_add = 0.0
        Reg_loss_add = 0.0
        timer = time.time()

        # Training loop
        for i, (img, label) in enumerate(train_dataloaders):
            torch.cuda.synchronize()

            img, label = img.cuda(), label.cuda()
            heads = net(img)

            optimizer.zero_grad()
            Cls_loss, Reg_loss = criterion(heads, label)
            loss = 1*Cls_loss + Reg_loss*RegLossFactor
            loss.backward()
            optimizer.step()

            torch.cuda.synchronize()

            # Accumulate sample-weighted sums; they are turned into per-sample
            # means after the loop.
            train_loss_add = train_loss_add + (loss.item())*len(img)
            Cls_loss_add = Cls_loss_add + (Cls_loss.item())*len(img)
            Reg_loss_add = Reg_loss_add + (Reg_loss.item())*len(img)

            # printing loss info
            if i%10 == 0:
                print('epoch: ',epoch, ' step: ', i, 'Cls_loss ',Cls_loss.item(), 'Reg_loss ',Reg_loss.item(), ' total loss ',loss.item())

        # NOTE(review): passing the epoch to step() is deprecated in recent
        # PyTorch releases; it is kept so the decay schedule of existing runs
        # does not shift.
        scheduler.step(epoch)

        # time taken
        torch.cuda.synchronize()
        timer = time.time() - timer
        timer = timer / TrainImgFrames
        print('==> time to learn 1 sample = %f (ms)' %(timer*1000))

        train_loss_add = train_loss_add / TrainImgFrames
        Cls_loss_add = Cls_loss_add / TrainImgFrames
        Reg_loss_add = Reg_loss_add / TrainImgFrames
        print('mean train_loss_add of 1 sample: %f, #train_indexes = %d' %(train_loss_add, TrainImgFrames))
        print('mean Cls_loss_add of 1 sample: %f, #train_indexes = %d' %(Cls_loss_add, TrainImgFrames))
        print('mean Reg_loss_add of 1 sample: %f, #train_indexes = %d' %(Reg_loss_add, TrainImgFrames))

        # Evaluate on the test set after every epoch.
        net = net.eval()
        output = torch.FloatTensor()
        for _, (img, _) in tqdm(enumerate(test_dataloaders)):
            with torch.no_grad():
                img = img.cuda()
                heads = net(img)
                pred_keypoints = post_precess(heads, voting=False)
                output = torch.cat([output,pred_keypoints.data.cpu()], 0)

        result = output.cpu().data.numpy()
        Error_test = errorCompute(result,keypointsWorldtest,)
        print('epoch: ', epoch, 'Test error:', Error_test)

        saveNamePrefix = '%s/net_%d_wetD_' % (save_dir, epoch) + str(Weight_Decay) + '_depFact_' + str(spatialFactor) + '_RegFact_' + str(RegLossFactor) + '_rndShft_' + str(RandCropShift)
        torch.save(net.state_dict(), saveNamePrefix + '.pth')

        # log
        # Read the learning rate from the optimizer: scheduler.get_lr() is not
        # meant to be called outside step() and can report a value multiplied
        # by gamma once too often.
        current_lr = optimizer.param_groups[0]['lr']
        logging.info('Epoch#%d: total loss=%.4f, Cls_loss=%.4f, Reg_loss=%.4f, Err_test=%.4f, lr = %.6f'
                     %(epoch, train_loss_add, Cls_loss_add, Reg_loss_add, Error_test, current_lr))
def test():
    """Evaluate the checkpoint at model_dir on the test loader.

    Writes the predictions with writeTxt and prints the mean error returned
    by errorCompute. Requires a CUDA device.
    """
    net = model.A2J_model(num_classes = keypointsNumber)
    net.load_state_dict(torch.load(model_dir))
    net = net.cuda()
    net.eval()

    post_precess = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None)

    # Predictions of all batches, concatenated along the frame axis.
    output = torch.FloatTensor()
    torch.cuda.synchronize()
    with torch.no_grad():
        for _step, (img, label) in tqdm(enumerate(test_dataloaders)):
            img, label = img.cuda(), label.cuda()
            batch_keypoints = post_precess(net(img), voting=False)
            output = torch.cat([output, batch_keypoints.data.cpu()], 0)
    torch.cuda.synchronize()

    result = output.cpu().data.numpy()
    writeTxt(result)
    error = errorCompute(result, keypointsWorldtest)
    print('Error:', error)
def errorCompute(source, target):
    """Return the mean joint distance between predictions and ground truth.

    source: predictions indexed [frame, joint, c], with c ordered (y, x, z)
        in crop coordinates.
    target: ground truth of the same shape, with c ordered (x, y, z) in the
        320x240 frame.
    Both are converted with pixel2world before the Euclidean distance per
    joint is averaged. Neither argument is modified.
    """
    assert np.shape(source)==np.shape(target), "source has different shape with target"

    # Reorder to (x, y, z) and rescale from the crop size to the 320x240 frame;
    # the z channel is carried over unchanged by the copy.
    predicted = source.copy()
    predicted[:, :, 0] = source[:, :, 1]*320/cropWidth   # x
    predicted[:, :, 1] = source[:, :, 0]*240/cropHeight  # y

    # pixel2world overwrites its argument, hence the copies.
    labels = pixel2world(target.copy())
    outputs = pixel2world(predicted.copy())

    distances = np.sqrt(np.sum((labels - outputs) ** 2, axis=2))
    return np.mean(distances)
def writeTxt(result):
    """Write the predictions to save_dir/result_file, one frame per line.

    result: predictions indexed [frame, joint, c], with c ordered (y, x, z)
        in crop coordinates.
    Each line holds keypointsNumber*3 values ordered x y z per joint, with
    x and y rescaled to the 320x240 frame and every value followed by a space.
    """
    resultUVD = result.copy()
    resultUVD[:, :, 0] = result[:, :, 1]*320/cropWidth   # x
    resultUVD[:, :, 1] = result[:, :, 0]*240/cropHeight  # y

    rows = resultUVD.reshape(len(result), -1)
    values_per_row = keypointsNumber*3

    with open(os.path.join(save_dir, result_file), 'w') as f:
        for row in rows:
            f.write(''.join(str(row[j])+' ' for j in range(values_per_row)))
            f.write('\n')
# Script entry point: train first, then evaluate the checkpoint at model_dir.
if __name__ == '__main__':
    train()
    test()
import cv2 import torch import torch.utils.data import torch.optim.lr_scheduler as lr_scheduler import numpy as np import scipy.io as scio import os from PIL import Image from torch.autograd import Variable import model as model import anchor as anchor from tqdm import tqdm import random_erasing import logging import time import datetime import random
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# DataHyperParms
TrainImgFrames = 1000 TestImgFrames = 100 keypointsNumber = 15 cropWidth = 288 cropHeight = 288 batch_size = 64 learning_rate = 0.00035 Weight_Decay = 1e-4 nepoch = 35 RegLossFactor = 3 spatialFactor = 0.5 RandCropShift = 5 RandshiftDepth = 1 RandRotate = 180 RandScale = (1.0, 0.5)
randomseed = 12345 random.seed(randomseed) np.random.seed(randomseed) torch.manual_seed(randomseed)
save_dir = ''
try: os.makedirs(save_dir) except OSError: pass
trainingImageDir = '' testingImageDir = '' # mat images keypointsfileTest = '' keypointsfileTrain = '' model_dir = '' result_file = 'result_test.txt'
def pixel2world(x):
    """Convert pixel coordinates plus depth to world coordinates, in place.

    x: array indexed as x[:, :, c] with c = 0 the pixel column, c = 1 the
       pixel row and c = 2 the depth. Channels 0 and 1 are overwritten;
       channel 2 is left untouched.
    Returns the same array object that was passed in, so callers that need
    the pixel values afterwards must pass a copy.
    """
    # Inverse of world2pixel: offset from the (160, 120) image centre, scaled
    # by depth and the 0.0035 constant.
    x[:, :, 0] = (x[:, :, 0] - 160.0) * x[:, :, 2] * 0.0035
    x[:, :, 1] = (120.0 - x[:, :, 1]) * x[:, :, 2] * 0.0035
    return x
def world2pixel(x):
    """Convert world coordinates to pixel coordinates plus depth, in place.

    x: array indexed as x[:, :, c] with c = 0 world X, c = 1 world Y and
       c = 2 the depth. Channels 0 and 1 are overwritten with the pixel
       column and row; channel 2 is left untouched.
    Returns the same array object that was passed in, so the caller's array
    holds pixel coordinates after the call.
    """
    # Inverse of pixel2world: divide by depth and the 0.0035 constant, then
    # offset from the (160, 120) image centre.
    x[:, :, 0] = 160.0 + x[:, :, 0] / (x[:, :, 2] * 0.0035)
    x[:, :, 1] = 120.0 - x[:, :, 1] / (x[:, :, 2] * 0.0035)
    return x
joint_id_to_name = { 0: 'Head', 1: 'Neck', 2: 'RShoulder', 3: 'LShoulder', 4: 'RElbow', 5: 'LElbow', 6: 'RHand', 7: 'LHand', 8: 'Torso', 9: 'RHip', 10: 'LHip', 11: 'RKnee', 12: 'LKnee', 13: 'RFoot', 14: 'LFoot', }
# loading GT keypoints and center points
keypointsWorldtest = scio.loadmat(keypointsfileTest)['keypoints3D'].astype(np.float32) keypointsPixeltest = np.ones((len(keypointsWorldtest),15,2),dtype='float32') keypointsPixeltest = world2pixel(keypointsWorldtest)
keypointsWorldtrain = scio.loadmat(keypointsfileTrain)['keypoints3D'].astype(np.float32) keypointsPixeltrain = np.ones((len(keypointsWorldtrain),15,2),dtype='float32') keypointsPixeltrain = world2pixel(keypointsWorldtrain)
def transform(img, label, matrix): ''' img: [H, W] label, [N,2] ''' img_out = cv2.warpAffine(img,matrix,(cropWidth,cropHeight)) label_out = np.ones((keypointsNumber, 3)) label_out[:,:2] = label[:,:2].copy() label_out = np.matmul(matrix, label_out.transpose()) label_out = label_out.transpose()
return img_out, label_out
def dataPreprocess(index, img, keypointsUVD, depth_thres=0.4, augment=True):
imageOutputs = np.ones((cropHeight, cropWidth, 1), dtype='float32') labelOutputs = np.ones((keypointsNumber, 3), dtype = 'float32') if augment: RandomOffset_1 = np.random.randint(-1*RandCropShift,RandCropShift) RandomOffset_2 = np.random.randint(-1*RandCropShift,RandCropShift) RandomOffset_3 = np.random.randint(-1*RandCropShift,RandCropShift) RandomOffset_4 = np.random.randint(-1*RandCropShift,RandCropShift) RandomOffsetDepth = np.random.normal(0, RandshiftDepth, cropHeight*cropWidth).reshape(cropHeight,cropWidth) RandomOffsetDepth[np.where(RandomOffsetDepth < RandshiftDepth)] = 0 RandomRotate = np.random.randint(-1*RandRotate,RandRotate) RandomScale = np.random.rand()*RandScale[0]+RandScale[1] matrix = cv2.getRotationMatrix2D((cropWidth/2,cropHeight/2),RandomRotate,RandomScale) else: RandomOffset_1, RandomOffset_2, RandomOffset_3, RandomOffset_4 = 0, 0, 0, 0 RandomRotate = 0 RandomScale = 1 RandomOffsetDepth = 0 matrix = cv2.getRotationMatrix2D((cropWidth/2,cropHeight/2),RandomRotate,RandomScale) imCrop = img[:, :].copy() imgResize = cv2.resize(imCrop, (cropWidth, cropHeight), interpolation=cv2.INTER_NEAREST) imgResize = np.asarray(imgResize,dtype = 'float32') # H*W*C imgResize = imgResize / 5 ## label label_xy = np.ones((keypointsNumber, 2), dtype = 'float32') label_xy[:,0] = keypointsUVD[index,:,0].copy()*cropWidth/320 # x label_xy[:,1] = keypointsUVD[index,:,1].copy()*cropHeight/240 # y if augment: imgResize, label_xy = transform(imgResize, label_xy, matrix) ## rotation, scale imageOutputs[:,:,0] = imgResize labelOutputs[:,1] = label_xy[:,0] labelOutputs[:,0] = label_xy[:,1] labelOutputs[:,2] = (keypointsUVD[index,:,2])*RandomScale # Z imageOutputs = np.asarray(imageOutputs) imageNCHWOut = imageOutputs.transpose(2, 0, 1) # [H, W, C] --->>> [C, H, W] imageNCHWOut = np.asarray(imageNCHWOut) labelOutputs = np.asarray(labelOutputs) data, label = torch.from_numpy(imageNCHWOut), torch.from_numpy(labelOutputs) return data, label
###################### Pytorch dataloader ################# class my_dataloader(torch.utils.data.Dataset):
def __init__(self, ImgDir, keypointsUVD, num, augment=True): self.ImgDir = ImgDir self.keypointsUVD = keypointsUVD self.num = num self.augment = augment self.randomErase = random_erasing.RandomErasing(probability = 0.5, sl = 0.02, sh = 0.4, r1 = 0.3, mean=[0]) def __getitem__(self, index): data4D = scio.loadmat(self.ImgDir + str(index+1) + '.mat')['DepthNormal'] depth = data4D[:,:] data, label = dataPreprocess(index, depth, self.keypointsUVD, self.augment) if self.augment: data = self.randomErase(data) return data, label def __len__(self): return self.num
train_image_datasets = my_dataloader(trainingImageDir, keypointsWorldtrain, TrainImgFrames, augment=True) train_dataloaders = torch.utils.data.DataLoader(train_image_datasets, batch_size = batch_size, shuffle = True, num_workers = 8)
test_image_datasets = my_dataloader(testingImageDir, keypointsWorldtest, TestImgFrames, augment=False) test_dataloaders = torch.utils.data.DataLoader(test_image_datasets, batch_size = batch_size, shuffle = False, num_workers = 8)
def train():
net = model.A2J_model(num_classes = keypointsNumber) net = net.cuda() post_precess = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None) criterion = anchor.A2J_loss(shape=[cropHeight//16,cropWidth//16],thres = [16.0,32.0],stride=16,\ spatialFactor=spatialFactor,img_shape=[cropHeight, cropWidth],P_h=None, P_w=None) optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=Weight_Decay) scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2) logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%Y/%m/%d %H:%M:%S', \ filename=os.path.join(save_dir, 'train.log'), level=logging.INFO) logging.info('======================================================') for epoch in range(nepoch): net = net.train() train_loss_add = 0.0 Cls_loss_add = 0.0 Reg_loss_add = 0.0 timer = time.time() # Training loop for i, (img, label) in enumerate(train_dataloaders): torch.cuda.synchronize() img, label = img.cuda(), label.cuda() heads = net(img) #print(regression) optimizer.zero_grad() Cls_loss, Reg_loss = criterion(heads, label) loss = 1*Cls_loss + Reg_loss*RegLossFactor loss.backward() optimizer.step() torch.cuda.synchronize() train_loss_add = train_loss_add + (loss.item())*len(img) Cls_loss_add = Cls_loss_add + (Cls_loss.item())*len(img) Reg_loss_add = Reg_loss_add + (Reg_loss.item())*len(img) # printing loss info if i%10 == 0: print('epoch: ',epoch, ' step: ', i, 'Cls_loss ',Cls_loss.item(), 'Reg_loss ',Reg_loss.item(), ' total loss ',loss.item()) scheduler.step(epoch) # time taken torch.cuda.synchronize() timer = time.time() - timer timer = timer / TrainImgFrames print('==> time to learn 1 sample = %f (ms)' %(timer*1000)) train_loss_add = train_loss_add / TrainImgFrames Cls_loss_add = Cls_loss_add / TrainImgFrames Reg_loss_add = Reg_loss_add / TrainImgFrames print('mean train_loss_add of 1 sample: %f, #train_indexes = %d' %(train_loss_add, TrainImgFrames)) print('mean Cls_loss_add of 1 sample: %f, #train_indexes = 
%d' %(Cls_loss_add, TrainImgFrames)) print('mean Reg_loss_add of 1 sample: %f, #train_indexes = %d' %(Reg_loss_add, TrainImgFrames)) Error_test = 0 Error_train = 0 Error_test_wrist = 0 if (epoch % 1 == 0): net = net.eval() output = torch.FloatTensor() outputTrain = torch.FloatTensor() for i, (img, label) in tqdm(enumerate(test_dataloaders)): with torch.no_grad(): img, label = img.cuda(), label.cuda() heads = net(img) pred_keypoints = post_precess(heads, voting=False) output = torch.cat([output,pred_keypoints.data.cpu()], 0) result = output.cpu().data.numpy() Error_test = errorCompute(result,keypointsWorldtest,) print('epoch: ', epoch, 'Test error:', Error_test) saveNamePrefix = '%s/net_%d_wetD_' % (save_dir, epoch) + str(Weight_Decay) + '_depFact_' + str(spatialFactor) + '_RegFact_' + str(RegLossFactor) + '_rndShft_' + str(RandCropShift) torch.save(net.state_dict(), saveNamePrefix + '.pth') # log logging.info('Epoch#%d: total loss=%.4f, Cls_loss=%.4f, Reg_loss=%.4f, Err_test=%.4f, lr = %.6f' %(epoch, train_loss_add, Cls_loss_add, Reg_loss_add, Error_test, scheduler.get_lr()[0]))
def test(): net = model.A2J_model(num_classes = keypointsNumber) net.load_state_dict(torch.load(model_dir)) net = net.cuda() net.eval()
post_precess = anchor.post_process(shape=[cropHeight//16,cropWidth//16],stride=16,P_h=None, P_w=None) output = torch.FloatTensor() torch.cuda.synchronize() for i, (img, label) in tqdm(enumerate(test_dataloaders)): with torch.no_grad(): img, label = img.cuda(), label.cuda() heads = net(img) pred_keypoints = post_precess(heads,voting=False) output = torch.cat([output,pred_keypoints.data.cpu()], 0) torch.cuda.synchronize() result = output.cpu().data.numpy() writeTxt(result) error = errorCompute(result, keypointsWorldtest) print('Error:', error)
def errorCompute(source, target): assert np.shape(source)==np.shape(target), "source has different shape with target"
Test1_ = source.copy() target_ = target.copy() Test1_[:, :, 0] = source[:,:,1] Test1_[:, :, 1] = source[:,:,0] Test1 = Test1_ # [x, y, z] for i in range(len(Test1_)): Test1[i,:,0] = Test1_[i,:,0]*320/cropWidth # x Test1[i,:,1] = Test1_[i,:,1]*240/cropHeight # y Test1[i,:,2] = source[i,:,2] labels = pixel2world(target_) outputs = pixel2world(Test1.copy()) errors = np.sqrt(np.sum((labels - outputs) ** 2, axis=2)) return np.mean(errors)
def writeTxt(result):
resultUVD_ = result.copy() resultUVD_[:, :, 0] = result[:,:,1] resultUVD_[:, :, 1] = result[:,:,0] resultUVD = resultUVD_ # [x, y, z] for i in range(len(result)): resultUVD[i,:,0] = resultUVD_[i,:,0]*320/cropWidth # x resultUVD[i,:,1] = resultUVD_[i,:,1]*240/cropHeight # y resultUVD[i,:,2] = result[i,:,2] resultReshape = resultUVD.reshape(len(result), -1) with open(os.path.join(save_dir, result_file), 'w') as f: for i in range(len(resultReshape)): for j in range(keypointsNumber*3): f.write(str(resultReshape[i, j])+' ') f.write('\n') f.close()
if name == 'main': train() test() Thank you very much!! bro! mind i add your email or skype .my email is 15619217958@163.com
If you don't want to use the bndbox, you may train on the data yourself, or modify the training code.
Hi bro. Can you share the ITOP model or the K2HPD model? Best regards!
Hi @zeroXscorpion7 ,
I am trying to use this model on my own pictures. Is the bndbox needed?