ocissor commented 7 months ago

Hi, first of all, great work. I was going through your train.py file, in which you import Trainer from base, but there is no class named Trainer in base.py. Can you please help me with this?

Thank you!

YangChangHee commented 7 months ago

We apologize for any inconvenience. I implemented the training code in an overly complicated way, so I'm rewriting it! I'm also busy preparing for another conference right now, so I'll revise this as soon as that is finished. Can you wait for a few days?

Thank you! :)

ocissor commented 7 months ago

Thank you for your response. Is it possible for you to share the function trainer.set_lr() and the trainer.optimizer? If not, I totally understand. Conference submissions are very hectic. All the best for your submission.

Thank you!

YangChangHee commented 7 months ago

The Trainer class we used is as follows!

class Trainer(Base):
    """Training driver: builds the data loader, the model and the optimizer,
    adjusts the learning rate, and saves/loads checkpoints.

    The class depends on names supplied by the surrounding project
    (``Base``, ``cfg``, ``get_model``, ``MultipleDatasets``, ``DataParallel``,
    ``DataLoader``, ``transforms`` and the dataset classes named in ``cfg``).
    ``self.logger`` is presumably created by ``Base.__init__`` -- confirm.
    """

    def __init__(self):
        super(Trainer, self).__init__(log_name='train_logs.txt')

    def get_optimizer(self, model):
        """Return an Adam optimizer over the backbone, pose2feat, position_net
        and rotation_net parameters of ``model.module``.

        The backbone group uses ``cfg.lr_backbone``; every other group falls
        back to the optimizer-wide default ``cfg.lr``.
        """
        optimizer = torch.optim.Adam([
            {'params': model.module.backbone.parameters(), 'lr': cfg.lr_backbone},
            {'params': model.module.pose2feat.parameters()},
            {'params': model.module.position_net.parameters()},
            {'params': model.module.rotation_net.parameters()},
            #{'params': model.module.edge_module.parameters()},
        ],
        # Same base rate that set_lr() decays from.
        lr=cfg.lr)
        print('The parameters of backbone, pose2feat, position_net, rotation_net, are added to the optimizer.')

        return optimizer

    def get_optimizer_dilation(self, model):
        """Like :meth:`get_optimizer`, but additionally optimizes the ``ced``
        sub-module of ``model.module``.
        """
        optimizer = torch.optim.Adam([
            {'params': model.module.backbone.parameters(), 'lr': cfg.lr_backbone},
            {'params': model.module.pose2feat.parameters()},
            {'params': model.module.position_net.parameters()},
            {'params': model.module.rotation_net.parameters()},
            # kornia toolkit make gradients
            {'params': model.module.ced.parameters()},
        ],
        # Same base rate that set_lr() decays from.
        lr=cfg.lr)
        print('The parameters of backbone, pose2feat, position_net, rotation_net, are added to the optimizer.')

        return optimizer

    def save_model(self, state, epoch):
        """Write ``state`` to ``<cfg.model_dir>/snapshot_<epoch>.pth.tar``."""
        file_path = osp.join(cfg.model_dir,'snapshot_{}.pth.tar'.format(str(epoch)))
        torch.save(state, file_path)
        self.logger.info("Write snapshot into {}".format(file_path))

    def load_model(self, model, optimizer):
        """Load the snapshot with the highest epoch number in ``cfg.model_dir``.

        Returns ``(start_epoch, model, optimizer)`` where ``start_epoch`` is the
        stored epoch plus one. ``optimizer`` is returned untouched; no optimizer
        state is restored here.

        Raises ``ValueError`` (from ``max``) when no ``*.pth.tar`` file exists.
        """
        model_file_list = glob.glob(osp.join(cfg.model_dir,'*.pth.tar'))
        # The epoch number is the text between 'snapshot_' and '.pth.tar'.
        cur_epoch = max([int(file_name[file_name.find('snapshot_') + 9 : file_name.find('.pth.tar')]) for file_name in model_file_list])
        ckpt_path = osp.join(cfg.model_dir, 'snapshot_' + str(cur_epoch) + '.pth.tar')
        ckpt = torch.load(ckpt_path)
        start_epoch = ckpt['epoch'] + 1

        # NOTE(review): weights are only loaded when cfg.distillation_pretrained
        # is true, and the model is wrapped in DataParallel again although
        # _make_model() already wrapped it. This looks incomplete -- confirm
        # the intended behaviour for the non-distillation case.
        if cfg.distillation_pretrained==True:
            model.load_state_dict(ckpt['network'], strict=False)
            model = DataParallel(model).cuda()

        self.logger.info('Load checkpoint from {}'.format(ckpt_path))
        return start_epoch, model, optimizer

    def set_lr(self, epoch):
        """Apply the step-decay schedule for ``epoch`` to every parameter group.

        The rate is ``cfg.lr / cfg.lr_dec_factor ** k`` where ``k`` is the index
        of the first milestone in ``cfg.lr_dec_epoch`` that is greater than
        ``epoch``, or ``len(cfg.lr_dec_epoch)`` once the last milestone has been
        reached. All groups receive the same value, including the backbone
        group that was created with ``cfg.lr_backbone``.
        """
        for e in cfg.lr_dec_epoch:
            if epoch < e:
                break
        if epoch < cfg.lr_dec_epoch[-1]:
            decay_steps = cfg.lr_dec_epoch.index(e)
        else:
            decay_steps = len(cfg.lr_dec_epoch)

        new_lr = cfg.lr / (cfg.lr_dec_factor ** decay_steps)
        for g in self.optimizer.param_groups:
            g['lr'] = new_lr

    def get_lr(self):
        """Return the learning rate of the last parameter group."""
        for g in self.optimizer.param_groups:
            cur_lr = g['lr']
        return cur_lr

    def _make_batch_generator(self):
        """Instantiate the configured datasets and build ``self.batch_generator``.

        Also sets ``self.vertex_num``, ``self.joint_num`` (taken from the first
        available dataset) and ``self.itr_per_epoch``.

        Raises ``AssertionError`` when neither a 3D nor a 2D training set is
        configured.
        """
        # data load and construct batch generator
        self.logger.info("Creating dataset...")
        # NOTE: eval() turns the configured dataset names into classes; the
        # configuration must therefore come from a trusted source.
        trainset3d_loader = []
        if cfg.smplify==False:
            trainset3d_loader = [eval(name)(transforms.ToTensor(), "train") for name in cfg.trainset_3d]
        trainset2d_loader = [eval(name)(transforms.ToTensor(), "train") for name in cfg.trainset_2d]

        if len(trainset3d_loader) > 0 and len(trainset2d_loader) > 0:
            self.vertex_num = trainset3d_loader[0].vertex_num
            self.joint_num = trainset3d_loader[0].joint_num
            trainset3d_loader = MultipleDatasets(trainset3d_loader, make_same_len=False)
            trainset2d_loader = MultipleDatasets(trainset2d_loader, make_same_len=False)
            trainset_loader = MultipleDatasets([trainset3d_loader, trainset2d_loader], make_same_len=True)
        elif len(trainset3d_loader) > 0:
            self.vertex_num = trainset3d_loader[0].vertex_num
            self.joint_num = trainset3d_loader[0].joint_num
            trainset_loader = MultipleDatasets(trainset3d_loader, make_same_len=False)
        elif len(trainset2d_loader) > 0:
            self.vertex_num = trainset2d_loader[0].vertex_num
            self.joint_num = trainset2d_loader[0].joint_num
            trainset_loader = MultipleDatasets(trainset2d_loader, make_same_len=False)
        else:
            # Raised explicitly so the check also fires under ``python -O``.
            raise AssertionError("Both 3D training set and 2D training set have zero length.")

        self.itr_per_epoch = math.ceil(len(trainset_loader) / cfg.num_gpus / cfg.train_batch_size)
        self.batch_generator = DataLoader(dataset=trainset_loader, batch_size=cfg.num_gpus*cfg.train_batch_size, shuffle=True, num_workers=cfg.num_thread, pin_memory=True)

    def _make_model(self):
        """Create the network and optimizer and store them, together with the
        epoch to start from, on ``self``.

        Requires ``self.vertex_num`` and ``self.joint_num``, which
        ``_make_batch_generator`` sets.
        """
        # prepare network
        self.logger.info("Creating graph and optimizer...")
        # NOTE(review): smpl_overlap_model is never created in this snippet, so
        # both the line below and the get_model() call fail with
        # UnboundLocalError. The construction of that module has to be
        # supplied before this point -- confirm against the full source.
        if cfg.distillation_pretrained==True and cfg.smplify==False:
            smpl_overlap_model = DataParallel(smpl_overlap_model).cuda()
            print("Load SMPL_overlap_module success!")

        model = get_model(self.vertex_num, self.joint_num,smpl_overlap_model, 'train')
        model = DataParallel(model).cuda()
        optimizer = self.get_optimizer(model)
        if cfg.continue_train:
            start_epoch, model, optimizer = self.load_model(model, optimizer)
        else:
            start_epoch = 0
        # The optimizer is rebuilt from the (possibly replaced) model.
        optimizer = self.get_optimizer(model)

        self.start_epoch = start_epoch
        self.model = model
        self.optimizer = optimizer

I apologize for sending you the code in such a raw way. We will update you as soon as preparations for this conference are over! Please wait a little longer!

good luck!

YangChangHee commented 7 months ago

We have uploaded the Trainer class :) I am closing this issue.