open-mmlab / mmengine

OpenMMLab Foundational Library for Training Deep Learning Models
https://mmengine.readthedocs.io/
Apache License 2.0
1.17k stars 351 forks

How to change dataset settings and add optimizer to parameters other than model parameters when using Runner? #1414

Open haochuan-li opened 12 months ago

haochuan-li commented 12 months ago

Prerequisite

Environment

sys.platform: linux
Python: 3.9.18 | packaged by conda-forge | (main, Aug 30 2023, 03:49:32) [GCC 12.3.0]
CUDA available: True
numpy_random_seed: 2147483648
GPU 0,1,2,3,4,5,6,7: Tesla V100-SXM2-32GB
CUDA_HOME: /usr/local/cuda-11.7
NVCC: Cuda compilation tools, release 11.7, V11.7.64
GCC: gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
PyTorch: 2.0.1+cu117
PyTorch compiling details: PyTorch built with:

TorchVision: 0.15.2+cu117
OpenCV: 4.8.1
MMEngine: 0.9.0
MMAction2: 1.2.0+4d6c934
MMCV: 2.1.0

Reproduces the problem - code sample

Hi! I wonder whether it is possible to change the dataset settings and attach an optimizer to parameters other than the model parameters when using Runner. For example:

# My custom dataset
class CustomDataset(BaseDataset):
    def load_data_list(self):
        ...
        return [dict(inputs=torch.rand(1, 3, 224, 224).numpy(),
                     gt_label=torch.rand(1).numpy()) for i in range(bs)]

# Configs for Runner -> cfg
train_pipeline_cfg = [dict(type=...)]
ds = dict(type='CustomDataset',pipeline=train_pipeline_cfg)
train_dataloader_cfg = dict(
                    batch_size=32,
                    num_workers=8,
                    persistent_workers=True,
                    sampler=dict(type='DefaultSampler', shuffle=True),
                    dataset=ds)
...
runner = Runner.from_cfg(cfg)

# After creating the runner, I want to initialize an optimizer for the
# custom dataset and set requires_grad=True on the data it holds.

# Set requires_grad of the data in CustomDataset to True
(????) = (????).requires_grad_(True)  # what should go in the (????)?
optimizer_data = torch.optim.SGD([????])  # what should the optimizer receive to get the grads of the data in CustomDataset?

runner.train()

Can anyone enlighten me on this? Thanks.
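
For concreteness, outside of MMEngine the update I have in mind is just plain PyTorch, roughly like the sketch below (image_syn and optimizer_data are my own names, not MMEngine API):

import torch

# A leaf tensor holding the learnable data, analogous to the synthetic
# images stored in the custom dataset.
image_syn = torch.randn(8, 3, 224, 224, requires_grad=True)

# An optimizer over the data tensor rather than over model parameters.
optimizer_data = torch.optim.SGD([image_syn], lr=0.1, momentum=0.5)

# One manual update step; any differentiable loss of image_syn would do.
loss = image_syn.square().mean()
optimizer_data.zero_grad()
loss.backward()
optimizer_data.step()

The question is how to wire this kind of update into runner.train().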

Reproduces the problem - command or script

# Imports assumed by the script below; parse_args, merge_args, get_dataset and
# build_original_dataset are project-local helpers that are not shown here.
from typing import Dict, Optional, Sequence, Tuple

import numpy as np
import torch
from mmcv.transforms import BaseTransform, to_tensor
from mmengine.config import Config
from mmengine.dataset import BaseDataset
from mmengine.runner import Runner

from mmaction.registry import DATASETS, TRANSFORMS
from mmaction.structures import ActionDataSample


@TRANSFORMS.register_module()
class PackDistillInputs(BaseTransform):

    mapping_table = {
        'gt_bboxes': 'bboxes',
        'gt_labels': 'labels',
    }

    def __init__(
            self,
            collect_keys: Optional[Tuple[str]] = None,
            meta_keys: Sequence[str] = ('img_shape', 'img_key', 'video_id',
                                        'timestamp'),
            algorithm_keys: Sequence[str] = (),
    ) -> None:
        self.collect_keys = collect_keys
        self.meta_keys = meta_keys
        self.algorithm_keys = algorithm_keys

    def transform(self, results: Dict) -> Dict:
        packed_results = dict()
        if self.collect_keys is not None:
            packed_results['inputs'] = dict()
            for key in self.collect_keys:
                packed_results['inputs'][key] = to_tensor(results[key])
        else:
            if 'imgs' in results:
                imgs = results['imgs']
                # Detach, then mark as a leaf tensor that requires grad, so
                # gradients can flow back to the packed inputs.
                imgs_syn = to_tensor(imgs).detach().requires_grad_(True)
                packed_results['inputs'] = imgs_syn

            else:
                raise ValueError(
                    'Cannot get `imgs` in the input dict of '
                    '`PackDistillInputs`.')

        data_sample = ActionDataSample()

        if 'label' in results:
            data_sample.set_gt_label(results['label'])

        # Set custom algorithm keys
        for key in self.algorithm_keys:
            if key in results:
                data_sample.set_field(results[key], key)

        # Set meta keys
        img_meta = {k: results[k] for k in self.meta_keys if k in results}
        data_sample.set_metainfo(img_meta)
        packed_results['data_samples'] = data_sample
        return packed_results

    def __repr__(self) -> str:
        repr_str = self.__class__.__name__
        repr_str += f'(collect_keys={self.collect_keys}, '
        repr_str += f'meta_keys={self.meta_keys})'
        return repr_str
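
# Sanity check (illustrative only, not part of the original script): exercise
# the transform directly with made-up values to confirm that the packed
# inputs carry requires_grad=True.
_demo_transform = PackDistillInputs(meta_keys=('img_shape', ))
_demo_packed = _demo_transform(
    dict(imgs=np.random.rand(1, 3, 224, 224).astype(np.float32),
         label=np.array(0),
         img_shape=[224, 224]))
assert _demo_packed['inputs'].requires_grad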

@DATASETS.register_module()
class SyntheticDataset(BaseDataset):
    def __init__(self, pipeline, ipc=20, test_mode=False, **kwargs):
        self.ipc = ipc
        super().__init__(pipeline=pipeline, test_mode=test_mode, **kwargs)

    def load_data_list(self):
        (channel, im_size, num_classes, class_names, mean, std, dst_train,
         dst_test, testloader, loader_train_dict, class_map,
         class_map_inv) = get_dataset('ucf101')
        # channel = 3
        # num_classes =101
        # im_size=[224,224]
        images_all, indices_class = build_original_dataset(channel, num_classes, dst_train, class_map)
        # del dst_train
        # gc.collect()

        def get_images(c, n): # get random n images from class c
            idx_shuffle = np.random.permutation(indices_class[c])[:n]
            return images_all[idx_shuffle]

        ''' initialize the synthetic data '''
        label_syn = torch.tensor([np.ones(self.ipc,dtype=np.int_)*i for i in range(num_classes)], dtype=torch.long, requires_grad=False).view(-1)
        print("label_syn shape:", label_syn.shape)

        image_syn = torch.randn(size=(num_classes * self.ipc, channel, im_size[0], im_size[1]), dtype=torch.float)

        pix_init = 'real'
        if pix_init == 'real':
            print('initialize synthetic data from random real images')
            for c in range(num_classes):
                image_syn.data[c * self.ipc:(c + 1) * self.ipc] = get_images(c, self.ipc).detach().data
        else:
            print('initialize synthetic data from random noise')
        # print("Image_syn", image_syn[0])
        print("image_syn shape:", image_syn.shape,np.expand_dims(image_syn, 0).shape)

        data_list = []
        for x, y in zip(image_syn, label_syn):
            data_list.append(dict(imgs=x.unsqueeze(0).numpy(), label=y.numpy(), input_shape=x.unsqueeze(0).shape, img_shape=[224,224]))
        return data_list

    def get_data_info(self, idx: int) -> dict:
        data_info = super().get_data_info(idx)
        return data_info

def main():   
    args = parse_args()

    train_pipeline_cfg = [
        dict(type='PackDistillInputs')
    ]

    dataset_type = 'SyntheticDataset'

    dataset=dict(
            type=dataset_type,
            pipeline=train_pipeline_cfg,
            ipc=args.ipc)

    ds = DATASETS.build(dataset)

    train_dataloader = dict(
        batch_size=32,
        num_workers=8,
        persistent_workers=True,
        sampler=dict(type='DefaultSampler', shuffle=True),
        dataset=ds)

    tdl = Runner.build_dataloader(train_dataloader)
    print(type(tdl))
    # optimizer_img = torch.optim.SGD([image_syn], lr=args.lr_img, momentum=0.5)

    cfg = Config.fromfile(args.config)
    # print(cfg)
    cfg.train_pipeline = train_pipeline_cfg
    cfg.train_dataloader = tdl  # NOTE: this puts a built DataLoader object into the config
    # merge cli arguments to config
    cfg = merge_args(cfg, args)

    # build the runner from config
    # if 'runner_type' not in cfg:
    #     # build the default runner
    #     runner = Runner.from_cfg(cfg)
    # else:
    #     # build customized runner from the registry
    #     # if 'runner_type' is set in the cfg
    #     runner = RUNNERS.build(cfg)
    runner = Runner.from_cfg(cfg)
    # start training
    runner.train()

Reproduces the problem - error message

I cannot pass a torch.utils.data.DataLoader to the Runner (neither through a hook nor through the cfg):

Traceback (most recent call last):
  File "/home/wangkai/miniconda3/envs/vdd/lib/python3.9/site-packages/yapf/pytree/pytree_utils.py", line 113, in ParseCodeToTree
    tree = parser_driver.parse_string(code, debug=False)
  File "/home/wangkai/miniconda3/envs/vdd/lib/python3.9/site-packages/yapf_third_party/_ylib2to3/pgen2/driver.py", line 188, in parse_string
    return self.parse_tokens(tokens, debug)
  File "/home/wangkai/miniconda3/envs/vdd/lib/python3.9/site-packages/yapf_third_party/_ylib2to3/pgen2/driver.py", line 157, in parse_tokens
    if p.addtoken(type, value, (prefix, start)):
  File "/home/wangkai/miniconda3/envs/vdd/lib/python3.9/site-packages/yapf_third_party/_ylib2to3/pgen2/parse.py", line 230, in addtoken
    return self._addtoken(ilabel, type, value, context)
  File "/home/wangkai/miniconda3/envs/vdd/lib/python3.9/site-packages/yapf_third_party/_ylib2to3/pgen2/parse.py", line 313, in _addtoken
    raise ParseError('bad input', type, value, context)
yapf_third_party._ylib2to3.pgen2.parse.ParseError: bad input: type=20, value='<', context=('', (168, 17))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/wangkai/miniconda3/envs/vdd/lib/python3.9/site-packages/yapf/yapflib/yapf_api.py", line 198, in FormatCode
    tree = pytree_utils.ParseCodeToTree(unformatted_source)
  File "/home/wangkai/miniconda3/envs/vdd/lib/python3.9/site-packages/yapf/pytree/pytree_utils.py", line 116, in ParseCodeToTree
    ast.parse(code)
  File "/home/wangkai/miniconda3/envs/vdd/lib/python3.9/ast.py", line 50, in parse
    return compile(source, filename, mode, flags,
  File "<unknown>", line 168
    train_dataloader=<torch.utils.data.dataloader.DataLoader object at 0x7ff7773939a0>
                     ^
SyntaxError: invalid syntax

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/wangkai/miniconda3/envs/vdd/lib/python3.9/site-packages/mmengine/config/config.py", line 1477, in pretty_text
    text, _ = FormatCode(text, style_config=yapf_style)
  File "/home/wangkai/miniconda3/envs/vdd/lib/python3.9/site-packages/yapf/yapflib/yapf_api.py", line 201, in FormatCode
    raise errors.YapfError(errors.FormatErrorMsg(e))
yapf.yapflib.errors.YapfError: <unknown>:168:18: invalid syntax

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/ssd7T/haochuan/mmaction2/tools/custom_train.py", line 347, in <module>
    main()
  File "/home/ssd7T/haochuan/mmaction2/tools/custom_train.py", line 336, in main
    # runner = Runner.from_cfg(cfg)
  File "/home/wangkai/miniconda3/envs/vdd/lib/python3.9/site-packages/mmengine/runner/runner.py", line 462, in from_cfg
    runner = cls(
  File "/home/wangkai/miniconda3/envs/vdd/lib/python3.9/site-packages/mmengine/runner/runner.py", line 403, in __init__
    self._log_env(env_cfg)
  File "/home/wangkai/miniconda3/envs/vdd/lib/python3.9/site-packages/mmengine/runner/runner.py", line 2388, in _log_env
    self.logger.info(f'Config:\n{self.cfg.pretty_text}')
  File "/home/wangkai/miniconda3/envs/vdd/lib/python3.9/site-packages/mmengine/config/config.py", line 1482, in pretty_text
    raise SyntaxError('Failed to format the config file, please '
SyntaxError: Failed to format the config file, please check the syntax of:
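
Reading the traceback, yapf fails because the repr of the built DataLoader object ends up in the config text that pretty_text tries to format. A workaround sketch (assuming SyntheticDataset is registered as above) is to keep train_dataloader as a plain dict and let the Runner build it:

cfg = Config.fromfile(args.config)
cfg.train_pipeline = train_pipeline_cfg
# Only Python literals end up in the config, so pretty_text can format it.
cfg.train_dataloader = dict(
    batch_size=32,
    num_workers=8,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(type='SyntheticDataset',
                 pipeline=train_pipeline_cfg,
                 ipc=args.ipc))
runner = Runner.from_cfg(cfg)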

Additional information

No response

YiyaoYang1 commented 11 months ago

Sorry, MMEngine does not currently support attaching a plain torch optimizer to non-model parameters through the Runner. To observe the gradient with respect to your custom dataset's data, you can override BaseModel.train_step: the forward function returns a differentiable loss dict when its mode parameter is 'loss' or 'tensor', and you can backpropagate that loss to obtain the gradient. For more information about the loss dict, see the forward function in https://mmengine.readthedocs.io/en/latest/tutorials/model.html
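
A rough, untested sketch of that direction (DistillWrapper, backbone and data_optimizer are illustrative names, not MMEngine API):

import torch
from mmengine.model import BaseModel

class DistillWrapper(BaseModel):
    """Illustrative wrapper that also steps an optimizer over the data."""

    def __init__(self, backbone, data_optimizer, data_preprocessor=None):
        super().__init__(data_preprocessor=data_preprocessor)
        self.backbone = backbone
        self.data_optimizer = data_optimizer  # e.g. SGD over synthetic images

    def forward(self, inputs, data_samples=None, mode='loss'):
        # Must return a dict of differentiable losses when mode='loss'.
        ...

    def train_step(self, data, optim_wrapper):
        data = self.data_preprocessor(data, True)
        losses = self(**data, mode='loss')          # differentiable loss dict
        parsed_losses, log_vars = self.parse_losses(losses)
        self.data_optimizer.zero_grad()
        optim_wrapper.update_params(parsed_losses)  # backward + model update
        self.data_optimizer.step()                  # update the data tensors
        return log_vars

Note that for data_optimizer to receive gradients, the batch tensors must share storage with the learnable synthetic images; with num_workers > 0 and the default collate function they are copies, so this sketch is only a starting point.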