microsoft / nni

An open source AutoML toolkit for automating the machine learning lifecycle, including feature engineering, neural architecture search, model compression and hyper-parameter tuning.
https://nni.readthedocs.io
MIT License

TypeError: forward() takes 2 positional arguments but 3 were given #5426

Open Timerunning opened 1 year ago

Timerunning commented 1 year ago

When I try to prune my model with the NNI tool, the following error occurs. I can't find the cause, and I don't know whether my model architecture fails to meet your specification. I hope you can help me solve this problem.

This is the code for the pruning part of my project:

    # init config
    config_list = [{
        'sparsity_per_layer': 0.5,
        'op_types': ['Linear', 'Conv2d']
    }]

    from nni.compression.pytorch.pruning import L2NormPruner
    model_ori = model_ori.to(args.device)
    pruner = L2NormPruner(model_ori, config_list)
    # compress the model and generate the masks
    _, masks = pruner.compress()
    pruner._unwrap_model()
    # speedup the model
    from nni.compression.pytorch.speedup import ModelSpeedup
    ModelSpeedup(model_ori, torch.rand(3, 1, 80, 80).to(args.device), masks).speedup_model()

This is the structure of my model:

def cnn_7layer_bn2(in_ch=3, in_dim=32, width=64, linear_size=512, num_class=10):
    model = nn.Sequential(
        nn.Conv2d(in_ch, width, 3, stride=1, padding=1),
        nn.BatchNorm2d(width),
        nn.ReLU(),
        nn.Conv2d(width, width, 3, stride=1, padding=1),
        nn.BatchNorm2d(width),
        nn.ReLU(),
        nn.Conv2d(width, 2 * width, 3, stride=2, padding=1),
        nn.BatchNorm2d(2 * width),
        nn.ReLU(),
        nn.Conv2d(2 * width, 2 * width, 3, stride=1, padding=1),
        nn.BatchNorm2d(2 * width),
        nn.ReLU(),
        nn.Conv2d(2 * width, 2 * width, 3, stride=1, padding=1),
        nn.BatchNorm2d(2 * width),
        nn.ReLU(),
        Flatten(),
        nn.Linear((in_dim//2) * (in_dim//2) * 2 * width, linear_size),
        nn.BatchNorm1d(linear_size),
        nn.ReLU(),
        nn.Linear(linear_size, num_class)
    )
    return model
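
For context, Flatten here is a custom module from my project rather than nn.Flatten. Assuming the usual reshape implementation, it is roughly this sketch:

    import torch.nn as nn

    class Flatten(nn.Module):
        # Assumed definition: flattens every dimension except batch,
        # equivalent to nn.Flatten().
        def forward(self, x):
            return x.view(x.size(0), -1)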

This is the error:

Sequential(
  (0): PrunerModuleWrapper(
    (module): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU()
  (3): PrunerModuleWrapper(
    (module): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (5): ReLU()
  (6): PrunerModuleWrapper(
    (module): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  )
  (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (8): ReLU()
  (9): PrunerModuleWrapper(
    (module): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (10): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (11): ReLU()
  (12): PrunerModuleWrapper(
    (module): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  )
  (13): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (14): ReLU()
  (15): Flatten()
  (16): PrunerModuleWrapper(
    (module): Linear(in_features=204800, out_features=512, bias=True)
  )
  (17): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (18): ReLU()
  (19): PrunerModuleWrapper(
    (module): Linear(in_features=512, out_features=18, bias=True)
  )
)
0  sparsity :  0.5
3  sparsity :  0.5
6  sparsity :  0.5
9  sparsity :  0.5
12  sparsity :  0.5
16  sparsity :  0.5
19  sparsity :  0.5
[2023-03-08 14:08:29] start to speedup the model
INFO     14:08:29     start to speedup the model
[2023-03-08 14:08:29] infer module masks...
INFO     14:08:29     infer module masks...
[2023-03-08 14:08:29] Update mask for 0
INFO     14:08:29     Update mask for 0
[2023-03-08 14:08:29] Update mask for 1
INFO     14:08:29     Update mask for 1
[2023-03-08 14:08:29] Update mask for 2
INFO     14:08:29     Update mask for 2
[2023-03-08 14:08:29] Update mask for 3
INFO     14:08:29     Update mask for 3
[2023-03-08 14:08:29] Update mask for 4
INFO     14:08:29     Update mask for 4
[2023-03-08 14:08:29] Update mask for 5
INFO     14:08:29     Update mask for 5
[2023-03-08 14:08:29] Update mask for 6
INFO     14:08:29     Update mask for 6
[2023-03-08 14:08:29] Update mask for 7
INFO     14:08:29     Update mask for 7
[2023-03-08 14:08:29] Update mask for 8
INFO     14:08:29     Update mask for 8
[2023-03-08 14:08:29] Update mask for 9
INFO     14:08:29     Update mask for 9
[2023-03-08 14:08:29] Update mask for 10
INFO     14:08:29     Update mask for 10
[2023-03-08 14:08:29] Update mask for 11
INFO     14:08:29     Update mask for 11
[2023-03-08 14:08:29] Update mask for 12
INFO     14:08:29     Update mask for 12
[2023-03-08 14:08:29] Update mask for 13
INFO     14:08:29     Update mask for 13
[2023-03-08 14:08:29] Update mask for 14
INFO     14:08:29     Update mask for 14
[2023-03-08 14:08:29] Update mask for 15
INFO     14:08:29     Update mask for 15
Traceback (most recent call last):
  File "C:\Users\XuRui\Desktop\Fast-Certified-Robust-Training\prunerModel.py", line 75, in <module>
    main(args)
  File "C:\Users\XuRui\Desktop\Fast-Certified-Robust-Training\prunerModel.py", line 69, in main
    ModelSpeedup(model_ori, torch.rand(3, 1, 80, 80).to(args.device), masks).speedup_model()
  File "C:\Users\XuRui\miniconda3\envs\nni\lib\site-packages\nni\compression\pytorch\speedup\compressor.py", line 546, in speedup_model
    self.infer_modules_masks()
  File "C:\Users\XuRui\miniconda3\envs\nni\lib\site-packages\nni\compression\pytorch\speedup\compressor.py", line 383, in infer_modules_masks
    self.update_direct_sparsity(curnode)
  File "C:\Users\XuRui\miniconda3\envs\nni\lib\site-packages\nni\compression\pytorch\speedup\compressor.py", line 244, in update_direct_sparsity
    _auto_infer = AutoMaskInference(
  File "C:\Users\XuRui\miniconda3\envs\nni\lib\site-packages\nni\compression\pytorch\speedup\infer_mask.py", line 80, in __init__
    self.output = self.module(*dummy_input)
  File "C:\Users\XuRui\miniconda3\envs\nni\lib\site-packages\torch\nn\modules\module.py", line 889, in _call_impl
    result = self.forward(*input, **kwargs)
TypeError: forward() takes 2 positional arguments but 3 were given

Process finished with exit code 1
J-shang commented 1 year ago

Hello @Timerunning, I tried your code on NNI 2.10 and did not hit your issue. Which NNI version are you using? Can you share any other info?

import torch
import torch.nn as nn

def cnn_7layer_bn2(in_ch=3, in_dim=32, width=64, linear_size=512, num_class=10):
    model = nn.Sequential(
        nn.Conv2d(in_ch, width, 3, stride=1, padding=1),
        nn.BatchNorm2d(width),
        nn.ReLU(),
        nn.Conv2d(width, width, 3, stride=1, padding=1),
        nn.BatchNorm2d(width),
        nn.ReLU(),
        nn.Conv2d(width, 2 * width, 3, stride=2, padding=1),
        nn.BatchNorm2d(2 * width),
        nn.ReLU(),
        nn.Conv2d(2 * width, 2 * width, 3, stride=1, padding=1),
        nn.BatchNorm2d(2 * width),
        nn.ReLU(),
        nn.Conv2d(2 * width, 2 * width, 3, stride=1, padding=1),
        nn.BatchNorm2d(2 * width),
        nn.ReLU(),
        nn.Flatten(),
        nn.Linear((in_dim//2) * (in_dim//2) * 2 * width, linear_size),
        nn.BatchNorm1d(linear_size),
        nn.ReLU(),
        nn.Linear(linear_size, num_class)
    )
    return model

# init config
config_list = [{
    'sparsity_per_layer': 0.5,
    'op_types': ['Linear', 'Conv2d']
}]

from nni.compression.pytorch.pruning import L2NormPruner
model_ori = cnn_7layer_bn2()
model_ori(torch.rand(8, 3, 32, 32))
pruner = L2NormPruner(model_ori, config_list)
# compress the model and generate the masks
_, masks = pruner.compress()
pruner._unwrap_model()
# speedup the model
from nni.compression.pytorch.speedup import ModelSpeedup
ModelSpeedup(model_ori, torch.rand(8, 3, 32, 32), masks).speedup_model()
Lijiaoa commented 1 year ago

Suggest using the issue template to refine your issue.

Timerunning commented 1 year ago

> Hello @Timerunning, I tried your code on NNI 2.10 and did not hit your issue. Which NNI version are you using? Can you share any other info?

Thanks for your reply @J-shang! The NNI version I am using is also 2.10.

And I found a strange thing: when I run your standalone snippet above as-is, I don't get any problems either. But I do hit this error in my own project.

I found the problem by debugging in File "*\miniconda3\envs\nni\lib\site-packages\nni\compression\pytorch\speedup\compressor.py", line 219. There, self._prepare_dummy_input(node) returns a dummy_input that is a Python list of length 2, so the call self.forward(*input, **kwargs) fails with TypeError: forward() takes 2 positional arguments but 3 were given.
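
This is roughly the throwaway check I used at that line to confirm it (a local debugging print only, not part of NNI; the first line re-runs the call whose result I wanted to inspect):

    # Temporary print added locally at compressor.py line 219 to inspect
    # what _prepare_dummy_input returns; removed afterwards.
    dummy_input, input_debugname = self._prepare_dummy_input(node)
    print(type(dummy_input), len(dummy_input))   # here: <class 'list'> 2
    for i, t in enumerate(dummy_input):
        print(i, tuple(t.shape))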

Therefore, I added the following code at that line in compressor.py to prevent this error from occurring:

    dummy_input, input_debugname = self._prepare_dummy_input(node)
    if len(dummy_input) != 1:
        # Work around the duplicated dummy input: if both entries are
        # identical tensors, keep only the first one.
        if dummy_input[0].equal(dummy_input[1]):
            dummy_input = [dummy_input[0]]
        else:
            raise RuntimeError('dummy_inputError')

After adding these lines, my project runs without the error. However, even though the code now runs, I found that even at a modest sparsity (say 50%, or lower) the compressed model's performance is completely lost, i.e. the classification accuracy drops to 0%.
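
For reference, this is roughly how I measure that accuracy (a minimal sketch; test_loader and device are placeholder names from my project):

    import torch

    @torch.no_grad()
    def accuracy(model, test_loader, device):
        # Plain top-1 classification accuracy over a data loader.
        model.eval()
        correct = total = 0
        for x, y in test_loader:
            x, y = x.to(device), y.to(device)
            pred = model(x).argmax(dim=1)
            correct += (pred == y).sum().item()
            total += y.numel()
        return correct / total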

Timerunning commented 1 year ago

These are the key parts of my project code:

def prepare_model(args, logger, config):
    model = args.model

    if config['data'] == 'MNIST':
        input_shape = (1, 28, 28)
    elif config['data'] == 'CIFAR':
        input_shape = (3, 32, 32)
    elif config['data'] == 'atari':
        input_shape = (1, 80, 80)
    elif config['data'] == 'tinyimagenet':
        input_shape = (3, 64, 64)
    else:
        raise NotImplementedError(config['data'])

    if config['data'] == 'atari':
        model_ori = eval(model)(in_ch=input_shape[0], in_dim=input_shape[1], num_class=args.action_space, **parse_opts(args.model_params))
    else:
        model_ori = eval(model)(in_ch=input_shape[0], in_dim=input_shape[1], **parse_opts(args.model_params))

    checkpoint = None
    if args.auto_load:
        path_last = os.path.join(args.dir, 'ckpt_best')
        if os.path.exists(path_last):
            args.load = path_last
            logger.info('Use last checkpoint {}'.format(path_last))
        else:
            latest = -1
            for filename in os.listdir(args.dir):
                if filename.startswith('ckpt_'):
                    latest = max(latest, int(filename[5:]))
            if latest != -1:
                args.load = os.path.join(args.dir, 'ckpt_{}'.format(latest))
                try:
                    checkpoint = torch.load(args.load)
                except:
                    logger.warning('Cannot load {}'.format(args.load))
                    args.load = os.path.join(args.dir, 'ckpt_{}'.format(latest - 1))
                    logger.warning('Trying {}'.format(args.load))
    if checkpoint is None and args.load:
        checkpoint = torch.load(args.load)
    if checkpoint is not None:
        epoch, state_dict = checkpoint['epoch'], checkpoint['state_dict']
        best = checkpoint.get('best', (100., 100., -1))
        model_ori.load_state_dict(state_dict, strict=False)
        logger.info(f'Checkpoint loaded: {args.load}, epoch {epoch}')
    else:
        epoch = 0
        best = (100., 100., -1)  # TODO

    return model_ori, checkpoint, epoch, best

def main(args):
    config = load_config(args.config)
    logger.info('config: {}'.format(json.dumps(config)))
    set_seed(args.seed or config['seed'])

    # init model
    args.auto_load = True
    model_ori, checkpoint, epoch, best = prepare_model(args, logger, config)
    logger.info('Model structure: \n {}'.format(str(model_ori)))

    print('\nThe accuracy without masks:')
    # model_ori = model_ori.cuda()
    # evaluator(model_ori)

    # init config
    config_list = [{
        'sparsity_per_layer': 0.1,
        'op_types': ['Linear', 'Conv2d']
    }]

    # model_ori = model_ori.to(args.device)
    # model_ori = model_ori.cpu()
    pruner = L1NormPruner(model_ori, config_list)
    _, masks = pruner.compress()

    # print('\nThe accuracy with masks:')
    # evaluator(model_ori)

    # need to unwrap the model, if the model is wrapped before speedup
    pruner._unwrap_model()
    # speedup the model
    ModelSpeedup(model_ori, dummy_input=torch.rand(3, 1, 80, 80), masks_file=masks).speedup_model()

    print('\nThe accuracy after speedup:')
    model_pressed = model_ori
    evaluator(model_pressed)

    # Need a new optimizer due to the modules in model will be replaced during speedup.
    print('\nFinetune the model after speedup:')
    trainer(model_input=model_pressed, config=config)
    evaluator(model_pressed)

if __name__ == '__main__':
    main(args)
J-shang commented 1 year ago

@Timerunning sorry for the long delay in responding, do you still have this issue? Regarding your concern: fine-tuning is required after pruning to recover the accuracy.
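
A minimal sketch of the usual recipe, assuming a standard classification setup (train_loader, device, and the hyper-parameters below are placeholders from your project):

    import torch
    import torch.nn.functional as F

    def finetune(model, train_loader, device, epochs=5, lr=1e-3):
        # Build the optimizer AFTER speedup: modules are replaced during
        # speedup, so an optimizer created earlier would hold stale
        # parameter references.
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        model.train()
        for _ in range(epochs):
            for x, y in train_loader:
                x, y = x.to(device), y.to(device)
                optimizer.zero_grad()
                loss = F.cross_entropy(model(x), y)
                loss.backward()
                optimizer.step()
        return model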