Janspiry / Palette-Image-to-Image-Diffusion-Models

Unofficial PyTorch implementation of Palette: Image-to-Image Diffusion Models
MIT License

`Model [Palette() form models.model] not recognized`, installation inside `conda` environment #21

Closed sgbaird closed 2 years ago

sgbaird commented 2 years ago

Setup

Running on Windows Subsystem for Linux 2 (WSL2).
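Note: CUDA inside WSL2 additionally requires a WSL-enabled NVIDIA driver installed on the Windows host. Assuming that driver is present, a quick sanity check from the WSL shell is:

nvidia-smi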

git clone https://github.com/Janspiry/Palette-Image-to-Image-Diffusion-Models.git
cd Palette-Image-to-Image-Diffusion-Models

conda installation per #20

Command

python run.py -p train -c config/inpainting_celebahq_dummy.json --debug

inpainting_celebahq_dummy.json

{
    "name": "inpainting_celebahq", // experiments name
    "gpu_ids": [
        0
    ], // gpu ids list, default is single 0
    "seed": -1, // random seed, seed <0 represents randomization not used 
    "finetune_norm": false, // find the parameters to optimize
    "path": { //set every part file path
        "base_dir": "experiments", // base path for all log except resume_state
        "code": "code", // code backup
        "tb_logger": "tb_logger", // path of tensorboard logger
        "results": "results",
        "checkpoint": "checkpoint",
        "resume_state": "experiments/train_inpainting_celebahq_220426_233652/checkpoint/190"
        // "resume_state": null // ex: 100, loading .state  and .pth from given epoch and iteration
    },
    "datasets": { // train or test
        "train": {
            "which_dataset": { // import designated dataset using arguments 
                "name": [
                    "data.dataset",
                    "InpaintDataset"
                ], // import the Dataset() class / function (not recommended) from data.dataset.py (default is [data.dataset.py])
                "args": { // arguments to initialize dataset
                    "data_root": "datasets/celebahq_dummy/flist/train.flist",
                    "data_len": -1,
                    "mask_config": {
                        "mask_mode": "hybrid"
                    }
                }
            },
            "dataloader": {
                "validation_split": 2, // percent or number 
                "args": { // arguments to initialize train_dataloader
                    "batch_size": 3, // batch size in each gpu
                    "num_workers": 4,
                    "shuffle": true,
                    "pin_memory": true,
                    "drop_last": true
                },
                "val_args": { // arguments to initialize valid_dataloader, will overwrite the parameters in train_dataloader
                    "batch_size": 1, // batch size in each gpu
                    "num_workers": 4,
                    "shuffle": false,
                    "pin_memory": true,
                    "drop_last": false
                }
            }
        },
        "test": {
            "which_dataset": {
                "name": "InpaintDataset", // import Dataset() class / function(not recommend) from default file
                "args": {
                    "data_root": "datasets/celebahq_dummy/flist/test.flist",
                    "mask_config": {
                        "mask_mode": "center"
                    }
                }
            },
            "dataloader": {
                "args": {
                    "batch_size": 8,
                    "num_workers": 4,
                    "pin_memory": true
                }
            }
        }
    },
    "model": { // networks/metrics/losses/optimizers/lr_schedulers is a list and model is a dict
        "which_model": { // import designated  model(trainer) using arguments 
            "name": [
                "models.model",
                "Palette"
            ], // import the Model() class / function (not recommended) from models.model.py (default is [models.model.py])
            "args": {
                "sample_num": 8, // process of each image
                "task": "inpainting",
                "ema_scheduler": {
                    "ema_start": 1,
                    "ema_iter": 1,
                    "ema_decay": 0.9999
                },
                "optimizers": [
                    {
                        "lr": 5e-5,
                        "weight_decay": 0
                    }
                ]
            }
        },
        "which_networks": [ // import designated list of networks using arguments
            {
                "name": [
                    "models.network",
                    "Network"
                ], // import the Network() class / function (not recommended) from the default file (default is [models/network.py])
                "args": { // arguments to initialize network
                    "init_type": "kaiming", // method can be [normal | xavier| xavier_uniform | kaiming | orthogonal], default is kaiming
                    "module_name": "guided_diffusion", // sr3 | guided_diffusion
                    "unet": {
                        "in_channel": 6,
                        "out_channel": 3,
                        "inner_channel": 64,
                        "channel_mults": [
                            1,
                            2,
                            4,
                            8
                        ],
                        "attn_res": [
                            // 32,
                            16
                            // 8
                        ],
                        "num_head_channels": 32,
                        "res_blocks": 2,
                        "dropout": 0.2,
                        "image_size": 256
                    },
                    "beta_schedule": {
                        "train": {
                            "schedule": "linear",
                            "n_timestep": 2000,
                            // "n_timestep": 10, // debug
                            "linear_start": 1e-6,
                            "linear_end": 0.01
                        },
                        "test": {
                            "schedule": "linear",
                            "n_timestep": 1000,
                            "linear_start": 1e-4,
                            "linear_end": 0.09
                        }
                    }
                }
            }
        ],
        "which_losses": [ // import designated list of losses without arguments
            "mse_loss" // import mse_loss() function/class from default file (default is [models/losses.py]), equivalent to { "name": "mse_loss", "args":{}}
        ],
        "which_metrics": [ // import designated list of metrics without arguments
            "mae" // import mae() function/class from default file (default is [models/metrics.py]), equivalent to { "name": "mae", "args":{}}
        ]
    },
    "train": { // arguments for basic training
        "n_epoch": 1e8, // max epochs, not limited now
        "n_iter": 1e8, // max interations
        "val_epoch": 5, // valdation every specified number of epochs
        "save_checkpoint_epoch": 10,
        "log_iter": 1e3, // log every specified number of iterations
        "tensorboard": true // tensorboardX enable
    },
    "debug": { // arguments in debug mode, which will replace arguments in train
        "val_epoch": 1,
        "save_checkpoint_epoch": 1,
        "log_iter": 2,
        "debug_split": 50 // percent or number, change the size of dataloder to debug_split.
    }
}
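
Side note: this config uses //-style line comments, which strict JSON parsers reject; the repo's loader (core/praser.py) evidently handles them. A minimal illustration of reading such a file, not the repo's actual parser:

import json
import re

def load_jsonc(path):
    # Read JSON that may contain //-style line comments.
    # Illustration only; not the repo's actual loader (core/praser.py).
    with open(path) as f:
        text = f.read()
    # Strip '//' to end of line (naive: assumes no '//' inside string values).
    return json.loads(re.sub(r'//.*', '', text))

config = load_jsonc('config/inpainting_celebahq_dummy.json')
print(config['model']['which_model']['name'])  # ['models.model', 'Palette']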

Directory Structure

(screenshot of the repository directory structure)

Error

Exception has occurred: NotImplementedError       (note: full exception trace is shown but execution is paused at: _run_module_as_main)
Model [Palette() form models.model] not recognized.
  File "/home/sgbaird/GitHub/Palette-Image-to-Image-Diffusion-Models/core/praser.py", line 41, in init_obj
    ret = attr(*args, **kwargs)
  File "/home/sgbaird/GitHub/Palette-Image-to-Image-Diffusion-Models/models/model.py", line 49, in __init__
    self.netG.set_new_noise_schedule(phase=self.phase)
  File "/home/sgbaird/GitHub/Palette-Image-to-Image-Diffusion-Models/models/network.py", line 36, in set_new_noise_schedule
    self.register_buffer('gammas', to_torch(gammas))
  File "/home/sgbaird/miniconda3/envs/palette/lib/python3.9/site-packages/torch/cuda/__init__.py", line 166, in _lazy_init
    raise AssertionError("Torch not compiled with CUDA enabled")

During handling of the above exception, another exception occurred:

  File "/home/sgbaird/GitHub/Palette-Image-to-Image-Diffusion-Models/core/praser.py", line 49, in init_obj
    raise NotImplementedError('{} [{:s}() form {:s}] not recognized.'.format(init_type, class_name, file_name))
  File "/home/sgbaird/GitHub/Palette-Image-to-Image-Diffusion-Models/models/__init__.py", line 10, in create_model
    model = init_obj(model_opt, logger, default_file_name='models.model', init_type='Model')
  File "/home/sgbaird/GitHub/Palette-Image-to-Image-Diffusion-Models/run.py", line 44, in main_worker
    model = create_model(
  File "/home/sgbaird/GitHub/Palette-Image-to-Image-Diffusion-Models/run.py", line 92, in <module>
    main_worker(0, 1, opt)
  File "/home/sgbaird/miniconda3/envs/palette/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/home/sgbaird/miniconda3/envs/palette/lib/python3.9/runpy.py", line 97, in _run_module_code
    _run_code(code, mod_globals, init_globals,
  File "/home/sgbaird/miniconda3/envs/palette/lib/python3.9/runpy.py", line 268, in run_path
    return _run_module_code(code, init_globals, run_name,
  File "/home/sgbaird/miniconda3/envs/palette/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/home/sgbaird/miniconda3/envs/palette/lib/python3.9/runpy.py", line 197, in _run_module_as_main (Current frame)
    return _run_code(code, main_globals, None,
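
Note: core/praser.py's init_obj catches the original exception (line 41) and re-raises it as the generic NotImplementedError (line 49), so the informative error is the AssertionError above: the torch build in this conda environment was compiled without CUDA. A quick check from inside the environment (standard PyTorch attributes):

import torch

print(torch.__version__)          # a '+cpu' suffix indicates a CPU-only build
print(torch.version.cuda)         # None on a CPU-only build
print(torch.cuda.is_available())  # must be True for this repo's default setup

If the last line prints False, reinstall a CUDA-enabled build; for example (the cudatoolkit version here is an assumption, match it to your driver):

conda install pytorch torchvision cudatoolkit=11.3 -c pytorch
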
sgbaird commented 2 years ago

https://github.com/Janspiry/Palette-Image-to-Image-Diffusion-Models/issues/5 discusses the same error: `Model [Palette() form models.model] not recognized`.

sgbaird commented 2 years ago

train.log

22-06-09 22:55:24.592 - INFO: Create the log file in directory experiments/debug_inpainting_celebahq_220609_225523.

22-06-09 22:55:24.684 - INFO: Dataset [InpaintDataset() form data.dataset] is created.
22-06-09 22:55:24.684 - INFO: Dataset for train have 48 samples.
22-06-09 22:55:24.685 - INFO: Dataset for val have 2 samples.
22-06-09 22:55:25.194 - INFO: Network [Network() form models.network] is created.
22-06-09 22:55:25.195 - INFO: Network [Network] weights initialize using [kaiming] method.
22-06-09 22:55:25.692 - WARNING: Config is a str, converts to a dict {'name': 'mae'}
22-06-09 22:55:26.060 - INFO: Metric [mae() form models.metric] is created.
22-06-09 22:55:26.060 - WARNING: Config is a str, converts to a dict {'name': 'mse_loss'}
22-06-09 22:55:26.068 - INFO: Loss [mse_loss() form models.loss] is created.
22-06-09 22:55:26.257 - INFO: Beign loading pretrained model [Network] ...
22-06-09 22:55:26.257 - WARNING: Pretrained model in [experiments/train_inpainting_celebahq_220426_233652/checkpoint/190_Network.pth] is not existed, Skip it
22-06-09 22:55:26.257 - INFO: Beign loading pretrained model [Network_ema] ...
22-06-09 22:55:26.258 - WARNING: Pretrained model in [experiments/train_inpainting_celebahq_220426_233652/checkpoint/190_Network_ema.pth] is not existed, Skip it
22-06-09 22:55:26.281 - INFO: Beign loading training states
22-06-09 22:55:26.282 - WARNING: Training state in [experiments/train_inpainting_celebahq_220426_233652/checkpoint/190.state] is not existed, Skip it
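
Those "is not existed, Skip it" warnings come from the resume_state path in the config pointing at a checkpoint that is absent on this machine, so the run falls back to training from scratch. For a fresh run, the commented-out alternative already shown in the config avoids them:

    "path": {
        // ...
        "resume_state": null // train from scratch; no .state/.pth to load
    }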
luislofer89 commented 2 years ago

I'm having the same issue with the current state of the master branch. Commit https://github.com/Janspiry/Palette-Image-to-Image-Diffusion-Models/tree/d1b9b010edeee177aaa15850002766e370a8307b works fine for me. A bug was probably introduced in commit ed29b1ce9ff2ae41791d52642004385886f0680f.
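
To pin a checkout to that known-good revision, or to locate the breaking change between the two commits (assuming the suspected-bad commit is a descendant of the good one):

git checkout d1b9b010edeee177aaa15850002766e370a8307b

# or bisect for the breaking commit:
git bisect start
git bisect bad ed29b1ce9ff2ae41791d52642004385886f0680f
git bisect good d1b9b010edeee177aaa15850002766e370a8307b
# re-run the failing training command at each step and mark good/bad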

Janspiry commented 2 years ago

> I'm having the same issue with the current state of the master branch. Commit https://github.com/Janspiry/Palette-Image-to-Image-Diffusion-Models/tree/d1b9b010edeee177aaa15850002766e370a8307b works fine for me. A bug was probably introduced in commit ed29b1ce9ff2ae41791d52642004385886f0680f.

Hi, could you show me the log and config file? The latest code has changed the config file structure.

Janspiry commented 2 years ago

Feel free to reopen the issue if there is any question.

JianghaiSCU commented 1 year ago

I have the same problem when I train from scratch on my own dataset, but I don't know how to solve it. Here is my log:

23-04-22 20:28:58.357 - INFO: Create the log file in directory experiments/debug_colorization_mirflickr25k_230422_202856.

23-04-22 20:28:58.409 - INFO: Dataset [ColorizationDataset() form data.dataset] is created.
23-04-22 20:28:58.409 - INFO: Dataset for train have 48 samples.
23-04-22 20:28:58.409 - INFO: Dataset for val have 2 samples.
23-04-22 20:28:58.910 - INFO: Network [Network() form models.network] is created.
23-04-22 20:28:58.910 - INFO: Network [Network] weights initialize using [kaiming] method.
23-04-22 20:28:59.314 - INFO: Config is a str, converts to a dict {'name': 'mae'}
23-04-22 20:28:59.865 - INFO: Metric [mae() form models.metric] is created.
23-04-22 20:28:59.865 - INFO: Config is a str, converts to a dict {'name': 'mse_loss'}
23-04-22 20:28:59.897 - INFO: Loss [mse_loss() form models.loss] is created.
23-04-22 20:28:59.900 - INFO: Optimizer [Adam() form default file] is created.
23-04-22 20:28:59.900 - INFO: Scheduler [LinearLR() form default file] is created.