I'm trying to train the MBT model on my own dataset. I get the following error. Any help is appreciated.
Traceback (most recent call last):
  File "/home/eftekhar/anaconda3/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/eftekhar/anaconda3/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/home/eftekhar/models/scenic/scenic/projects/mbt/main.py", line 49, in <module>
    app.run(main=main)
  File "/home/eftekhar/anaconda3/lib/python3.9/site-packages/scenic/app.py", line 65, in run
    app.run(functools.partial(_run_main, main=main))
  File "/home/eftekhar/anaconda3/lib/python3.9/site-packages/absl/app.py", line 308, in run
    _run_main(main, args)
  File "/home/eftekhar/anaconda3/lib/python3.9/site-packages/absl/app.py", line 254, in _run_main
    sys.exit(main(argv))
  File "/home/eftekhar/anaconda3/lib/python3.9/site-packages/scenic/app.py", line 100, in _run_main
    main(rng=rng, config=FLAGS.config, workdir=FLAGS.workdir, writer=writer)
  File "/home/eftekhar/models/scenic/scenic/projects/mbt/main.py", line 39, in main
    trainer.train(
  File "/home/eftekhar/anaconda3/lib/python3.9/site-packages/scenic/projects/mbt/trainer.py", line 425, in train
    gflops) = mbt_train_utils.initialize_model(
  File "/home/eftekhar/anaconda3/lib/python3.9/site-packages/scenic/projects/mbt/train_utils.py", line 83, in initialize_model
    for modality_name, spec in input_spec.items():
AttributeError: 'list' object has no attribute 'items'
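If I read the last frame correctly, initialize_model iterates input_spec as a mapping from modality name to spec, while my run apparently hands it a plain list. Here is a minimal sketch of that mismatch (my own reconstruction with made-up shapes, not the actual Scenic internals):

# Hypothetical illustration of the failing pattern, not Scenic code.
# The loop in train_utils.initialize_model seems to expect a dict keyed
# by modality name:
input_spec_as_dict = {
    'rgb': ((-1, 32, 224, 224, 3), 'float32'),
    'spectrogram': ((-1, 8, 100, 128, 1), 'float32'),
}
for modality_name, spec in input_spec_as_dict.items():  # iterates fine
  print(modality_name, spec)

# What my run apparently passes instead: a bare list of specs.
input_spec_as_list = [
    ((-1, 32, 224, 224, 3), 'float32'),
    ((-1, 8, 100, 128, 1), 'float32'),
]
input_spec_as_list.items()  # AttributeError: 'list' object has no attribute 'items'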
My config file is:
r"""Multimodal sound classification on the balanced (mini) AudioSet.
"""
import ml_collections
AUDIOSET_TRAIN_SIZE = 20361
def get_config():
"""Returns the base experiment configuration."""
config = ml_collections.ConfigDict()
config.experiment_name = 'mbt_balanced_audioset_classification'
  config.dataset_configs = ml_collections.ConfigDict()
  config.dataset_configs.base_dir = '/home/eth/tfrecords_builder/tmp/generated_dataset'
  config.dataset_configs.tables = {
      'train': 'train',
      'validation': 'valid',
      'test': 'test',
  }
  config.dataset_configs.examples_per_subset = {
      'train': 4189,
      'validation': 898,
      'test': 898
  }
  config.dataset_configs.num_classes = 20
  config.data_dtype_str = 'float32'
  config.dataset_name = 'video_tfrecord_dataset'
  config.dataset_configs.modalities = ('spectrogram', 'rgb')
  config.dataset_configs.return_as_dict = False
  config.dataset_configs.num_frames = 32
  config.dataset_configs.stride = 2
  config.dataset_configs.num_spec_frames = 8
  config.dataset_configs.spec_stride = 1
  config.dataset_configs.spec_mean = 1.102
  config.dataset_configs.spec_stddev = 2.762
  config.dataset_configs.min_resize = 256
  config.dataset_configs.crop_size = 224
  config.dataset_configs.spec_shape = (100, 128)
  config.dataset_configs.one_hot_labels = True
  config.dataset_configs.zero_centering = True
  config.dataset_configs.do_multicrop_test = True
  config.dataset_configs.log_test_epochs = 4
  config.dataset_configs.num_test_clips = 4
  config.dataset_configs.test_batch_size = 8  # Needs to be num_local_devices
  config.multicrop_clips_per_device = 2
  config.dataset_configs.augmentation_params = ml_collections.ConfigDict()
  config.dataset_configs.augmentation_params.do_jitter_scale = True
  config.dataset_configs.augmentation_params.scale_min_factor = 0.9
  config.dataset_configs.augmentation_params.scale_max_factor = 1.33
  config.dataset_configs.augmentation_params.prob_scale_jitter = 1.0
  config.dataset_configs.augmentation_params.do_color_augment = True
  config.dataset_configs.augmentation_params.prob_color_augment = 0.8
  config.dataset_configs.augmentation_params.prob_color_drop = 0.1
  config.dataset_configs.prefetch_to_device = 2
  config.dataset_configs.spec_augment = True
  config.dataset_configs.spec_augment_params = ml_collections.ConfigDict()
  config.dataset_configs.spec_augment_params.freq_mask_max_bins = 48
  config.dataset_configs.spec_augment_params.freq_mask_count = 1
  config.dataset_configs.spec_augment_params.time_mask_max_frames = 48
  config.dataset_configs.spec_augment_params.time_mask_count = 4
  config.dataset_configs.spec_augment_params.time_warp_max_frames = 1.0
  config.dataset_configs.spec_augment_params.time_warp_max_ratio = 0
  config.dataset_configs.spec_augment_params.time_mask_max_ratio = 0
  config.model_name = 'mbt_multilabel_classification'
  config.model = ml_collections.ConfigDict()
  config.model.modality_fusion = ('spectrogram', 'rgb')
  config.model.use_bottleneck = True
  config.model.test_with_bottlenecks = True
  config.model.share_encoder = False
  config.model.n_bottlenecks = 4
  config.model.fusion_layer = 8
  config.model.hidden_size = 768
  config.model.patches = ml_collections.ConfigDict()
  config.model.attention_config = ml_collections.ConfigDict()
  config.model.attention_config.type = 'spacetime'
  config.model.num_heads = 12
  config.model.mlp_dim = 3072
  config.model.num_layers = 12
  config.model.representation_size = None
  config.model.classifier = 'gap'
  config.model.attention_dropout_rate = 0.
  config.model.dropout_rate = 0.
  config.model_dtype_str = 'float32'
  config.model.temporal_encoding_config = ml_collections.ConfigDict()
  config.model.temporal_encoding_config.method = '3d_conv'
  config.model.patches.size = [16, 16, 2]
  config.model.temporal_encoding_config.kernel_init_method = 'central_frame_initializer'
  config.model.temporal_encoding_config.n_sampled_frames = 4  # Unused here.
  config.trainer_name = 'mbt_trainer'
  config.optimizer = 'momentum'
  config.optimizer_configs = ml_collections.ConfigDict()
  config.l2_decay_factor = 0
  config.max_grad_norm = 1
  config.label_smoothing = 0.3
  config.num_training_epochs = 50
  config.batch_size = 64
  config.rng_seed = 0
  config.mixup = ml_collections.ConfigDict()
  config.mixup.alpha = 0.5
  config.mixmod = False
  config.model.stochastic_droplayer_rate = 0.3
  config.init_from = ml_collections.ConfigDict()
  config.init_from.model_config = None
  config.init_from.checkpoint_path = '/home/eth/models/scenic/scenic/projects/mbt/vit_base'
  config.init_from.checkpoint_format = 'scenic'
  config.init_from.model_config = ml_collections.ConfigDict()
  config.init_from.model_config.model = ml_collections.ConfigDict()
  config.init_from.model_config.model.classifier = 'token'  # Specify if this is 'token' or 'gap'.
  config.init_from.restore_positional_embedding = True
  config.init_from.restore_input_embedding = True
  config.init_from.positional_embed_size_change = 'resize_tile'
  steps_per_epoch = AUDIOSET_TRAIN_SIZE // config.batch_size
  total_steps = config.num_training_epochs * steps_per_epoch
  config.lr_configs = ml_collections.ConfigDict()
  config.lr_configs.learning_rate_schedule = 'compound'
  config.lr_configs.factors = 'constant * cosine_decay * linear_warmup'
  config.lr_configs.warmup_steps = 2.5 * steps_per_epoch
  config.lr_configs.steps_per_cycle = total_steps
  config.lr_configs.base_learning_rate = 5e-1
  config.write_summary = True
  config.checkpoint = True  # Do checkpointing.
  config.debug_train = False  # Debug mode during training.
  config.debug_eval = False  # Debug mode during eval.
  config.checkpoint_steps = 500  # Checkpoint more frequently than a val epoch.
  return config
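One thing I'm not sure about: I set config.dataset_configs.return_as_dict = False. If that makes the dataset report its input spec as a plain list instead of a dict of modalities, it would explain the .items() failure above. My untested guess is that the multimodal trainer wants

  config.dataset_configs.return_as_dict = True

but I'd appreciate confirmation, or a pointer if something else in my setup is wrong.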