facebookresearch / mmf

A modular framework for vision & language multimodal research from Facebook AI Research (FAIR)
https://mmf.sh/
Other
5.49k stars 935 forks source link

Don't have enough hard disk memory for scene text visual question answering #672

Closed AMiiR-S closed 3 years ago

AMiiR-S commented 3 years ago

❓ Questions and Help

My dataset is about 40 GB, and I think I will need about 350 GB in the end. Can the Colab Pro version help me? Or can I connect my Google Drive storage to Colab? My code:

!python tools/run.py --tasks vqa --datasets textvqa --model s_mmgnn --config ensemble/foo/s_mmgnn.yml -dev cuda:0 --run_type train WARNING: Device specified is 'cuda' but cuda is not present. Switching to CPU version Logging to: ./save/vqa_textvqa_s_mmgnn/logs/vqa_textvqa_s_mmgnn_2020-11-01T12:55:58.log 2020-11-01T12:55:58 INFO: ===== Training Parameters ===== 2020-11-01T12:55:58 INFO:

{ "batch_size": 128, "clip_gradients": true, "clip_norm_mode": "all", "data_parallel": false, "device": "cpu", "distributed": false, "evalai_inference": false, "experiment_name": "run", "load_pretrained": false, "local_rank": null, "log_dir": "./logs", "log_interval": 100, "logger_level": "info", "lr_ratio": 0.5, "lr_scheduler": true, "lr_steps": [ 2500 ], "max_epochs": null, "max_grad_l2_norm": 0.25, "max_iterations": 5000, "metric_minimize": false, "monitored_metric": "vqa_accuracy", "num_workers": 4, "patience": 4000, "pin_memory": false, "pretrained_mapping": { "image_feature_embeddings_list": "image_feature_embeddings_list", "image_feature_encoders": "image_feature_encoders", "image_text_multi_modal_combine_layer": "image_text_multi_modal_combine_layer", "text_embeddings": "text_embeddings" }, "resume": false, "resume_file": null, "run_type": "train", "save_dir": "./save", "seed": null, "should_early_stop": false, "should_not_log": false, "snapshot_interval": 100, "task_size_proportional_sampling": true, "trainer": "base_trainer", "use_warmup": true, "verbose_dump": false, "warmup_factor": 0.2, "warmup_iterations": 1000 } 2020-11-01T12:55:58 INFO: ====== Task Attributes ====== 2020-11-01T12:55:58 INFO: ======== vqa/textvqa ======= 2020-11-01T12:55:58 INFO: { "data_root_dir": "../data", "fast_dir": "/content/mmgnn_textvqa/data", "fast_read": false, "features_max_len": 137, "image_depth_first": false, "image_features": { "test": [ "open_images/detectron_fix_100/fc6/test,open_images/resnet152/test" ], "train": [ "open_images/detectron_fix_100/fc6/train,open_images/resnet152/train" ], "val": [ "open_images/detectron_fix_100/fc6/train,open_images/resnet152/train" ] }, "imdb_files": { "test": [ "imdb/textvqa_0.5/imdb_textvqa_test_trimmed.npy" ], "train": [ "imdb/textvqa_0.5/imdb_textvqa_train_trimmed.npy" ], "val": [ "imdb/textvqa_0.5/imdb_textvqa_val_trimmed.npy" ] }, "processors": { "answer_processor": { "params": { "context_preprocessor": { "params": {}, "type": 
"simple_word" }, "max_length": 50, "num_answers": 10, "preprocessor": { "params": {}, "type": "simple_word" }, "vocab_file": "vocabs/answers_textvqa_more_than_1.txt" }, "type": "soft_copy_answer" }, "bbox_processor": { "params": { "max_length": 50 }, "type": "bbox" }, "context_processor": { "params": { "max_length": 50, "model_file": ".vector_cache/wiki.en.bin" }, "type": "fasttext" }, "ocr_token_processor": { "params": {}, "type": "simple_word" }, "text_processor": { "params": { "max_length": 14, "preprocessor": { "params": {}, "type": "simple_sentence" }, "vocab": { "embedding_name": "glove.6B.300d", "type": "intersected", "vocab_file": "vocabs/vocabulary_100k.txt" } }, "type": "vocab" } }, "return_info": true, "use_ocr": true, "use_ocr_info": true } 2020-11-01T12:55:58 INFO: ====== Optimizer Attributes ====== 2020-11-01T12:55:58 INFO: { "params": { "eps": 1e-08, "lr": 0.01, "weight_decay": 0 }, "type": "Adamax" } 2020-11-01T12:55:58 INFO: ====== Model (s_mmgnn) Attributes ====== 2020-11-01T12:55:58 INFO: { "bb_dim": 50, "code_name": "both_cooling", "context_embeddings": [ { "params": { "embedding_dim": 350 }, "type": "identity" } ], "context_feature_dim": 1200, "context_feature_embeddings": [ { "modal_combine": { "params": { "dropout": 0, "hidden_dim": 4096 }, "type": "non_linear_element_multiply" }, "normalization": "sigmoid", "transform": { "params": { "out_dim": 1 }, "type": "linear" } } ], "context_feature_encodings": [ { "params": {}, "type": "default" } ], "context_max_len": 50, "dropout": 0, "f_engineer": 4, "fsd": 300, "fvd": 2048, "image_feature_dim": 2048, "image_feature_embeddings": [ { "modal_combine": { "params": { "dropout": 0, "hidden_dim": 2048 }, "type": "non_linear_element_multiply" }, "normalization": "softmax", "transform": { "params": { "out_dim": 1 }, "type": "linear" } } ], "image_feature_encodings": [ { "params": { "bias_file": "detectron/fc6/fc7_b.pkl", "weights_file": "detectron/fc6/fc7_w.pkl" }, "type": "finetune_faster_rcnn_fpn_fc7" } 
], "l_dim": 2048, "losses": [ { "type": "logit_bce" } ], "metrics": [ { "type": "vqa_accuracy" } ], "model_data_dir": "../data", "num_context_features": 1, "output": { "inter_dim": 4096 }, "s_gnn": { "inter_dim": 4096, "iteration": 1, "penalty": 25 }, "si_gnn": { "K": 12, "inter_dim": 128, "iteration": 1, "k_valve": 4, "penalty": 25 }, "text_embeddings": [ { "params": { "conv1_out": 512, "conv2_out": 2, "dropout": 0, "embedding_dim": 300, "hidden_dim": 1024, "kernel_size": 1, "num_layers": 1, "padding": 0 }, "type": "attention" } ] }

apsdehal commented 3 years ago

Hi, @AMiiR93. The issue doesn't seem to be related to the MMF repository; it seems to be about a fork of it, which we can't actively support.

apsdehal commented 3 years ago

But based on your error, you can probably connect Google Drive to store the extra features. We don't have first-class support for Google Drive, though.

hackgoofer commented 3 years ago

Closing as the recommendation is to use Google Drive to store features. Feel free to reopen and let us know how we can help. Thanks!