Closed nitinajithkumar closed 3 years ago
@nitinajithkumar How are you running train.py? Also, check config.json and the entities folder for your custom dataset.
Hi @AtulKumar4, thanks for responding. Since I have neither a system for distributed processing nor a GPU, I am running it with "python train.py", with all the arguments I need set as defaults (just testing).
Here is a snippet:
if __name__ == '__main__':
args = argparse.ArgumentParser(description='PyTorch PICK Distributed Training')
args.add_argument('-c', '--config', default="D:\PICK-pytorch\config.json", type=str,
help='config file path (default: None)')
args.add_argument('-r', '--resume', default=None, type=str,
help='path to latest checkpoint (default: None)')
args.add_argument('-d', '--device', default='0', type=str,
help='indices of GPUs to be available (default: all)')
# custom cli options to modify configuration from default values given in json file.
CustomArgs = collections.namedtuple('CustomArgs', 'flags default type target help') # CustomArgs.flags, CustomArgs.default
options = [
# CustomArgs(['--lr', '--learning_rate'], default=0.0001, type=float, target='optimizer;args;lr',
# help='learning rate (default: 0.0001)'),
CustomArgs(['--bs', '--batch_size'], default=2, type=int, target='train_data_loader;args;batch_size',
help='batch size (default: 2)'),
# CustomArgs(['--ng', '--n_gpu'], default=2, type=int, target='n_gpu',
# help='num of gpu (default: 2)'),
CustomArgs(['-dist', '--distributed'], default='false', type=str, target='distributed',
help='run distributed training. (true or false, default: true)'),
CustomArgs(['--local_world_size'], default=1, type=int, target='local_world_size',
help='the number of processes running on each node, this is passed in explicitly '
'and is typically either $1$ or the number of GPUs per node. (default: 1)'),
CustomArgs(['--local_rank'], default=0, type=int, target='local_rank',
help='this is automatically passed in via torch.distributed.launch.py, '
'process will be assigned a local rank ID in [0, local_world_size-1]. (default: 0)')
]
Quoting my config.json here: { "name": "PICK_Default", "run_id":"test",
"distributed":false,
"local_world_size":1,
"local_rank":-1,
"model_arch": {
"type": "PICKModel",
"args": {
"embedding_kwargs":{
"num_embeddings": -1,
"embedding_dim": 512
},
"encoder_kwargs":{
"char_embedding_dim":-1,
"out_dim": 512,
"nheaders": 4,
"nlayers": 3,
"feedforward_dim": 1024,
"dropout": 0.1,
"image_encoder": "resnet50",
"roi_pooling_mode": "roi_align",
"roi_pooling_size": [7,7]
},
"graph_kwargs":{
"in_dim":-1,
"out_dim":-1,
"eta": 1,
"gamma": 1,
"learning_dim": 128,
"num_layers": 2
},
"decoder_kwargs":{
"bilstm_kwargs":{
"input_size": -1,
"hidden_size": 512,
"num_layers": 2,
"dropout": 0.1,
"bidirectional": true,
"batch_first": true
},
"mlp_kwargs":{
"in_dim": -1,
"out_dim": -1,
"dropout": 0.1
},
"crf_kwargs":{
"num_tags":-1
}
}
}
},
"train_dataset": {
"type": "PICKDataset",
"args": {
"files_name":"D:\\PICK-pytorch\\data\\train\\train_samples_list.csv",
"boxes_and_transcripts_folder":"D:\\PICK-pytorch\\data\\train\\boxes_and_transcripts\\",
"images_folder":"D:\\PICK-pytorch\\data\\train\\images\\",
"entities_folder":"D:\\PICK-pytorch\\data\\train\\entities\\",
"iob_tagging_type":"box_level",
"resized_image_size": [100, 100],
"ignore_error": false
}
},
"validation_dataset": {
"type": "PICKDataset",
"args": {
"files_name":"D:\\PICK-pytorch\\data\\train\\train_samples_list.csv",
"boxes_and_transcripts_folder":"D:\\PICK-pytorch\\data\\train\\boxes_and_transcripts\\",
"images_folder":"D:\\PICK-pytorch\\data\\train\\images\\",
"entities_folder":"D:\\PICK-pytorch\\data\\train\\entities\\",
"iob_tagging_type":"box_level",
"resized_image_size": [100,100],
"ignore_error": false
}
},
"train_data_loader": {
"type": "DataLoader",
"args":{
"batch_size": 1,
"shuffle": true,
"drop_last": false,
"num_workers": 0,
"pin_memory":true
}
},
"val_data_loader": {
"type": "DataLoader",
"args":{
"batch_size": 1,
"shuffle": false,
"drop_last": false,
"num_workers": 0,
"pin_memory":true
}
},
"optimizer": {
"type": "Adam",
"args":{
"lr": 0.0001,
"weight_decay": 0,
"amsgrad": true
}
},
"lr_scheduler": {
"type": "StepLR",
"args": {
"step_size": 30,
"gamma": 0.1
}
},
"trainer": {
"epochs": 1,
"gl_loss_lambda": 0.01,
"log_step_interval": 5,
"val_step_interval": 10,
"save_dir": "D:\\PICK-pytorch\\saved\\",
"save_period": 1,
"log_verbosity": 2,
"monitor": "max overall-mEF",
"monitor_open": false,
"early_stop": 40,
"anomaly_detection": false,
"tensorboard": false,
"sync_batch_norm":true
}
}
The entities folder was prepared as per their sample.
It could be a problem with Windows. I would recommend using Google Colab: it's free, and it gives you a machine with a GPU and Ubuntu ready to use.
Sure. I think that's the issue as well. I managed to get it running on Ubuntu. Thanks a lot.
When trying to run train.py on a custom dataset, I got a TypeError. The custom dataset has been prepared properly, as mentioned in the description. I am using Windows and my Python version is 3.9. Please help with this error. Here is the traceback:
Traceback (most recent call last):
File "d:\PICK-pytorch\train.py", line 14, in <module>
import model.pick as pick_arch_module
File "d:\PICK-pytorch\model\pick.py", line 12, in <module>
from .graph import GLCN
File "d:\PICK-pytorch\model\graph.py", line 13, in <module>
from data_utils import documents
File "d:\PICK-pytorch\data_utils\documents.py", line 16, in <module>
from utils.entities_list import Entities_list
File "d:\PICK-pytorch\utils\__init__.py", line 3, in <module>
from .util import *
File "d:\PICK-pytorch\utils\util.py", line 11, in <module>
from .class_utils import keys_vocab_cls, iob_labels_vocab_cls
File "d:\PICK-pytorch\utils\class_utils.py", line 62, in <module>
keys_vocab_cls = ClassVocab(classes=Path(__file__).parent.joinpath('keys.txt'))#, specials_first=False)
File "d:\PICK-pytorch\utils\class_utils.py", line 44, in __init__
super().__init__(c,specials=specials,**kwargs)
TypeError: __init__() got an unexpected keyword argument 'specials'
Thanks in advance for your responses.