Tencent / NeuralNLP-NeuralClassifier

An Open-source Neural Hierarchical Multi-label Text Classification Toolkit

IndexError: index 96 is out of bounds for axis 0 with size 96 #30

Closed BeerTai closed 4 years ago

BeerTai commented 4 years ago

Hello, when running a multi-label task I get the following error:

File "/home/NeuralNLP-NeuralClassifier-master/evaluate/classification_evaluate.py", line 267, in evaluate
    if prob_np[predict_label_idx[j]] > threshold:
IndexError: index 96 is out of bounds for axis 0 with size 96
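For context: the message says that prob_np has 96 entries (valid indices 0 through 95), but the evaluator asked for index 96, i.e. a predicted label index that does not exist in the label set the probabilities were built from. A minimal sketch of that failure mode with made-up values (not the toolkit's actual code):

```python
import numpy as np

# prob_np holds one probability per label in the label dictionary
# (here 96 entries, so valid indices are 0-95).
prob_np = np.random.rand(96)
predict_label_idx = [10, 42, 96]   # 96 is outside the dictionary range
threshold = 0.5

for j in range(len(predict_label_idx)):
    # Raises: IndexError: index 96 is out of bounds for axis 0 with size 96
    if prob_np[predict_label_idx[j]] > threshold:
        print(predict_label_idx[j])
```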

SeekPoint commented 3 years ago

How did you solve it?

coderbyr commented 3 years ago

> How did you solve it?

This usually happens when a label index exceeds the size of the label dictionary. Please check that the label dictionary was generated correctly, or provide more detailed configuration information, such as the task type, model, and key settings.
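A quick way to run that check (a sketch: the doc_label.dict file name and the dict_rcv3 directory are assumptions based on the dict_dir convention, so adjust them to your setup):

```python
# Count the entries in the generated label dictionary; every predicted label
# index (and eval.top_k) must stay below this number.
label_dict_path = "dict_rcv3/doc_label.dict"   # assumed name under dict_dir

with open(label_dict_path, encoding="utf-8") as f:
    labels = [line.strip() for line in f if line.strip()]

print(f"label dictionary size: {len(labels)}")
print("first entries:", labels[:5])
```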

david0718 commented 3 years ago

I ran into the same problem, also on a multi-label task. Sample data is attached: data.zip. This seems to be a common issue, so any help would be much appreciated, thanks.

david0718 commented 3 years ago

{ "task_info":{ "label_type": "multi_label", "hierarchical": false, "hierar_taxonomy": "data/rcv3.taxonomy", "hierar_penalty": 0.000001 }, "device": "cuda", "model_name": "TextCNN", "checkpoint_dir": "checkpoint_dir_rcv3", "model_dir": "trained_model_rcv3", "data": { "train_json_files": [ "data/rcv3_train.json" ], "validate_json_files": [ "data/rcv3_dev.json" ], "test_json_files": [ "data/rcv3_test.json" ], "generate_dict_using_json_files": true, "generate_dict_using_all_json_files": true, "generate_dict_using_pretrained_embedding": false, "generate_hierarchy_label": false, "dict_dir": "dict_rcv3", "num_worker": 4 }, "feature": { "feature_names": [ "token" ], "min_token_count": 2, "min_char_count": 2, "token_ngram": 0, "min_token_ngram_count": 0, "min_keyword_count": 0, "min_topic_count": 2, "max_token_dict_size": 1000000, "max_char_dict_size": 150000, "max_token_ngram_dict_size": 10000000, "max_keyword_dict_size": 100, "max_topic_dict_size": 100, "max_token_len": 256, "max_char_len": 1024, "max_char_len_per_token": 4, "token_pretrained_file": "", "keyword_pretrained_file": "" }, "train": { "batch_size": 64, "start_epoch": 1, "num_epochs": 5, "num_epochs_static_embedding": 0, "decay_steps": 1000, "decay_rate": 1.0, "clip_gradients": 100.0, "l2_lambda": 0.0, "loss_type": "BCEWithLogitsLoss", "sampler": "fixed", "num_sampled": 5, "visible_device_list": "0", "hidden_layer_dropout": 0.5 }, "embedding": { "type": "embedding", "dimension": 64, "region_embedding_type": "context_word", "region_size": 5, "initializer": "uniform", "fan_mode": "FAN_IN", "uniform_bound": 0.25, "random_stddev": 0.01, "dropout": 0.0 }, "optimizer": { "optimizer_type": "Adam", "learning_rate": 0.008, "adadelta_decay_rate": 0.95, "adadelta_epsilon": 1e-08 }, "TextCNN": { "kernel_sizes": [ 2, 3, 4 ], "num_kernels": 100, "top_k_max_pooling": 1 }, "TextRNN": { "hidden_dimension": 64, "rnn_type": "GRU", "num_layers": 1, "doc_embedding_type": "Attention", "attention_dimension": 16, "bidirectional": true }, "DRNN": { "hidden_dimension": 5, "window_size": 3, "rnn_type": "GRU", "bidirectional": true, "cell_hidden_dropout": 0.1 }, "eval": { "text_file": "data/rcv3_test.json", "threshold": 0.5, "dir": "eval_dir", "batch_size": 1024, "is_flat": true, "top_k": 96, "model_dir": "checkpoint_dir_rcv3/TextCNN_best" }, "TextVDCNN": { "vdcnn_depth": 9, "top_k_max_pooling": 8 }, "DPCNN": { "kernel_size": 3, "pooling_stride": 2, "num_kernels": 16, "blocks": 2 }, "TextRCNN": { "kernel_sizes": [ 2, 3, 4 ], "num_kernels": 100, "top_k_max_pooling": 1, "hidden_dimension":64, "rnn_type": "GRU", "num_layers": 1, "bidirectional": true }, "Transformer": { "d_inner": 128, "d_k": 32, "d_v": 32, "n_head": 4, "n_layers": 1, "dropout": 0.1, "use_star": true }, "AttentiveConvNet": { "attention_type": "bilinear", "margin_size": 3, "type": "advanced", "hidden_size": 64 }, "HMCN": { "hierarchical_depth": [0, 384, 384, 384, 384], "global2local": [0, 16, 192, 512, 64] }, "log": { "logger_file": "log_test_rcv3_hierar", "log_level": "warn" } }

hassaanseeker commented 2 years ago

Your top_k value in .\conf\train.json should be smaller than or equal to the doc_labels value printed during training.
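One way to apply that fix, sketched below: read the generated label dictionary and lower eval.top_k in the config if it exceeds the label count (the config path and the doc_label.dict file name are assumptions; adjust them to your setup):

```python
import json

config_path = "conf/train.json"                 # the config used for training/eval
label_dict_path = "dict_rcv3/doc_label.dict"    # assumed name under dict_dir

# Size of the label dictionary produced during training.
with open(label_dict_path, encoding="utf-8") as f:
    num_labels = sum(1 for line in f if line.strip())

with open(config_path, encoding="utf-8") as f:
    conf = json.load(f)

# Clamp eval.top_k so it never exceeds the number of known labels.
if conf["eval"]["top_k"] > num_labels:
    conf["eval"]["top_k"] = num_labels
    with open(config_path, "w", encoding="utf-8") as f:
        json.dump(conf, f, indent=2)
    print(f"eval.top_k lowered to {num_labels}")
else:
    print("eval.top_k is already within range")
```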