Tencent / NeuralNLP-NeuralClassifier

An Open-source Neural Hierarchical Multi-label Text Classification Toolkit

Hierar performs worse than Flat #111

Closed xjtuleeyf closed 2 years ago

xjtuleeyf commented 2 years ago

First of all, thank you for open-sourcing this code. Experimenting with my own data, I found that with the same data the flat setting reaches an fscore of 0.64 after five epochs of training, but after converting the data to hierarchical form the fscore just hovers around 0.45 even after 50 epochs. What's more, the best result usually appears around epoch 10 and later results decline rather than improve. I really can't figure out why and would like to ask for your advice. Below is my conf file; looking forward to your reply!

```json
{
  "task_info": { "label_type": "multi_label", "hierarchical": true, "hierar_taxonomy": "my_data/hmlc.taxonomy", "hierar_penalty": 0.000001 },
  "device": "cpu",
  "model_name": "TextRNN",
  "checkpoint_dir": "checkpoint_dir_rcv1",
  "model_dir": "trained_model_rcv1",
  "data": { "train_json_files": ["my_data/hmlc_train.json"], "validate_json_files": ["my_data/hmlc_dev.json"], "test_json_files": ["my_data/hmlc_test.json"], "generate_dict_using_json_files": true, "generate_dict_using_all_json_files": true, "generate_dict_using_pretrained_embedding": false, "generate_hierarchy_label": true, "dict_dir": "dict_hie", "num_worker": 4 },
  "feature": { "feature_names": ["token"], "min_token_count": 2, "min_char_count": 2, "token_ngram": 0, "min_token_ngram_count": 0, "min_keyword_count": 0, "min_topic_count": 2, "max_token_dict_size": 1000000, "max_char_dict_size": 150000, "max_token_ngram_dict_size": 10000000, "max_keyword_dict_size": 100, "max_topic_dict_size": 100, "max_token_len": 256, "max_char_len": 1024, "max_char_len_per_token": 4, "token_pretrained_file": "", "keyword_pretrained_file": "" },
  "train": { "batch_size": 256, "start_epoch": 1, "num_epochs": 50, "num_epochs_static_embedding": 0, "decay_steps": 1000, "decay_rate": 1.0, "clip_gradients": 100.0, "l2_lambda": 0.0, "loss_type": "BCEWithLogitsLoss", "sampler": "fixed", "num_sampled": 5, "visible_device_list": "0", "hidden_layer_dropout": 0.5 },
  "embedding": { "type": "embedding", "dimension": 64, "region_embedding_type": "context_word", "region_size": 5, "initializer": "uniform", "fan_mode": "FAN_IN", "uniform_bound": 0.25, "random_stddev": 0.01, "dropout": 0.0 },
  "optimizer": { "optimizer_type": "Adam", "learning_rate": 0.008, "adadelta_decay_rate": 0.95, "adadelta_epsilon": 1e-08 },
  "TextCNN": { "kernel_sizes": [2, 3, 4], "num_kernels": 100, "top_k_max_pooling": 1 },
  "TextRNN": { "hidden_dimension": 64, "rnn_type": "GRU", "num_layers": 1, "doc_embedding_type": "Attention", "attention_dimension": 16, "bidirectional": true },
  "DRNN": { "hidden_dimension": 5, "window_size": 3, "rnn_type": "GRU", "bidirectional": true, "cell_hidden_dropout": 0.1 },
  "eval": { "text_file": "my_data/hmlc_test.json", "threshold": 0.5, "dir": "eval_dir", "batch_size": 1024, "is_flat": true, "top_k": 100, "model_dir": "checkpoint_dir_rcv1/TextRNN_best" },
  "TextVDCNN": { "vdcnn_depth": 9, "top_k_max_pooling": 8 },
  "DPCNN": { "kernel_size": 3, "pooling_stride": 2, "num_kernels": 16, "blocks": 2 },
  "TextRCNN": { "kernel_sizes": [2, 3, 4], "num_kernels": 100, "top_k_max_pooling": 1, "hidden_dimension": 64, "rnn_type": "GRU", "num_layers": 1, "bidirectional": true },
  "Transformer": { "d_inner": 128, "d_k": 32, "d_v": 32, "n_head": 4, "n_layers": 1, "dropout": 0.1, "use_star": true },
  "AttentiveConvNet": { "attention_type": "bilinear", "margin_size": 3, "type": "advanced", "hidden_size": 64 },
  "HMCN": { "hierarchical_depth": [0, 384, 384, 384, 384], "global2local": [0, 4, 55, 43, 1] },
  "log": { "logger_file": "log_test_hierar", "log_level": "warn" }
}
```
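(For context: the `hierar_taxonomy` file referenced above follows the convention of the `rcv1.taxonomy` file shipped with the repo, where each line lists a parent label followed by its children, tab-separated. The labels below are placeholders, not from the actual data:)

```
Root	A	B
A	A1	A2
B	B1	B2
```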

MagiaSN commented 2 years ago

Hi, for the TextRNN model the only difference hierar makes is whether a hierarchical_penalty term is added to the loss; whether that improves results depends on the specific data.
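For reference, a minimal sketch of what such a recursive-regularization penalty looks like, assuming it is applied to the final linear layer's weights; `recursive_regularization` and the pair list are illustrative names, not the toolkit's actual API:

```python
import torch

def recursive_regularization(weight, parent_child_pairs, penalty=1e-6):
    """Pull each child label's classifier weights toward its parent's
    (Gopal & Yang style recursive regularization).

    weight: (num_labels, hidden) weight matrix of the output layer.
    parent_child_pairs: (parent_idx, child_idx) tuples from the taxonomy.
    penalty: the hierar_penalty coefficient from the config.
    """
    reg = weight.new_zeros(())
    for parent, child in parent_child_pairs:
        diff = weight[parent] - weight[child]
        reg = reg + 0.5 * torch.sum(diff * diff)
    return penalty * reg

# Hypothetical usage: add the penalty on top of the flat loss.
# loss = bce_loss(logits, targets) \
#        + recursive_regularization(output_layer.weight, pairs, 1e-6)
```

Note that with hierar_penalty as small as 1e-6, this term is only a very mild constraint on the weights.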

xjtuleeyf commented 2 years ago

Thank you very much for your answer; I will keep adjusting in follow-up experiments. One more question: roughly what format does a pretrained word-embedding file need to be in if I want to add one? This is my first time doing NLP work in this area, so I'm not sure yet.

MagiaSN commented 2 years ago

> Thank you very much for your answer; I will keep adjusting in follow-up experiments. One more question: roughly what format does a pretrained word-embedding file need to be in if I want to add one? This is my first time doing NLP work in this area, so I'm not sure yet.

The first line gives the vocabulary size and the embedding dimension, separated by a space; each subsequent line is a word followed by the value of each element of its vector, also space-separated. For example:

```
400000 100
the -0.038194 -0.24487 0.72812 -0.39961 0.083172 0.043953 -0.39141 0.3344 -0.57545 0.087459 0.28787 -0.06731 0.30906 -0.26384 -0.13231 -0.20757 0.33395 -0.33848 -0.31743 -0.48336 0.1464 -0.37304 0.34577 0.052041 0.44946 -0.46971 0.02628 -0.54155 -0.15518 -0.14107 -0.039722 0.28277 0.14393 0.23464 -0.31021 0.086173 0.20397 0.52624 0.17164 -0.082378 -0.71787 -0.41531 0.20335 -0.12763 0.41367 0.55187 0.57908 -0.33477 -0.36559 -0.54857 -0.062892 0.26584 0.30205 0.99775 -0.80481 -3.0243 0.01254 -0.36942 2.2167 0.72201 -0.24978 0.92136 0.034514 0.46745 1.1079 -0.19358 -0.074575 0.23353 -0.052062 -0.22044 0.057162 -0.15806 -0.30798 -0.41625 0.37972 0.15006 -0.53212 -0.2055 -1.2526 0.071624 0.70565 0.49744 -0.42063 0.26148 -1.538 -0.30223 -0.073438 -0.28312 0.37104 -0.25217 0.016215 -0.017099 -0.38984 0.87424 -0.72569 -0.51058 -0.52028 -0.1459 0.8278 0.27062
, -0.10767 0.11053 0.59812 -0.54361 0.67396 0.10663 0.038867 0.35481 0.06351 -0.094189 0.15786 -0.81665 0.14172 0.21939 0.58505 -0.52158 0.22783 -0.16642 -0.68228 0.3587 0.42568 0.19021 0.91963 0.57555 0.46185 0.42363 -0.095399 -0.42749 -0.16567 -0.056842 -0.29595 0.26037 -0.26606 -0.070404 -0.27662 0.15821 0.69825 0.43081 0.27952 -0.45437 -0.33801 -0.58184 0.22364 -0.5778 -0.26862 -0.20425 0.56394 -0.58524 -0.14365 -0.64218 0.0054697 -0.35248 0.16162 1.1796 -0.47674 -2.7553 -0.1321 -0.047729 1.0655 1.1034 -0.2208 0.18669 0.13177 0.15117 0.7131 -0.35215 0.91348 0.61783 0.70992 0.23955 -0.14571 -0.37859 -0.045959 -0.47368 0.2385 0.20536 -0.18996 0.32507 -1.1112 -0.36341 0.98679 -0.084776 -0.54008 0.11726 -1.0194 -0.24424 0.12771 0.013884 0.080374 -0.35414 0.34951 -0.7226 0.37549 0.4441 -0.99059 0.61214 -0.35111 -0.83155 0.45293 0.082577
. -0.33979 0.20941 0.46348 -0.64792 -0.38377 0.038034 0.17127 0.15978 0.46619 -0.019169 0.41479 -0.34349 0.26872 0.04464 0.42131 -0.41032 0.15459 0.022239 -0.64653 0.25256 0.043136 -0.19445 0.46516 0.45651 0.68588 0.091295 0.21875 -0.70351 0.16785 -0.35079 -0.12634 0.66384 -0.2582 0.036542 -0.13605 0.40253 0.14289 0.38132 -0.12283 -0.45886 -0.25282 -0.30432 -0.11215 -0.26182 -0.22482 -0.44554 0.2991 -0.85612 -0.14503 -0.49086 0.0082973 -0.17491 0.27524 1.4401 -0.21239 -2.8435 -0.27958 -0.45722 1.6386 0.78808 -0.55262 0.65 0.086426 0.39012 1.0632 -0.35379 0.48328 0.346 0.84174 0.098707 -0.24213 -0.27053 0.045287 -0.40147 0.11395 0.0062226 0.036673 0.018518 -1.0213 -0.20806 0.64072 -0.068763 -0.58635 0.33476 -1.1432 -0.1148 -0.25091 -0.45907 -0.096819 -0.17946 -0.063351 -0.67412 -0.068895 0.53604 -0.87773 0.31802 -0.39242 -0.23394 0.47298 -0.028803
of -0.1529 -0.24279 0.89837 0.16996 0.53516 0.48784 -0.58826 -0.17982 -1.3581 0.42541 0.15377 0.24215 0.13474 0.41193 0.67043 -0.56418 0.42985 -0.012183 -0.11677 0.31781 0.054177 -0.054273 0.35516 -0.30241 0.31434 -0.33846 0.71715 -0.26855 -0.15837 -0.47467 0.051581 -0.33252 0.15003 -0.1299 -0.54617 -0.37843 0.64261 0.82187 -0.080006 0.078479 -0.96976 -0.57741 0.56491 -0.39873 -0.057099 0.19743 0.065706 -0.48092 -0.20125 -0.40834 0.39456 -0.02642 -0.11838 1.012 -0.53171 -2.7474 -0.042981 -0.74849 1.7574 0.59085 0.04885 0.78267 0.38497 0.42097 0.67882 0.10337 0.6328 -0.026595 0.58647 -0.44332 0.33057 -0.12022 -0.55645 0.073611 0.20915 0.43395 -0.012761 0.089874 -1.7991 0.084808 0.77112 0.63105 -0.90685 0.60326 -1.7515 0.18596 -0.50687 -0.70203 0.66578 -0.81304 0.18712 -0.018488 -0.26757 0.727 -0.59363 -0.34839 -0.56094 -0.591 1.0039 0.20664
to -0.1897 0.050024 0.19084 -0.049184 -0.089737 0.21006 -0.54952 0.098377 -0.20135 0.34241 -0.092677 0.161 -0.13268 -0.2816 0.18737 -0.42959 0.96039 0.13972 -1.0781 0.40518 0.50539 -0.55064 0.4844 0.38044 -0.0029055 -0.34942 -0.099696 -0.78368 1.0363 -0.2314 -0.47121 0.57126 -0.21454 0.35958 -0.48319 1.0875 0.28524 0.12447 -0.039248 -0.076732 -0.76343 -0.32409 -0.5749 -1.0893 -0.41811 0.4512 0.12112 -0.51367 -0.13349 -1.1378 -0.28768 0.16774 0.55804 1.5387 0.018859 -2.9721 -0.24216 -0.92495 2.1992 0.28234 -0.3478 0.51621 -0.43387 0.36852 0.74573 0.072102 0.27931 0.92569 -0.050336 -0.85856 -0.1358 -0.92551 -0.33991 -1.0394 -0.067203 -0.21379 -0.4769 0.21377 -0.84008 0.052536 0.59298 0.29604 -0.67644 0.13916 -1.5504 -0.20765 0.7222 0.52056 -0.076221 -0.15194 -0.13134 0.058617 -0.31869 -0.61419 -0.62393 -0.41548 -0.038175 -0.39804 0.47647 -0.15983
and -0.071953 0.23127 0.023731 -0.50638 0.33923 0.1959 -0.32943 0.18364 -0.18057 0.28963 0.20448 -0.5496 0.27399 0.58327 0.20468 -0.49228 0.19974 -0.070237 -0.88049 0.29485 0.14071 -0.1009 0.99449 0.36973 0.44554 0.28998 -0.1376 -0.56365 -0.029365 -0.4122 -0.25269 0.63181 -0.44767 0.24363 -0.10813 0.25164 0.46967 0.3755 -0.23613 -0.14129 -0.44537 -0.65737 -0.042421 -0.28636 -0.28811 0.063766 0.20281 -0.53542 0.41307 -0.59722 -0.38614 0.19389 -0.17809 1.6618 -0.011819 -2.3737 0.058427 -0.2698 1.2823 0.81925 -0.22322 0.72932 -0.053211 0.43507 0.85011 -0.42935 0.92664 0.39051 1.0585 -0.24561 -0.18265 -0.5328 0.059518 -0.66019 0.18991 0.28836 -0.2434 0.52784 -0.65762 -0.14081 1.0491 0.5134 -0.23816 0.69895 -1.4813 -0.2487 -0.17936 -0.059137 -0.08056 -0.48782 0.014487 -0.6259 -0.32367 0.41862 -1.0807 0.46742 -0.49931 -0.71895 0.86894 0.19539
...
```
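For a quick sanity check of a file in this format, here is a minimal sketch (the `load_pretrained_embedding` helper is an illustrative name, not part of the toolkit):

```python
def load_pretrained_embedding(path):
    """Parse a word2vec-style text file: the header line holds the
    vocabulary size and dimension; every other line is a word plus
    its vector components, all space-separated."""
    embeddings = {}
    with open(path, encoding="utf-8") as f:
        vocab_size, dim = map(int, f.readline().split())
        for line in f:
            parts = line.rstrip().split(" ")
            word, vector = parts[0], [float(x) for x in parts[1:]]
            assert len(vector) == dim, f"bad dimension for {word!r}"
            embeddings[word] = vector
    assert len(embeddings) == vocab_size
    return embeddings
```

The path to such a file would then go in the config's token_pretrained_file field shown above.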