taoshen58 / ReSAN

Apache License 2.0
27 stars 6 forks source link

It works well with the original config; however, after some config changes, it crashes: #3

Open andyyuan78 opened 6 years ago

andyyuan78 commented 6 years ago

--- a/SICK_rl_pub/configs.py
+++ b/SICK_rl_pub/configs.py
@@ -19,20 +19,20 @@ class Configs(object):
 parser.add_argument('--network_type', type=str, default='test', help='None')
 parser.add_argument('--log_period', type=int, default=2000, help='log_period')
 parser.add_argument('--eval_period', type=int, default=500, help='eval_period')


test_batch_size: 100 load_model: False highway_layer_num: 3

Trying to load processed data from /gruntdata/app_data//ReSAN/SICK_rl_pub/result/processed_data/processed_lw_True_ugut_True_gc_6B_wel_300.pickle Have found the file, loading... Done

building resan neural network structure... regularization var num: 14 trainable var num: 38 Trainable Parameters Number: 2527207 Traceback (most recent call last): File "/home//.local/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1139, in _do_call return fn(*args) "nohup.out" 153L, 10057C 65,1 39% File "/home//.local/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 789, in run run_metadata_ptr) File "/home//.local/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 997, in _run feed_dict_string, options, run_metadata) File "/home//.local/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1132, in _do_run target_list, options, run_metadata) File "/home//.local/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1152, in _do_call raise type(e)(node_def, op, message) tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [256,5] vs. [256] [[Node: resan/sub = Sub[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"](resan/output/Reshape_1, _arg_resan/gold_label_0_0/_125)]] [[Node: resan/gradients/resan/emb/gene_token_emb_mat/concat_grad/tuple/control_dependency_1/_273 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_2041_resan/gradients/resan/emb/gene_token_emb_mat/concat_grad/tuple/control_dependency_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'resan/sub', defined at:
  File "sick_rl_main.py", line 127, in <module>
    tf.app.run()
  File "/home/.local/lib/python3.6/site-packages/tensorflow/python/platform/app.py", line 48, in run
    _sys.exit(main(_sys.argv[:1] + flags_passthrough))
  File "sick_rl_main.py", line 112, in main
    train()
  File "sick_rl_main.py", line 49, in train
    len(train_data_obj.dicts['char']), train_data_obj.max_lens['token'], scope.name)
  File "/gruntdata/app_data//ReSAN/SICK_rl_pub/src/model/model_resan.py", line 23, in __init__
    self.update_tensor_add_ema_and_opt()
  File "/gruntdata/app_data//ReSAN/SICK_rl_pub/src/model/model_template.py", line 169, in update_tensor_add_ema_and_opt
    self.loss_sl, self.loss_rl = self.build_loss()
  File "/gruntdata/app_data//ReSAN/SICK_rl_pub/src/model/model_template.py", line 105, in build_loss
    cost_batch = 0.5 * (self.logits - self.gold_label) ** 2
  File "/home//.local/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 838, in binary_op_wrapper
    return func(x, y, name=name)
  File "/home//.local/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2501, in _sub
    result = _op_def_lib.apply_op("Sub", x=x, y=y, name=name)
  File "/home//.local/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/home//.local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2506, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home//.local/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1269, in __init__
    self._traceback = _extract_stack()

InvalidArgumentError (see above for traceback): Incompatible shapes: [256,5] vs. [256] [[Node: resan/sub = Sub[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"](resan/output/Reshape_1, _arg_resan/gold_label_0_0/_125)]] [[Node: resan/gradients/resan/emb/gene_token_emb_mat/concat_grad/tuple/control_dependency_1/_273 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_2041_resan/gradients/resan/emb/gene_token_emb_mat/concat_grad/tuple/control_dependency_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

taoshen58 commented 6 years ago

Could you give me the details of your change?

andyyuan78 commented 6 years ago

from

parser.add_argument('--gpu', type=int, default=0, help='eval_period')
parser.add_argument('--use_mse', type='bool', default=True, help='load specified step')
parser.add_argument('--mse_logits', type='bool', default=False, help='load specified step')
parser.add_argument('--train_batch_size', type=int, default=128, help='Train Batch Size')
parser.add_argument('--highway_layer_num', type=int, default=2, help='highway layer number(abandoned)')
parser.add_argument('--fine_tune', type='bool', default=False, help='(abandoned, keep False)')
parser.add_argument('--batch_norm', type='bool', default=False, help='(abandoned, keep False)')
parser.add_argument('--end_only_rl', type=int, default=6500, help='end only rl')
parser.add_argument('--step_for_sl', type=int, default=1000, help='alternate step')
parser.add_argument('--step_for_rl', type=int, default=1000, help='alternate step')

to

parser.add_argument('--gpu', type=str, default='0,1', help='eval_period')
parser.add_argument('--use_mse', type='bool', default=False, help='load specified step')
parser.add_argument('--mse_logits', type='bool', default=True, help='load specified step')
parser.add_argument('--train_batch_size', type=int, default=256, help='Train Batch Size')
parser.add_argument('--highway_layer_num', type=int, default=3, help='highway layer number(abandoned)')
parser.add_argument('--fine_tune', type='bool', default=True, help='(abandoned, keep False)')
parser.add_argument('--batch_norm', type='bool', default=True, help='(abandoned, keep False)')
parser.add_argument('--end_only_rl', type=int, default=16500, help='end only rl')
parser.add_argument('--step_for_sl', type=int, default=500, help='alternate step')
parser.add_argument('--step_for_rl', type=int, default=500, help='alternate step')