ginward opened this issue 5 years ago
The error output is:
WARNING:tensorflow:From /Users/jinhuawang/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Train on 400 samples, validate on 45 samples
Epoch 1/30
400/400 [==============================] - 1s 1ms/step - loss: nan - val_loss: nan
Epoch 2/30
400/400 [==============================] - 0s 57us/step - loss: nan - val_loss: nan
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-6-9c6d6ed20ebe> in <module>
1 boot_est = BootstrapEstimator(deepIvEst, n_bootstrap_samples=2, n_jobs=1)
----> 2 boot_est.fit(Y=y,T=t,X=x,Z=z)
~/Dropbox (Cambridge University)/EconML/econml/bootstrap.py in fit(self, *args, **named_args)
56 Parallel(n_jobs=self._n_jobs, prefer='threads', verbose=3)(
57 (obj.fit, [arg[inds] for arg in args], {arg: named_args[arg][inds] for arg in named_args})
---> 58 for obj, inds in zip(self._instances, indices)
59 )
60 return self
~/anaconda3/lib/python3.6/site-packages/joblib-0.13.2-py3.6.egg/joblib/parallel.py in __call__(self, iterable)
919 # remaining jobs.
920 self._iterating = False
--> 921 if self.dispatch_one_batch(iterator):
922 self._iterating = self._original_iterator is not None
923
~/anaconda3/lib/python3.6/site-packages/joblib-0.13.2-py3.6.egg/joblib/parallel.py in dispatch_one_batch(self, iterator)
757 return False
758 else:
--> 759 self._dispatch(tasks)
760 return True
761
~/anaconda3/lib/python3.6/site-packages/joblib-0.13.2-py3.6.egg/joblib/parallel.py in _dispatch(self, batch)
714 with self._lock:
715 job_idx = len(self._jobs)
--> 716 job = self._backend.apply_async(batch, callback=cb)
717 # A job can complete so quickly than its callback is
718 # called before we get here, causing self._jobs to
~/anaconda3/lib/python3.6/site-packages/joblib-0.13.2-py3.6.egg/joblib/_parallel_backends.py in apply_async(self, func, callback)
180 def apply_async(self, func, callback=None):
181 """Schedule a func to be run"""
--> 182 result = ImmediateResult(func)
183 if callback:
184 callback(result)
~/anaconda3/lib/python3.6/site-packages/joblib-0.13.2-py3.6.egg/joblib/_parallel_backends.py in __init__(self, batch)
547 # Don't delay the application, to avoid keeping the input
548 # arguments in memory
--> 549 self.results = batch()
550
551 def get(self):
~/anaconda3/lib/python3.6/site-packages/joblib-0.13.2-py3.6.egg/joblib/parallel.py in __call__(self)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~/anaconda3/lib/python3.6/site-packages/joblib-0.13.2-py3.6.egg/joblib/parallel.py in <listcomp>(.0)
223 with parallel_backend(self._backend, n_jobs=self._n_jobs):
224 return [func(*args, **kwargs)
--> 225 for func, args, kwargs in self.items]
226
227 def __len__(self):
~/Dropbox (Cambridge University)/EconML/econml/deepiv.py in fit(self, Y, T, X, Z)
329 model.compile(self._optimizer)
330 # TODO: do we need to give the user more control over other arguments to fit?
--> 331 model.fit([Z, X, T], [], **self._first_stage_options)
332
333 lm = response_loss_model(lambda t, x: self._h(t, x),
~/anaconda3/lib/python3.6/site-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)
1037 initial_epoch=initial_epoch,
1038 steps_per_epoch=steps_per_epoch,
-> 1039 validation_steps=validation_steps)
1040
1041 def evaluate(self, x=None, y=None,
~/anaconda3/lib/python3.6/site-packages/keras/engine/training_arrays.py in fit_loop(model, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)
215 for l, o in zip(out_labels, val_outs):
216 epoch_logs['val_' + l] = o
--> 217 callbacks.on_epoch_end(epoch, epoch_logs)
218 if callback_model.stop_training:
219 break
~/anaconda3/lib/python3.6/site-packages/keras/callbacks.py in on_epoch_end(self, epoch, logs)
77 logs = logs or {}
78 for callback in self.callbacks:
---> 79 callback.on_epoch_end(epoch, logs)
80
81 def on_batch_begin(self, batch, logs=None):
~/anaconda3/lib/python3.6/site-packages/keras/callbacks.py in on_epoch_end(self, epoch, logs)
555 print('Restoring model weights from the end of '
556 'the best epoch')
--> 557 self.model.set_weights(self.best_weights)
558
559 def on_train_end(self, logs=None):
~/anaconda3/lib/python3.6/site-packages/keras/engine/network.py in set_weights(self, weights)
502 for layer in self.layers:
503 num_param = len(layer.weights)
--> 504 layer_weights = weights[:num_param]
505 for sw, w in zip(layer.weights, layer_weights):
506 tuples.append((sw, w))
TypeError: 'NoneType' object is not subscriptable
It will run if I set restore_best_weights=False, but the loss values are still NaN; it only crashes when I set restore_best_weights=True. Either way, I guess it is not really working ...
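For what it's worth, the crash looks consistent with how Keras' EarlyStopping tracks the best epoch (a minimal sketch, assuming the comparison logic in Keras 2.2.x; the string stand-in for the weights is illustrative):

import numpy as np

# EarlyStopping only records best_weights when an epoch improves on
# the running best; NaN never compares as an improvement.
best = np.inf
best_weights = None
for epoch_loss in [np.nan, np.nan]:  # every epoch reported loss: nan
    if np.less(epoch_loss, best):    # nan < anything is always False
        best = epoch_loss
        best_weights = "snapshot of model weights"  # never reached
# With restore_best_weights=True, Keras then calls
# model.set_weights(best_weights) while best_weights is still None,
# hence: TypeError: 'NoneType' object is not subscriptable
print(best_weights)  # -> None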
It seems that scaling down the values makes it work:
import numpy as np

# Draw X and Z on [0, 1000), then rescale each to [0, 10)
x = np.random.uniform(low=0.0, high=1000.0, size=(445, 1))
x = x / 100
z = np.random.uniform(low=0.0, high=1000.0, size=(445, 1))
z = z / 100
So I guess there is a limit on how large the input values can be for the DeepIV module?
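(For reference, a more systematic version of this rescaling, sketched with scikit-learn's StandardScaler; this preprocessing step is my own addition, DeepIV itself does not standardize inputs:)

from sklearn.preprocessing import StandardScaler

# Standardize each column to zero mean / unit variance instead of
# dividing by a hand-picked constant.
scaler_x, scaler_z = StandardScaler(), StandardScaler()
x = scaler_x.fit_transform(x)
z = scaler_z.fit_transform(z)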
Thanks for reporting this - I'll try to take a closer look tomorrow.
I'm also getting NaNs; has this issue been resolved?
@yl3832 When I investigated this previously, it appeared to depend on the network architecture and initialization (e.g. the weights need to have lower variance as additional layers are added). Unfortunately I don't see any easy way to avoid this, since we let the user specify the network and some choices may lead to issues like exploding gradients.
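(To illustrate the kind of adjustment meant here, a sketch of passing a lower-variance initializer to a Keras layer; the scale value is an arbitrary example and the layer is hypothetical, not part of the EconML API:)

from keras.layers import Dense
from keras.initializers import VarianceScaling

# Shrink the initial weight variance (scale < 1) to tame exploding
# gradients in deeper stacks; scale=0.1 is an arbitrary example value.
init = VarianceScaling(scale=0.1, mode='fan_avg', distribution='uniform')
hidden_layer = Dense(64, activation='relu', kernel_initializer=init)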
@kbattocchi Thank you Keith. I did try different architectures in my use case, and it sometimes works. Also, normalization always helps.
The following is a slight modification to the DeepIV notebook, but it no longer works. I guess it is because I increased the maximum values of X and T, which caused an overflow somewhere. Could you help investigate why?