I am using my own gym environment to test PILCO as a baseline, but the following error always occurs after about 2-3 iterations:
File "/home/lab/Github/PILCO/examples/gym_tracking_tendon.py", line 196, in <module>
pilco.optimize_policy()
File "/home/lab/Github/PILCO/pilco/models/pilco.py", line 96, in optimize_policy
try:
File "/home/lab/anaconda3/envs/pilco/lib/python3.7/site-packages/gpflow-2.0.0-py3.7.egg/gpflow/optimizers/scipy.py", line 73, in minimize
func, initial_params, jac=True, method=method, **scipy_kwargs
File "/home/lab/anaconda3/envs/pilco/lib/python3.7/site-packages/scipy/optimize/_minimize.py", line 610, in minimize
callback=callback, **options)
File "/home/lab/anaconda3/envs/pilco/lib/python3.7/site-packages/scipy/optimize/lbfgsb.py", line 345, in _minimize_lbfgsb
f, g = func_and_grad(x)
File "/home/lab/anaconda3/envs/pilco/lib/python3.7/site-packages/scipy/optimize/lbfgsb.py", line 295, in func_and_grad
f = fun(x, *args)
File "/home/lab/anaconda3/envs/pilco/lib/python3.7/site-packages/scipy/optimize/optimize.py", line 327, in function_wrapper
return function(*(wrapper_args + args))
File "/home/lab/anaconda3/envs/pilco/lib/python3.7/site-packages/scipy/optimize/optimize.py", line 65, in __call__
fg = self.fun(x, *args)
File "/home/lab/anaconda3/envs/pilco/lib/python3.7/site-packages/gpflow-2.0.0-py3.7.egg/gpflow/optimizers/scipy.py", line 95, in _eval
loss, grad = _tf_eval(tf.convert_to_tensor(x))
File "/home/lab/anaconda3/envs/pilco/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py", line 568, in __call__
result = self._call(*args, **kwds)
File "/home/lab/anaconda3/envs/pilco/lib/python3.7/site-packages/tensorflow_core/python/eager/def_function.py", line 638, in _call
return self._concrete_stateful_fn._filtered_call(canon_args, canon_kwds) # pylint: disable=protected-access
File "/home/lab/anaconda3/envs/pilco/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 1611, in _filtered_call
self.captured_inputs)
File "/home/lab/anaconda3/envs/pilco/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 1692, in _call_flat
ctx, args, cancellation_manager=cancellation_manager))
File "/home/lab/anaconda3/envs/pilco/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 545, in call
ctx=ctx)
File "/home/lab/anaconda3/envs/pilco/lib/python3.7/site-packages/tensorflow_core/python/eager/execute.py", line 67, in quick_execute
six.raise_from(core._status_to_exception(e.code, message), None)
File "<string>", line 3, in raise_from
tensorflow.python.framework.errors_impl.InvalidArgumentError: Cholesky decomposition was not successful. The input might not be valid.
[[{{node while/body/_1/Cholesky}}]] [Op:__inference__tf_eval_1032837]
if __name__ == '__main__':
    # Driver script: gather random rollouts from the tendon environment, build a
    # PILCO agent with a linear controller, then alternate between fitting the
    # models, optimising the policy and collecting new data with that policy.
    env = TendonGymEnv()
    e = np.array([[1]])  # Max control input. Setting it too low can lead to Cholesky failures.

    # Initial dataset: 5 random rollouts of 40 timesteps each, stacked row-wise.
    X, Y, _, _ = rollout(env=env, pilco=None, random=True, timesteps=40, render=False)
    for _ in range(4):
        X_, Y_, _, _ = rollout(env=env, pilco=None, random=True, timesteps=40, render=False)
        X = np.vstack((X, X_))
        Y = np.vstack((Y, Y_))

    # X holds [state, control] columns and Y holds one column per state
    # dimension, so the control width is whatever remains of X.
    state_dim = Y.shape[1]
    control_dim = X.shape[1] - state_dim

    # controller = RbfController(state_dim=state_dim, control_dim=control_dim, num_basis_functions=10)
    controller = LinearController(state_dim=state_dim, control_dim=control_dim)
    pilco = PILCO((X, Y), controller=controller, horizon=40)
    pilco.controller.max_action = e

    # NOTE(review): the disabled lines below pin the likelihood noise variance
    # "for numerical stability" (original author's wording). Re-enabling them
    # may help with the reported Cholesky failure -- confirm experimentally.
    # for model in pilco.mgpr.models:
    #     model.likelihood.variance.assign(0.001)
    #     set_trainable(model.likelihood.variance, False)
    #     model.likelihood.fixed=True

    return_lst = []
    for rollouts in range(100):
        print("**** ITERATION no.", rollouts, " ****")
        try:
            pilco.optimize_models()
        except Exception:
            # Catch Exception rather than using a bare `except:` so that Ctrl-C
            # and SystemExit still terminate the run. pdb is imported locally
            # because no active `import pdb` is visible in this section; without
            # it the handler itself could raise NameError and hide the real error.
            import pdb
            pdb.set_trace()
        pilco.optimize_policy()

        # Evaluate the current policy on the real environment and record its return.
        X_new, Y_new, _, sum_return = rollout(env=env, pilco=pilco, timesteps=300, render=False)
        return_lst.append(sum_return)

        # Update dataset
        X = np.vstack((X, X_new))
        Y = np.vstack((Y, Y_new))
        pilco.mgpr.set_data((X, Y))
I have checked the inputs X and Y carefully, and there are no NaN values in either array. This has been bothering me for a long time, so I would really appreciate any help you can give.