Open JustinS6626 opened 2 years ago
I recently upgraded to pytorch version 1.8.1+cu102, and now I am getting an error when I run the code that you have helped me with before. I am not sure why, but it looks as though the optimizer is applying pytorch with GPU settings even though the device is set as CPU.
Would it be possible to post a minimal example that demonstrates the problem?
In the meantime, you could try one of the ways torch allows you to set the default tensor device.
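For reference, a minimal sketch of those options (torch.set_default_tensor_type is the torch 1.x API; torch.set_default_device only exists in later, 2.0+, releases):

import torch

# torch 1.x: make newly created tensors default to CUDA ...
torch.set_default_tensor_type('torch.cuda.FloatTensor')
# ... or switch the default back to CPU
torch.set_default_tensor_type(torch.FloatTensor)

# newer releases (2.0+) also have an explicit device setter:
# torch.set_default_device('cpu')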
I tried using a modified version of one of the quimb examples:
import quimb as qu
import quimb.tensor as qtn
from quimb.tensor.optimize import TNOptimizer

def single_qubit_layer(circ, gate_round=None):
    """Apply a parametrizable layer of single qubit ``U3`` gates.
    """
    for i in range(circ.N):
        # initialize with random parameters
        params = qu.randn(3, dist='uniform')
        circ.apply_gate(
            'U3', *params, i,
            gate_round=gate_round, parametrize=True)

def two_qubit_layer(circ, gate2='CZ', reverse=False, gate_round=None):
    """Apply a layer of constant entangling gates.
    """
    regs = range(0, circ.N - 1)
    if reverse:
        regs = reversed(regs)
    for i in regs:
        circ.apply_gate(
            gate2, i, i + 1, gate_round=gate_round)

def ansatz_circuit(n, depth, gate2='CZ', **kwargs):
    """Construct a circuit of single qubit and entangling layers.
    """
    circ = qtn.Circuit(n, **kwargs)
    for r in range(depth):
        # single qubit gate layer
        single_qubit_layer(circ, gate_round=r)
        # alternate between forward and backward CZ layers
        two_qubit_layer(
            circ, gate2=gate2, gate_round=r, reverse=r % 2 == 0)
    # add a final single qubit layer
    single_qubit_layer(circ, gate_round=r + 1)
    return circ

n = 6
depth = 9
gate2 = 'CZ'
circ = ansatz_circuit(n, depth, gate2=gate2)
H = qu.ham_ising(n, jz=1.0, bx=0.7, cyclic=False)

# the propagator for the hamiltonian
t = 2
U_dense = qu.expm(-1j * t * H)

# 'tensorized' version of the unitary propagator
U = qtn.Tensor(
    data=U_dense.reshape([2] * (2 * n)),
    inds=[f'k{i}' for i in range(n)] + [f'b{i}' for i in range(n)],
    tags={'U_TARGET'}
)

V = circ.uni

def loss(V, U):
    return 1 - abs((V.H & U).contract(all, optimize='auto-hq')) / 2**n

# check our current unitary 'infidelity':
loss(V, U)

tnopt = qtn.TNOptimizer(
    V,                        # the tensor network we want to optimize
    loss,                     # the function we want to minimize
    loss_constants={'U': U},  # supply U to the loss function as a constant TN
    tags=['U3'],              # only optimize U3 tensors
    loss_target=0,
    autodiff_backend='torch', # use 'autograd' for non-compiled optimization
    optimizer='rmsprop',      # the optimization algorithm
)
V_opt = tnopt.optimize_basinhopping(n=500, nhop=10)
V_opt_dense = V_opt.to_dense([f'k{i}' for i in range(n)], [f'b{i}' for i in range(n)])
psi0 = qu.rand_ket(2**n)
psif_exact = U_dense @ psi0
psif_apprx = V_opt_dense @ psi0
circ.update_params_from(V_opt)
and from that I got the following traceback:
Traceback (most recent call last):
File "/home/justin/darthmallocsarchive-svn/trunk/DissertationExperiment/quimbOptTest.py", line 81, in <module>
V_opt = tnopt.optimize_basinhopping(n=500, nhop=10)
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 946, in optimize_basinhopping
self.res = basinhopping(
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/_basinhopping.py", line 680, in basinhopping
bh = BasinHoppingRunner(x0, wrapped_minimizer, take_step_wrapped,
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/_basinhopping.py", line 72, in __init__
minres = minimizer(self.x)
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/_basinhopping.py", line 284, in __call__
return self.minimizer(self.func, x0, **self.kwargs)
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/_minimize.py", line 597, in minimize
return method(fun, x0, args=args, jac=jac, hess=hess, hessp=hessp,
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 501, in __call__
g = jac(x)
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/optimize.py", line 78, in derivative
self._compute_if_needed(x, *args)
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/optimize.py", line 68, in _compute_if_needed
fg = self.fun(x, *args)
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 846, in vectorized_value_and_grad
ag_result, ag_grads = self.handler.value_and_grad(arrays)
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 337, in value_and_grad
result = self._backend_fn(variables)
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 695, in __call__
return self.loss_fn(self.norm_fn(tn_compute))
File "/home/justin/darthmallocsarchive-svn/trunk/DissertationExperiment/quimbOptTest.py", line 66, in loss
return 1 - abs((V.H & U).contract(all, optimize='auto-hq')) / 2**n
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/tensor_core.py", line 5637, in contract
return tensor_contract(*self, **opts)
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/tensor_core.py", line 545, in tensor_contract
o_array = expression(*(t.data for t in tensors), backend=backend)
File "/usr/local/lib/python3.8/dist-packages/opt_einsum/contract.py", line 763, in __call__
return self._contract(ops, out, backend, evaluate_constants=evaluate_constants)
File "/usr/local/lib/python3.8/dist-packages/opt_einsum/contract.py", line 693, in _contract
return _core_contract(list(arrays),
File "/usr/local/lib/python3.8/dist-packages/opt_einsum/contract.py", line 573, in _core_contract
new_view = _tensordot(*tmp_operands, axes=(tuple(left_pos), tuple(right_pos)), backend=backend)
File "/usr/local/lib/python3.8/dist-packages/opt_einsum/sharing.py", line 131, in cached_tensordot
return tensordot(x, y, axes, backend=backend)
File "/usr/local/lib/python3.8/dist-packages/opt_einsum/contract.py", line 374, in _tensordot
return fn(x, y, axes=axes)
File "/usr/local/lib/python3.8/dist-packages/opt_einsum/backends/torch.py", line 54, in tensordot
return torch.tensordot(x, y, dims=axes)
File "/usr/local/lib/python3.8/dist-packages/torch/functional.py", line 1002, in tensordot
return _VF.tensordot(a, b, dims_a, dims_b) # type: ignore
RuntimeError: Tensor for 'out' is on CPU, Tensor for argument #1 'self' is on CPU, but expected them to be on GPU (while checking arguments for addmm)
Does this mean that if you try to run pytorch using the GPU once, it sets that device choice as an environment variable? If so, then I'm afraid I'm not clear on how to switch back to CPU.
Yeah, the problem seems to be that calling torch.tensor on e.g. a list no longer propagates the device and requires_grad attributes of the scalars:
import torch

x = torch.tensor(2.0, requires_grad=True, device='cuda')
y = torch.tensor(3.0, requires_grad=True, device='cuda')

# same for torch.as_tensor
z = torch.tensor(
    [[x, y],
     [y, 0]],
)
z.device, z.requires_grad
# (device(type='cpu'), False)
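For contrast, a quick sketch (reusing x and y from above): torch.stack does propagate both attributes, which is what the workaround below relies on:

z = torch.stack([
    torch.stack([x, y]),
    torch.stack([y, torch.tensor(0.0, device='cuda')]),
])
z.device, z.requires_grad
# (device(type='cuda', index=0), True)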
I need to look into how to get around this properly.
Here's a workaround for now:
import torch
import autoray as ar

def _nd_peek(x):
    """Return the first element, if any, of nested
    iterable ``x`` that is a ``torch.Tensor``.
    """
    if isinstance(x, torch.Tensor):
        return x
    elif isinstance(x, (tuple, list)):
        for el in x:
            res = _nd_peek(el)
            # check against None explicitly, so a zero-valued
            # scalar tensor is still returned
            if res is not None:
                return res

def _nd_stack(x, device):
    """Recursively stack ``x`` into a ``torch.Tensor``,
    creating any constant elements encountered on ``device``.
    """
    if isinstance(x, (tuple, list)):
        return torch.stack([_nd_stack(el, device) for el in x])
    elif isinstance(x, torch.Tensor):
        # torch element
        return x
    else:
        # torch doesn't like you mixing devices,
        # so create constant elements on the right one
        return torch.tensor(x, device=device)

def torch_array(x):
    """Convert ``x`` into a ``torch.Tensor``, respecting the device
    and gradient requirements of any scalars.
    """
    # work out if we should propagate a device
    any_torch_el = _nd_peek(x)
    if any_torch_el is not None:
        device = any_torch_el.device
    else:
        device = None
    return _nd_stack(x, device)

ar.register_function('torch', 'array', torch_array)
If you call this first, then it should work.
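As a quick sanity check (a sketch only; it assumes a CUDA device is available), repeating the earlier snippet through the registered function should now keep both attributes:

x = torch.tensor(2.0, requires_grad=True, device='cuda')
y = torch.tensor(3.0, requires_grad=True, device='cuda')
z = torch_array([[x, y], [y, 0]])
z.device, z.requires_grad
# (device(type='cuda', index=0), True)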
Thank you very much for getting back to me about that. I tried that code in the toy example and it worked, but in my main program, I got the following error:
Traceback (most recent call last):
File "/home/justin/darthmallocsarchive-svn/trunk/DissertationExperiment/TensorNetworkRL/MERA_Frozen_Lake.py", line 383, in <module>
timestep_reward, iter_dex, iter_reward, iter_total_steps, MERA_circuit, MERA_bias = deep_TN_Learning(alpha, gamma, epsilon, 1, episodes, max_steps, n_tests, test=False)
File "/home/justin/darthmallocsarchive-svn/trunk/DissertationExperiment/TensorNetworkRL/MERA_Frozen_Lake.py", line 342, in deep_TN_Learning
MERA_circuit.optimize(MERA_RL_loss, MERA_bias, in_states, in_actions, TN_target, ['U3', 'RX', 'RY'])
File "/home/justin/darthmallocsarchive-svn/trunk/DissertationExperiment/TensorNetworkRL/RLMera.py", line 361, in optimize
V_opt = RLOpt.optimize_basinhopping(n=10, nhop=10)
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 946, in optimize_basinhopping
self.res = basinhopping(
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/_basinhopping.py", line 680, in basinhopping
bh = BasinHoppingRunner(x0, wrapped_minimizer, take_step_wrapped,
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/_basinhopping.py", line 72, in __init__
minres = minimizer(self.x)
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/_basinhopping.py", line 284, in __call__
return self.minimizer(self.func, x0, **self.kwargs)
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/_minimize.py", line 597, in minimize
return method(fun, x0, args=args, jac=jac, hess=hess, hessp=hessp,
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 501, in __call__
g = jac(x)
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/optimize.py", line 78, in derivative
self._compute_if_needed(x, *args)
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/optimize.py", line 68, in _compute_if_needed
fg = self.fun(x, *args)
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 846, in vectorized_value_and_grad
ag_result, ag_grads = self.handler.value_and_grad(arrays)
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 337, in value_and_grad
result = self._backend_fn(variables)
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 695, in __call__
return self.loss_fn(self.norm_fn(tn_compute))
File "/home/justin/darthmallocsarchive-svn/trunk/DissertationExperiment/TensorNetworkRL/MERA_Frozen_Lake.py", line 234, in MERA_RL_loss
expectation = action_expect_i(psi, ops, lightcone_tags, bias)[action]
File "/home/justin/darthmallocsarchive-svn/trunk/DissertationExperiment/TensorNetworkRL/RLMera.py", line 558, in action_expect_i
exp_array = np.asarray(exp_vals)
File "/usr/local/lib/python3.8/dist-packages/torch/tensor.py", line 621, in __array__
return self.numpy()
RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.
Again, it's much harder to help with only the error traceback, but I suspect you might need to call:

import autoray as ar
...
exp_array = ar.do('array', exp_vals)

rather than trying to call numpy on torch tensors.
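For context, a minimal sketch of what triggers the error, and why the detach() suggested by the error message would be the wrong fix here (it silences the error but cuts the value out of the autodiff graph, so gradients can no longer flow through it):

import numpy as np
import torch

t = torch.tensor(1.0, requires_grad=True)
# np.asarray(t)      # RuntimeError: can't call numpy() on a tensor that requires grad
t.detach().numpy()   # 'works', but the result is severed from the autodiff graph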
Sorry about that. I added the two lines that you suggested, but I got a different error:
Traceback (most recent call last):
File "/home/justin/darthmallocsarchive-svn/trunk/DissertationExperiment/TensorNetworkRL/MERA_Frozen_Lake.py", line 383, in <module>
timestep_reward, iter_dex, iter_reward, iter_total_steps, MERA_circuit, MERA_bias = deep_TN_Learning(alpha, gamma, epsilon, 1, episodes, max_steps, n_tests, test=False)
File "/home/justin/darthmallocsarchive-svn/trunk/DissertationExperiment/TensorNetworkRL/MERA_Frozen_Lake.py", line 342, in deep_TN_Learning
MERA_circuit.optimize(MERA_RL_loss, MERA_bias, in_states, in_actions, TN_target, ['U3', 'RX', 'RY'])
File "/home/justin/darthmallocsarchive-svn/trunk/DissertationExperiment/TensorNetworkRL/RLMera.py", line 362, in optimize
V_opt = RLOpt.optimize_basinhopping(n=10, nhop=10)
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 946, in optimize_basinhopping
self.res = basinhopping(
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/_basinhopping.py", line 680, in basinhopping
bh = BasinHoppingRunner(x0, wrapped_minimizer, take_step_wrapped,
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/_basinhopping.py", line 72, in __init__
minres = minimizer(self.x)
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/_basinhopping.py", line 284, in __call__
return self.minimizer(self.func, x0, **self.kwargs)
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/_minimize.py", line 597, in minimize
return method(fun, x0, args=args, jac=jac, hess=hess, hessp=hessp,
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 501, in __call__
g = jac(x)
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/optimize.py", line 78, in derivative
self._compute_if_needed(x, *args)
File "/usr/local/lib/python3.8/dist-packages/scipy/optimize/optimize.py", line 68, in _compute_if_needed
fg = self.fun(x, *args)
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 846, in vectorized_value_and_grad
ag_result, ag_grads = self.handler.value_and_grad(arrays)
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 337, in value_and_grad
result = self._backend_fn(variables)
File "/usr/local/lib/python3.8/dist-packages/quimb/tensor/optimize.py", line 695, in __call__
return self.loss_fn(self.norm_fn(tn_compute))
File "/home/justin/darthmallocsarchive-svn/trunk/DissertationExperiment/TensorNetworkRL/MERA_Frozen_Lake.py", line 234, in MERA_RL_loss
expectation = action_expect_i(psi, ops, lightcone_tags, bias)[action]
File "/home/justin/darthmallocsarchive-svn/trunk/DissertationExperiment/TensorNetworkRL/RLMera.py", line 559, in action_expect_i
exp_array = ar.do('array', exp_vals)
File "/usr/local/lib/python3.8/dist-packages/autoray/autoray.py", line 84, in do
return get_lib_fn(backend, fn)(*args, **kwargs)
File "/usr/local/lib/python3.8/dist-packages/torch/tensor.py", line 621, in __array__
return self.numpy()
RuntimeError: Can't call numpy() on Tensor that requires grad. Use tensor.detach().numpy() instead.
My optimization function is defined as:
def optimize(self, loss_function, bias, in_states, in_actions, targets, opt_points, state_type="type1"):
    total_states = len(in_states)
    bias_list = bias.tolist()
    psi_states = []
    for i in range(len(in_states)):
        if state_type == "type1":
            psi_state = psi_input_basic(in_states[i], self.n_qubits)
        else:
            psi_state = psi_input_advanced(in_states[i], self.n_qubits)
        psi_states.append(psi_state)
        action_select = in_actions[i]
        self.structure.psi0 = psi_state
    total_states = len(in_states)
    RLOpt = qtn.optimize.TNOptimizer(
        self.structure.psi,
        loss_function,
        loss_constants={"ops": self.ops,
                        "action": action_select,
                        "targets": targets,
                        "dex": i},
        loss_kwargs={"lightcone_tags": self.lightcone_tags,
                     "bias": bias},
        tags=opt_points,
        loss_target=0,
        optimizer='rmsprop',
        autodiff_backend="torch",
        progbar=False,
        device="cpu",
    )
    V_opt = RLOpt.optimize_basinhopping(n=10, nhop=10)
    self.structure.update_params_from(V_opt)
while the loss function is defined as:
def MERA_RL_loss(psi, ops, lightcone_tags, bias, action, targets, dex):
    # lightcone_tags = {where: circ.get_reverse_lightcone_tags(where) for where in ops}
    n_targets = len(targets)
    expectation = action_expect_i(psi, ops, lightcone_tags, bias)[action]
    loss = ((expectation - targets[dex]) ** 2) / n_targets
    return loss
and the expectation value calculation function is defined as:
def action_expect_i(psi, ops, lightcone_tags, bias):
    taglist = [lightcone_tags[where] for where in ops]
    kets = [psi.select(tag, 'any') for tag in taglist]
    bras = [ket.H for ket in kets]
    expects = [(ket.gate(ops[where], where) | bra)
               for ket, bra, where in zip(kets, bras, ops)]
    exp_vals = [do("real", expec.contract(all, optimize="auto-hq"))
                for expec in expects]
    exp_array = ar.do('array', exp_vals)
    total = exp_array + bias
    exp_sum = total.tolist()
    return exp_sum
The key thing is that all numeric/array operations need to be dispatched to the correct backend library (in this case torch), so that the computation can be traced and auto-diffed. That means using autoray (if you want to easily switch backends etc.) and avoiding builtin structures like list. Here you probably just need to call:

exp_array = ar.do('stack', exp_vals)
# exp_array = ar.do('array', exp_vals, like=backend_or_example_array)  # should also work
return exp_array + bias

i.e. return the stacked tensor directly rather than converting it back to a list with total.tolist().
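To illustrate (a self-contained sketch with made-up values), the dispatched version stays on the torch backend and keeps the gradient trace:

import torch
import autoray as ar

exp_vals = [torch.tensor(0.1, requires_grad=True),
            torch.tensor(0.2, requires_grad=True)]
bias = torch.tensor([0.0, 0.5])

exp_array = ar.do('stack', exp_vals)  # dispatches to torch.stack
total = exp_array + bias
total.requires_grad
# True - still differentiable, unlike total.tolist()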