I want to feed gradients to PyTorch manually. In my real problem, I have my own adjoint function that does not use tensors. Is there any way I can define my own gradient function for PyTorch to use during optimization?
import numpy as np
import torch
# define rosenbrock function and gradient
x0 = np.array([0.1, 0.1])
a = 1
b = 5
def f(x):
    return (a - x[0]) ** 2 + b * (x[1] - x[0] ** 2) ** 2
def jac(x):
    dx1 = -2 * a + 4 * b * x[0] ** 3 - 4 * b * x[0] * x[1] + 2 * x[0]
    dx2 = 2 * b * (x[1] - x[0] ** 2)
    return np.array([dx1, dx2])
# create stochastic rosenbrock function and gradient
# (the crude analogy is that I have predefined stochastic
# forward and backward functions)
def f_rand(x):
    return f(x) * np.random.uniform(0.5, 1.5)
def jac_rand(x): return jac(x) * np.random.uniform(0.5, 1.5)
x_tensor = torch.tensor(x0, requires_grad=True)
optimizer = torch.optim.Adam([x_tensor], lr=0.1)
# here, the closure is fed f_rand and autograd computes the gradient.
# I need to feed the closure the gradient directly from jac_rand
def closure():
    optimizer.zero_grad()
    loss = f_rand(x_tensor)
    loss.backward()  # this is where I want to use jac_rand(x) instead
    return loss
for ii in range(200):
    optimizer.step(closure)
print(x_tensor, f(x_tensor))
# tensor([1.0000, 1.0000], dtype=torch.float64, requires_grad=True) tensor(4.5799e-09, dtype=torch.float64, grad_fn=<AddBackward0>)
# ( this is the right answer, E[f(1, 1)] = 0 )
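Just to be explicit about what I mean by feeding the gradient directly: ideally the closure would skip backward() entirely and hand the optimizer my externally computed gradient, something along these lines (only a sketch with a made-up closure_manual; I'm guessing the numpy array would have to be wrapped with torch.from_numpy, and I don't know whether assigning .grad by hand like this is even a supported pattern):

def closure_manual():
    optimizer.zero_grad()
    x_np = x_tensor.detach().numpy()                   # plain numpy view of the current parameters
    loss = torch.tensor(f_rand(x_np))                  # forward pass with my own function, no autograd graph
    x_tensor.grad = torch.from_numpy(jac_rand(x_np))   # hand the optimizer my own gradient
    return loss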
I've tried defining a custom function, but I can't get it to work. This is my best attempt so far:
import numpy as np
import torch
# define rosenbrock function and gradient
x0 = np.array([0.1, 0.1])
a = 1
b = 5
def f(x):
    return (a - x[0]) ** 2 + b * (x[1] - x[0] ** 2) ** 2
def jac(x):
    dx1 = -2 * a + 4 * b * x[0] ** 3 - 4 * b * x[0] * x[1] + 2 * x[0]
    dx2 = 2 * b * (x[1] - x[0] ** 2)
    return np.array([dx1, dx2])
# create stochastic rosenbrock function and gradient
def f_rand(x):
    return f(x) * np.random.uniform(0.5, 1.5)
def jac_rand(x): return jac(x) * np.random.uniform(0.5, 1.5)
class custom_function(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return f_rand(input)
    @staticmethod
    def backward(ctx, grad_output):
        input, = ctx.saved_tensors
        return grad_output * jac_rand(input)
x_tensor = torch.tensor(x0, requires_grad=True)
optimizer = torch.optim.Adam([x_tensor], lr=0.1)
for ii in range(200):
    print('x_tensor ', x_tensor)
    optimizer.step(custom_function())
print(x_tensor, f(x_tensor))
It says:
RuntimeError: Legacy autograd function with non-static forward method is deprecated. Please use new-style autograd function with static forward method. (Example: https://pytorch.org/docs/stable/autograd.html#torch.autograd.Function)
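From the example linked in the error message, I gather that a new-style Function is supposed to be invoked through its .apply method inside a closure, rather than an instance being passed to optimizer.step, i.e. roughly the sketch below, but I still don't see whether backward is allowed to return a numpy-based gradient:

def closure():
    optimizer.zero_grad()
    loss = custom_function.apply(x_tensor)  # new-style invocation: .apply, not custom_function()
    loss.backward()
    return loss

for ii in range(200):
    optimizer.step(closure)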
I'm also not sure how this is supposed to interact with Adam (in my real problem, I'd actually like to use this LBFGS approach: github.com/hjmshi/PyTorch-LBFGS). Am I using torch.autograd.Function correctly? Or is there a simpler way to just set x_tensor.grad = jac_rand(x_tensor)? When I try that directly, it fails with: assigned grad expected to be a Tensor or None but got grad of type numpy.ndarray.