Hi! The way I'd approach it is to subclass the Gaussian kernel so that the lengthscale gets exponentiated whenever the kernel is called. It would look somewhat like this. Note this is untested, but you should be able to specify an arbitrary transformation in the constructor:
import torch
import falkon


class LogGaussianKernel(falkon.kernels.GaussianKernel):
    def __init__(self, sigma, opt=None):
        # `sigma` holds the log-bandwidths; `self.transform` maps them back to
        # the positive scale before they are used in the kernel formula.
        super(LogGaussianKernel, self).__init__(sigma, opt)
        self.transform = lambda s: torch.exp(s)

    def keops_mmv_impl(self, X1, X2, v, kernel, out, opt):
        # Same KeOps formula as the base Gaussian kernel, but with the
        # transformed (exponentiated) bandwidth passed as the `g` parameter.
        formula = 'Exp(SqDist(x1 / g, x2 / g) * IntInv(-2)) * v'
        aliases = [
            'x1 = Vi(%d)' % (X1.shape[1]),
            'x2 = Vj(%d)' % (X2.shape[1]),
            'v = Vj(%d)' % (v.shape[1]),
            'g = Pm(%d)' % (self.sigma.shape[0]),
        ]
        other_vars = [self.transform(self.sigma.to(device=X1.device, dtype=X1.dtype))]
        return self.keops_mmv(X1, X2, v, out, formula, aliases, other_vars, opt)

    @property
    def diff_params(self):
        # Apply the transform to the differentiable parameters.
        param_dict = dict(self.named_parameters())
        param_dict['sigma'] = self.transform(param_dict['sigma'])
        return param_dict

    def detach(self):
        return LogGaussianKernel(self.sigma.detach(), opt=self.params)

    def __repr__(self):
        return f"LogGaussianKernel(sigma={self.sigma})"

    def __str__(self):
        return f"LogGaussian kernel<{self.sigma}>"
Thanks a lot! I'm giving it a try. For now it seems that the bandwidths are not getting updated by the optimizer; do you have any idea what could be causing this behavior?
Sorry, that was my bad: that's apparently not the right place to put the transform. The following seems to work:
import torch
import falkon


class LogGaussianKernel(falkon.kernels.GaussianKernel):
    def __init__(self, sigma, opt=None):
        # `sigma` holds the log-bandwidths; `self.transform` maps them back to
        # the positive scale wherever the kernel is actually evaluated.
        super(LogGaussianKernel, self).__init__(sigma, opt)
        self.transform = lambda s: torch.exp(s)

    def keops_mmv_impl(self, X1, X2, v, kernel, out, opt):
        formula = 'Exp(SqDist(x1 / g, x2 / g) * IntInv(-2)) * v'
        aliases = [
            'x1 = Vi(%d)' % (X1.shape[1]),
            'x2 = Vj(%d)' % (X2.shape[1]),
            'v = Vj(%d)' % (v.shape[1]),
            'g = Pm(%d)' % (self.sigma.shape[0]),
        ]
        other_vars = [self.transform(self.sigma.to(device=X1.device, dtype=X1.dtype))]
        return self.keops_mmv(X1, X2, v, out, formula, aliases, other_vars, opt)

    def compute_diff(self, X1: torch.Tensor, X2: torch.Tensor, diag: bool):
        # Apply the transform here instead of in `diff_params`, so that
        # `diff_params` keeps returning the leaf tensors the optimizer updates.
        d_params = self.diff_params
        d_params['sigma'] = self.transform(d_params['sigma'])
        return self.core_fn(X1, X2, out=None, diag=diag, **d_params, **self._other_params)

    def compute(self, X1: torch.Tensor, X2: torch.Tensor, out: torch.Tensor, diag: bool):
        d_params = self.diff_params
        d_params['sigma'] = self.transform(d_params['sigma'])
        return self.core_fn(X1, X2, out, **d_params, diag=diag, **self._other_params)

    def detach(self):
        return LogGaussianKernel(self.sigma.detach(), opt=self.params)

    def __repr__(self):
        return f"LogGaussianKernel(sigma={self.sigma})"

    def __str__(self):
        return f"LogGaussian kernel<{self.sigma}>"
So the transform is done in the compute methods, and diff_params returns the leaf variables for autograd.
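In case it helps, here is a rough end-to-end sketch of how the kernel might plug into one of the hyperparameter-optimization objectives and a torch optimizer. The objective class and its constructor arguments (NystromCompReg, penalty_init, centers_init, ...) are written from memory and may differ slightly from the actual falkon.hopt API, and the data is random, so treat it as a template rather than a recipe:

# Rough template (untested). NystromCompReg and its keyword arguments are an
# assumption -- check falkon.hopt.objectives for the exact signatures.
import math
import torch
from falkon.hopt.objectives import NystromCompReg

d = 10
Xtr = torch.randn(1000, d, dtype=torch.float64)
Ytr = torch.randn(1000, 1, dtype=torch.float64)

# The leaf variable lives in log-space; the kernel exponentiates it internally.
log_sigma = torch.full((d,), math.log(5.0), dtype=torch.float64, requires_grad=True)
kernel = LogGaussianKernel(log_sigma)

model = NystromCompReg(
    kernel=kernel,
    penalty_init=torch.tensor(1e-5, dtype=torch.float64),
    centers_init=Xtr[:100].clone(),
    opt_penalty=True,
    opt_centers=True,
)
opt = torch.optim.Adam(model.parameters(), lr=0.1)

for epoch in range(100):
    opt.zero_grad()
    loss = model(Xtr, Ytr)   # the objective is a torch Module returning a scalar loss
    loss.backward()          # gradients reach log_sigma, so the update happens in log-space
    opt.step()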
Thanks a lot, I can confirm that it's working as intended!
Hi,
I'm currently using the automatic hyperparameter optimization features, and would like to know if the kernel bandwidths can be optimized on a log scale rather than a linear scale.
e.g. outside of the hopt features, I can pass log-scaled bandwidths to a kernel class, roughly in the following way:
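# Illustrative sketch: store the bandwidths on a log scale and exponentiate
# them when building the kernel (the concrete values are placeholders).
import torch
import falkon

log_sigma = torch.tensor([1.0, 1.0, 1.0])                 # log-scale bandwidths
kernel = falkon.kernels.GaussianKernel(torch.exp(log_sigma))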
This way, if I want to update the bandwidths, I can operate on the exponents. Can I do something similar when the central object is one of the falkon.hopt.objectives, used together with a torch optimizer?