Automatic tuning of the convergence speed. We no longer use the Hessian, since it was hard to tune; instead there is now a "warming phase" that automatically tunes the gradient-descent parameters (convergence speed and tolerance) based on the data.
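Concretely, the warming phase boils down to two data-driven choices. The sketch below shows them in isolation with made-up numbers (the mean values and the helper name `first_step_ok` are only illustrative, not part of the actual code); the full loop below applies exactly these rules during its first iteration.

import numpy as np

# Hypothetical starting point, just to illustrate the two rules.
current_mean, target_mean = 3.6, 12.76

# 1) Tolerance: stop once |grad| is below 1% of the initial gap to the target.
tol = np.abs(target_mean - current_mean) / 100.

# 2) Step size: coefMult is accepted only if the first update moves the mean
#    in the right direction, by between 1/16 and 1/4 of the total gap.
def first_step_ok(diff_first_step, diff_total):
    same_sign = diff_first_step * diff_total > 0.
    not_too_big = 4 * np.abs(diff_first_step) < np.abs(diff_total)
    not_too_small = 16 * np.abs(diff_first_step) > np.abs(diff_total)
    return same_sign and not_too_big and not_too_small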
import numpy as np
# from scipy import special  # REMOVED L.R.
import warnings

def softmax(x):  # ADDED L.R.
    return np.exp(x) / np.sum(np.exp(x))  # ADDED L.R.
x = X_test['CRIM']
current_mean = x.mean()
target_mean = 12.760392145669
ksi = 0
n_iterations = 1
max_iterations = 100
tol = np.abs(target_mean-current_mean)/100.
while n_iterations < max_iterations:
    print("++++++++++++++")
    print(n_iterations, ksi)

    # Update the sample weights and obtain the new mean of the distribution
    lambdas = softmax(ksi * x)
    current_mean = np.average(x, weights=lambdas)
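    # (ksi exponentially tilts the sample weights towards larger or smaller
    #  values of x; ksi = 0 gives uniform weights and hence the original mean.)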
# Do a Newton step using the difference between the mean and the
# target mean
grad = current_mean - target_mean
    # Tune coefMult at the first iteration - we want coefMult tuned so that:
    #  -> sign(mean_iteration1 - init_mean) == sign(target_mean - init_mean)
    #  -> 4*abs(mean_iteration1 - init_mean) < abs(target_mean - init_mean) < 16*abs(mean_iteration1 - init_mean)
    if n_iterations == 1:
        init_mean = current_mean
        Diff_total = target_mean - init_mean

        # Test a first coefMult
        coefMult = 1e-5
        print("coefMult=", coefMult)
        ksi_tmp = -coefMult * grad
        lambdas_tmp = softmax(ksi_tmp * x)
        mean_iteration1 = np.average(x, weights=lambdas_tmp)
        Diff_tmp = mean_iteration1 - init_mean
        if Diff_total * Diff_tmp < 0. or 4 * np.abs(Diff_tmp) > np.abs(Diff_total):  # coefMult is too high
            while Diff_total * Diff_tmp < 0. or 4 * np.abs(Diff_tmp) > np.abs(Diff_total):
                coefMult /= 2.
                print("coefMult=", coefMult)
                ksi_tmp = -coefMult * grad
                Diff_tmp = np.average(x, weights=softmax(ksi_tmp * x)) - init_mean
        if 16 * np.abs(Diff_tmp) < np.abs(Diff_total):  # coefMult is too low
            while 16 * np.abs(Diff_tmp) < np.abs(Diff_total):
                coefMult *= 2.
                print("coefMult=", coefMult)
                ksi_tmp = -coefMult * grad
                Diff_tmp = np.average(x, weights=softmax(ksi_tmp * x)) - init_mean
    # Update ksi
    step = coefMult * grad
    ksi -= step
    print("->", ksi, grad)

    # Increment n_iterations
    n_iterations += 1

    # Stop if the gradient is small enough
    if abs(grad) < tol:
        break
else:
    # The while-else branch runs only if the loop finished without break,
    # i.e. the gradient descent did not converge within max_iterations.
    warnings.warn(
        message=(
            f"Gradient descent failed to converge after {max_iterations} iterations "
            f"(name={x.name}, target_mean={target_mean}, "
            f"current_mean={current_mean}, grad={grad}, step={step}, ksi={ksi})"
        ),
        category=UserWarning,
    )
Note: `lambdas = softmax(ksi * x)` may fail if `x` is too big.
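If that failure comes from overflow in the naive `np.exp`-based softmax above (an assumption on my part), a numerically stable variant that subtracts the maximum before exponentiating avoids it. A minimal sketch (the name `softmax_stable` is hypothetical):

def softmax_stable(x):
    # Subtracting the max leaves the result mathematically unchanged
    # but keeps np.exp from overflowing for large inputs.
    z = np.asarray(x, dtype=float)
    e = np.exp(z - np.max(z))
    return e / np.sum(e)

Used as a drop-in replacement, `lambdas = softmax_stable(ksi * x)` returns the same weights as the current `softmax` whenever the latter does not overflow.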