XAI-ANITI / ethik

A toolbox for fair and explainable machine learning
https://xai-aniti.github.io/ethik/
GNU General Public License v3.0

Fix overflows in compute_ksis #95

Closed: Vayel closed this issue 4 years ago

Vayel commented 5 years ago

lambdas = special.softmax(ksi * x) may fail if x is too big
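
A likely failure mode here is overflow in the exponentials: np.exp of a large value returns inf, and the normalization then produces nan weights. Subtracting the maximum before exponentiating avoids this. A minimal, self-contained illustration (naive_softmax and stable_softmax are illustrative names, not ethik code):

import numpy as np

def naive_softmax(z):
    # np.exp(900.) is inf, so the weights become 0/inf and inf/inf
    e = np.exp(z)
    return e / e.sum()

def stable_softmax(z):
    # subtracting the max leaves the softmax unchanged but keeps exp() finite
    e = np.exp(z - np.max(z))
    return e / e.sum()

z = np.array([10., 800., 900.])
print(naive_softmax(z))   # [ 0. nan nan], with overflow/invalid RuntimeWarnings
print(stable_softmax(z))  # [0.0e+00 3.7e-44 1.0e+00] approximately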

lrisser commented 5 years ago

Automatic tuning of the convergence speed: we don't use the Hessian anymore, as it was hard to tune, and there is now a 'warming phase' that automatically tunes the gradient descent parameters (convergence speed and tolerance) based on the data. The snippet below shows the idea; a sketch of how it could be packaged into a single function follows the code.


import numpy as np
#from scipy import special  #REMOVED L.R.
import warnings

def softmax(x):                         #ADDED L.R.
    e = np.exp(x - np.max(x))  # max-subtraction keeps exp() finite and avoids the overflow
    return e / np.sum(e)

x = X_test['CRIM']  # feature column to reweight (assumes X_test is already loaded)

current_mean = x.mean()
target_mean = 12.760392145669  # mean we want the reweighted distribution to reach

ksi = 0  # parameter we solve for
n_iterations = 1
max_iterations = 100
tol = np.abs(target_mean - current_mean) / 100.  # stop once the gap is below 1% of the initial gap

while n_iterations < max_iterations:
    print("++++++++++++++")
    print(n_iterations,ksi)

    # Update the sample weights and obtain the new mean of the distribution
    lambdas = softmax(ksi * x)

    current_mean = np.average(x, weights=lambdas)

    # Gradient step (no Hessian anymore): use the difference between the
    # current mean and the target mean
    grad = current_mean - target_mean

    # Warming phase: tune coefMult at the first iteration. We want coefMult such that:
    #   -> sign(mean_iteration1 - init_mean) == sign(target_mean - init_mean)
    #   -> 4*abs(mean_iteration1 - init_mean) < abs(target_mean - init_mean) < 16*abs(mean_iteration1 - init_mean)
    if n_iterations == 1:
        init_mean = current_mean
        Diff_total = target_mean - init_mean

        # test a first coefMult
        coefMult = 1e-5
        print("coefMult=", coefMult)
        ksi_tmp = -coefMult * grad
        lambdas_tmp = softmax(ksi_tmp * x)
        mean_iteration1 = np.average(x, weights=lambdas_tmp)
        Diff_tmp = mean_iteration1 - init_mean

        # coefMult is too high: the trial step overshoots or moves the mean the wrong way
        while Diff_total * Diff_tmp < 0. or 4 * np.abs(Diff_tmp) > np.abs(Diff_total):
            coefMult /= 2.
            print("coefMult=", coefMult)
            ksi_tmp = -coefMult * grad
            Diff_tmp = np.average(x, weights=softmax(ksi_tmp * x)) - init_mean

        # coefMult is too low: the trial step barely moves the mean
        while 16 * np.abs(Diff_tmp) < np.abs(Diff_total):
            coefMult *= 2.
            print("coefMult=", coefMult)
            ksi_tmp = -coefMult * grad
            Diff_tmp = np.average(x, weights=softmax(ksi_tmp * x)) - init_mean

    # update ksi (gradient step scaled by the tuned coefMult)
    step = coefMult * grad
    ksi -= step

    print("->",ksi,grad)

    # increment n_iterations
    n_iterations += 1

    # Stop if the gradient is small enough
    if abs(grad) < tol:
        break

else:
    # runs only if the while loop finished without break, i.e. without converging
    warnings.warn(
        message=(
            f"Gradient descent failed to converge after {max_iterations} iterations "
            f"(name={x.name}, current_mean={current_mean}, target_mean={target_mean}, "
            f"grad={grad}, step={step}, ksi={ksi})"
        ),
        category=UserWarning,
    )
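
To show how this warming phase might be folded back into compute_ksis, here is a self-contained sketch of the same logic; the name compute_ksi_for_mean, the _stable_softmax helper and the synthetic example are illustrative assumptions, not the library's actual API.

import warnings
import numpy as np

def _stable_softmax(z):
    # max-subtraction keeps exp() finite whatever the scale of z
    e = np.exp(z - np.max(z))
    return e / np.sum(e)

def compute_ksi_for_mean(x, target_mean, max_iterations=100):
    # Find ksi such that the softmax(ksi * x)-weighted mean of x reaches target_mean,
    # using the same warming phase as above to pick the step size coefMult.
    x = np.asarray(x, dtype=float)
    ksi = 0.0
    tol = np.abs(target_mean - x.mean()) / 100.
    coefMult = 1e-5

    for n in range(1, max_iterations + 1):
        current_mean = np.average(x, weights=_stable_softmax(ksi * x))
        grad = current_mean - target_mean

        if n == 1:
            # warming phase: scale coefMult so the first step moves the mean in the
            # right direction and covers roughly 1/16 to 1/4 of the remaining gap
            diff_total = target_mean - current_mean

            def trial_gap(c):
                # mean shift obtained with the trial value ksi = -c * grad
                return np.average(x, weights=_stable_softmax(-c * grad * x)) - current_mean

            diff_tmp = trial_gap(coefMult)
            while diff_total * diff_tmp < 0. or 4 * np.abs(diff_tmp) > np.abs(diff_total):
                coefMult /= 2.
                diff_tmp = trial_gap(coefMult)
            while 16 * np.abs(diff_tmp) < np.abs(diff_total):
                coefMult *= 2.
                diff_tmp = trial_gap(coefMult)

        ksi -= coefMult * grad
        if np.abs(grad) < tol:
            return ksi

    warnings.warn(f"ksi did not converge after {max_iterations} iterations (grad={grad})")
    return ksi

# example: reweight a skewed sample so that its weighted mean grows by 50%
rng = np.random.default_rng(0)
sample = rng.exponential(scale=3.0, size=1000)
ksi = compute_ksi_for_mean(sample, target_mean=1.5 * sample.mean())
print(ksi, np.average(sample, weights=_stable_softmax(ksi * sample)))

It keeps the same warming phase and stopping rule as the snippet above, just wrapped into a reusable function.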