esa / pygmo2

A Python platform to perform parallel computations of optimisation tasks (global and local) via the asynchronous generalized island model.
https://esa.github.io/pygmo2/
Mozilla Public License 2.0

Objective function using numpy argsort #33

Closed aldebaransearch closed 4 years ago

aldebaransearch commented 4 years ago

Dear Pygmo Community

I have been experimenting a bit with pygmo for financial portfolio optimization, where risk and return are measured relative to a benchmark.

Many methods can be used to measure risk. The usual one is minimizing the variance of the portfolio, but I have a preference for minimizing tail risk.

The "hello world" example is to find the portfolio that minimizes risk relative to the benchmark, given that weights are constraint to the [0, 1] range and must sum to 1. It is easy to realize that the solution is a portfolio with weights equal to the benchmark weights.

I have implemented a class that I thought should be capable of solving the problem for both of the above-mentioned ways of defining risk. However, only the "minimize variance" objective gives an appropriate result; minimizing tail risk does not seem to change the x_init values I start the optimization from at all.

I suspect that part of the problem might be that the objective function contains a call to np.argsort. I have previously had success implementing this with MATLAB's fmincon, so I know there should be a way through, but I am unsure how to attack the problem using pygmo: what would be the right algorithm to use, what would be appropriate parameters for the chosen algorithm, etc. I have found inspiration in the tutorials.

Below you'll find a self-contained piece of code that replicates my problem. The first call to the utility function "run_problem" uses the "minimize variance (cov)" objective, and the resulting portfolio equals the benchmark as desired. The second call to "run_problem" seems not to alter the initial value x at all.

If anyone has any idea how I can get around this problem, I would be very grateful!

Thank you very much in advance!

(I am sorry for the layout below (FIXED BY @darioizzo). I cannot figure out the appropriate way to share a notebook here when submitting an issue, so you can also find the notebook in the attached zip folder: opt_test_pygmo_2.zip)

import pygmo as pg
import numpy as np
import time
import matplotlib.pyplot as plt 
from numba import jit, float64, int32

class optimize_risk_return:

    def __init__(self, cov, market, weight_bm, objective='cov'):

        self.cov = cov
        self.market = market
        self.weight_bm = weight_bm
        self.n = len(self.weight_bm)
        self.objective = objective

        assert objective in ['cov','es'], 'Input "objective" can only be "cov" or "es".'

    def get_nic(self):

        return 0

    def get_nec(self):

        return 1

    def get_bounds(self):

        return ([0]*self.n,[1]*self.n)

    def fitness(self, x):

        if self.objective == 'cov':
            return optimize_risk_return._fitness_std_cov(x,self.cov,self.weight_bm)
        else:
            return optimize_risk_return._fitness_es_market(x,self.market,self.weight_bm,1000)


    @jit(float64[:](float64[:],float64[:,:],float64[:]),nopython=True)
    def _fitness_std_cov(x,cov,bm_w):

        ret_val = np.zeros((2,),dtype=float64)
        w = x-bm_w
        ret_val[0] = w.T@cov@w*1000
        ret_val[1] = np.sum(x)-1

        return ret_val

    @jit(float64[:](float64[:],float64[:,:],float64[:], int32),nopython=True) # , parallel=True
    def _fitness_es_market(x,market,bm_w,n_fractile):

        ret_val = np.zeros((2,),dtype=float64)
        w = x-bm_w
        psi = market@w
        indx = np.argsort(psi)
        ret_val[0] = -1*np.mean(psi[indx[:n_fractile]])
        ret_val[1] = np.sum(x)-1
        return ret_val

    def gradient(self, x):
        if self.objective == 'cov':
            return optimize_risk_return._gradient_std_cov(x,self.cov,self.weight_bm)
        else:
            return optimize_risk_return._gradient_es_market(x,self.market,self.weight_bm,1000)

    @jit(float64[:](float64[:],float64[:,:],float64[:]),nopython=True) # , parallel=True
    def _gradient_std_cov(x,cov,bm_w):

        w = x-bm_w
        return np.concatenate((w.T@cov*1000,np.ones((len(w),),dtype=float64)))

    @jit(float64[:](float64[:],float64[:,:],float64[:],int32),nopython=True) # , parallel=True
    def _gradient_es_market(x,market,bm_w, n_fractile):

        w = x-bm_w
        psi = market@w
        indx = np.argsort(psi)
        tmp = -1*market[indx[:n_fractile],:]*w
        ret_val = np.empty(tmp.shape[1])
        for i in range(len(ret_val)):
            ret_val[i] = np.mean(tmp[:, i])

        return np.concatenate((ret_val,np.ones((len(w),),dtype=float64)))

weight_bm = np.array([0.11582448, 0.35305939, 0.34733299, 0.10375922, 0.08002392])
cov_mat = np.array([[2.87275736e-04, 6.72493473e-05, 1.68465649e-04, 4.18551925e-04, 1.19171347e-04],
                       [6.72493473e-05, 3.20281710e-05, 5.42226697e-05, 1.17381173e-04, 4.30776541e-05],
                       [1.68465649e-04, 5.42226697e-05, 2.01671669e-04, 2.66489778e-04, 9.81955361e-05],
                       [4.18551925e-04, 1.17381173e-04, 2.66489778e-04, 1.08587282e-03, 3.10847907e-04],
                       [1.19171347e-04, 4.30776541e-05, 9.81955361e-05, 3.10847907e-04, 1.72575569e-04]])
market = np.random.multivariate_normal(np.zeros((5,)),cov_mat,20000)

def run_problem(opt_obj):
    prob = pg.problem(opt_obj)
    print(prob)
    uda = pg.nlopt('auglag')
    uda.ftol_rel = 1e-12
    algo = pg.algorithm(uda = uda)
    algo.extract(pg.nlopt).local_optimizer = pg.nlopt('var2')
    algo.extract(pg.nlopt).local_optimizer.ftol_rel = 1e-20
    algo.set_verbosity(100) # in this case this corresponds to logs every 200 objevals
    n = 100
    pop = pg.population(prob = prob, size = n)
    pop.problem.c_tol = [1E-12]
    for i in range(n):
        pop.set_x(i,np.ones((5,))/5)
    start_time = time.time()
    pop = algo.evolve(pop)
    print(time.time()-start_time)
    print('Fevals: {0}'.format(pop.problem.get_fevals()) )
    print('Gevals: {0}'.format(pop.problem.get_gevals()) )
    best_x = pop.get_x()[pop.best_idx()]

    print('Sum of weights (should be 1) = {0}'.format(np.sum(best_x)))

    plt.scatter(best_x,opt_obj.weight_bm,s=50)
    plt.scatter(opt_obj.weight_bm,opt_obj.weight_bm,s=25)
    plt.legend(['Actual Result','Desired Result'])

opt_obj = optimize_risk_return(cov_mat, market,weight_bm,'cov')
run_problem(opt_obj)

opt_obj = optimize_risk_return(cov_mat, market,weight_bm,'es')
run_problem(opt_obj)
darioizzo commented 4 years ago

It's not clear to me how you can provide a gradient if the fitness contains an argsort call ... are you sure the gradient and Hessians are correct in that case?

aldebaransearch commented 4 years ago

Thanks a lot for your prompt reply, @darioizzo!

Yes, I know that the provided gradient is at least a good approximation in this particular case.

I have also tried the pygmo utility function for estimating gradients numerically; the outcome was the same. When I compared the actual values coming out of my supplied gradient with the output of the pygmo utility function, the numbers were reasonably comparable. I did this check to see whether I had implemented something completely nonsensical.
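Roughly what that check looked like (a sketch, assuming the class and the data from the first post are in scope):

    import numpy as np
    import pygmo as pg

    # compare my analytical gradient with pygmo's numerical estimate for the 'es' objective
    opt_check = optimize_risk_return(cov_mat, market, weight_bm, 'es')
    x = np.ones(5) / 5
    g_analytical = opt_check.gradient(x)
    g_numerical = pg.estimate_gradient_h(lambda v: opt_check.fitness(v), x)
    print(np.max(np.abs(g_analytical - g_numerical)))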

Nevertheless, I am fairly certain the problem is on my side and that I simply do not know the right handles to turn in pygmo; at least this is the part of the problem I feel most uncertain about.

Kind regards

darioizzo commented 4 years ago

Another thing you may want to look into is the c_tol. If, as you report, in the failing case the initial x is not altered at all, it means that the final solution found by the optimizer is violating the constraint while the initial point (probably) was not. Maybe c_tol = 1e-12 is too strict? Try relaxing it to 1e-5, for example.
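In the run_problem helper above that would be something like (just a sketch):

    # relax the equality-constraint tolerance on the population's problem
    pop.problem.c_tol = [1e-5]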

aldebaransearch commented 4 years ago

Ahh, ok.

Yes, indeed both the tolerance on the constraint and the one on the fitness are very, very small. I chose these low values in an attempt to force the optimizer to do some work, since I interpreted the unaltered x (the fact that I received x_init back as the result) as pygmo for some reason thinking it was already where it should be. But your answer has definitely shed some more light and helped my understanding!

Can I also read from your answer (at least between the lines) that any x_init should obey the constraints? Sometimes such an x_init would be hard for me to come up with, and I would rely on the optimizer to find a point that actually satisfies the constraints.

Unfortunately, changing all tolerances back to what I would usually accept as good-enough deviations from the "perfect" solution (in terms of both fitness and constraint tolerance) did not help me out.

I have now also tried to use the gradient from the "minimize variance" case in the "minimize tail risk" case. When the variable I call "market" in the code above is sampled from a Gaussian distribution, the "minimize tail risk" gradient is actually exactly the same as the "minimize variance" gradient, except for a scaling factor. I can see the gradient works for the "minimize variance" case, but using the code from the "minimize variance"/covariance approach times the appropriate scaling factor still does not change anything. So my problem cannot only be a matter of the somewhat strange gradient definition using np.argsort.

I'll play around with it some more and if I find a solution I'll make sure to post it here.

Btw, thanks a lot @darioizzo, for fixing my bad formatting of code in the initial post above and for using your time on this in general so far!

Best regards

darioizzo commented 4 years ago

When pygmo compares two solutions x1 and x2 in order to decide which one is better, it 1) counts the number of violated constraints, 2) considers the norm of the constraint violation, 3) checks the objective function.

In your case, when x_init is not violating the constraints, after the call to the optimizer (via an evolve method) pygmo needs to decide whether the solution returned by the optimizer is better than x_init. So, if that solution is violating even one of the constraints by as little as 1e-13, pygmo will decide that x_init is better.

When x_init is also violating the constraints, then it is the norm of the constraint violation that counts (which in your case is good).
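Concretely, in your example you can check this by hand after the evolve, e.g. (a sketch, assuming pop from run_problem is still in scope):

    import numpy as np

    # a candidate is feasible here only if |sum(x) - 1| <= c_tol
    x_init = np.ones(5) / 5
    f_init = pop.problem.fitness(x_init)   # [objective, equality-constraint value]
    f_best = pop.champion_f                # fitness of the point returned by evolve

    tol = pop.problem.c_tol[0]
    print('x_init feasible:  ', abs(f_init[1]) <= tol)
    print('champion feasible:', abs(f_best[1]) <= tol)
    print('objectives:', f_init[0], f_best[0])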

aldebaransearch commented 4 years ago

Thanks a lot, @darioizzo - that makes a lot of sense and makes it much easier for me to figure out what is going on.

For what it is worth, I have now found a version that also arrives at the "right" solution for the "minimize tail risk" case. A minor reformulation of the fitness function (together with some scaling), in combination with using the utility function for estimating gradients, made it work. I will now try to fix my analytical gradient the same way as the fitness function to get acceptable speed.

Thanks for all your help!!!

class optimize_risk_return:

    def __init__(self, cov, market, weight_bm, objective='cov', fractile = 0.95):

        self.cov = cov
        self.market = market
        self.weight_bm = weight_bm
        self.n = len(self.weight_bm)
        self.objective = objective

        self.fractile = fractile
        length = market.shape[0]
        threshold = np.ceil((1 - self.fractile) * length)
        self._es_spectrum = np.zeros(length)
        self._es_spectrum[:int(threshold)] = 1
        self._es_spectrum /= np.sum(self._es_spectrum)

        assert objective in ['cov','es'], 'Input "objective" can only be "cov" or "es".'

    def get_nic(self):

        return 0

    def get_nec(self):

        return 1

    def get_bounds(self):

        return ([0]*self.n,[1]*self.n)

    def fitness(self, x):

        if self.objective == 'cov':
            return optimize_risk_return._fitness_std_cov(x,self.cov,self.weight_bm)
        else:
            return optimize_risk_return._fitness_es_market(x,self.market,self.weight_bm,self._es_spectrum)

    @jit(float64[:](float64[:],float64[:,:],float64[:]),nopython=True)
    def _fitness_std_cov(x,cov,bm_w):

        ret_val = np.zeros((2,),dtype=float64)
        w = x-bm_w     
        ret_val[0] = w.T@cov@w*1000
        ret_val[1] = np.sum(x)-1

        return ret_val

    @jit(float64[:](float64[:],float64[:,:],float64[:], float64[:]),nopython=True) # , parallel=True
    def _fitness_es_market(x,market,bm_w,spectrum):

        ret_val = np.zeros((2,),dtype=float64)
        w = x-bm_w
        psi = market@w
        indx = np.argsort(psi)
        ret_val[0] = -1000000*psi[indx]@spectrum
        ret_val[1] = np.sum(x)-1
        return ret_val

    def gradient(self, x):

        if self.objective == 'cov':
            return optimize_risk_return._gradient_std_cov(x,self.cov,self.weight_bm)
        else:
            return pg.estimate_gradient_h(lambda v: self.fitness(v), x)

    @jit(float64[:](float64[:],float64[:,:],float64[:]),nopython=True) # , parallel=True
    def _gradient_std_cov(x,cov,bm_w):

        w = x-bm_w
        return np.concatenate((w.T@cov*1000,np.ones((len(w),),dtype=float64)))
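For completeness, it can be run exactly as before, reusing the run_problem helper and the data from the first post (a sketch):

    opt_obj = optimize_risk_return(cov_mat, market, weight_bm, 'es', fractile=0.95)
    run_problem(opt_obj)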