Closed waynezw0618 closed 5 years ago
Hi, from this error information
File "testGEPPY_DNS.py", line 71, in evaluate Rp_i=np.array([[Yp[i,1],Yp[i,2],Yp[i,3]],[Yp[i,2],Yp[i,4],Yp[i,5]],[Yp[i,3],Yp[i,5],Yp[i,:6]]]) IndexError: too many indices for array
It seems your Yp is a 1d array here, right? Yp is generated by the following evaluation:
func = toolbox.compile(individual)
Yp = np.array(list(map(func,T1,T2,T3))) # predictions with the GEP model
How did you define your individual? If the output of your individual is just a scalar, then of course Yp is a 1d array. I guess you would expect a 1d array output from each individual. Please check your linker function if there are multiple genes in one individual.
Hi thanks for replying soon
the individual is defined normally as
toolbox.register('individual', creator.Individual, gene_gen=toolbox.gene_gen, n_genes=n_genes, linker=operator.add)
...
n_genes = 2
toolbox.register('individual', creator.Individual, gene_gen=toolbox.gene_gen, n_genes=n_genes, linker=operator.add)
seems the out put is not always a scalar. it is just during the iteration. sometimes it goes to zero.
my input data are imported from a text file
I_train = np.genfromtxt('lamda_train.dat') T_train = np.genfromtxt('T_train.dat').reshape([-1, 3, 6]) bij = np.genfromtxt('bij_train.dat') I1=I_train[:,0] I2=I_train[:,1] I3=I_train[:,2] T1=T_train[:,0,:] T2=T_train[:,1,:] T3=T_train[:,2,:] size=np.shape(I1)[0]
so shape(I1-I3)=(size,1) shape(T1-T3)=(size,6) (as it is a symmetrical tensor).
I actually only modified some part of tutorial. `import geppy as gep import geppy as gep from deap import creator, base, tools import numpy as np import random
s = 10 random.seed(s) np.random.seed(s)
LINEAR_SCALING = False
I_train = np.genfromtxt('lamda_train.dat') T_train = np.genfromtxt('T_train.dat').reshape([-1, 3, 6]) bij = np.genfromtxt('bij_train.dat') I1=I_train[:,0] I2=I_train[:,1] I3=I_train[:,2] T1=T_train[:,0,:] T2=T_train[:,1,:] T3=T_train[:,2,:] size=np.shape(I1)[0]
def protected_div(a, b): if np.isscalar(b): if abs(b) < 1e-6: b = 1 else: b[abs(b) < 1e-6] = 1 return a / b
import operator
pset = gep.PrimitiveSet('Main', input_names=['I1','I2','I3','T1','T2','T3']) pset.add_function(operator.add, 2) pset.add_function(operator.sub, 2) pset.add_function(operator.mul, 2) pset.add_function(protected_div, 2) pset.add_ephemeral_terminal(name='enc', gen=lambda: random.uniform(-5, 5)) # each ENC is a random integer within [-10, 10]
from deap import creator, base, tools
creator.create("FitnessMin", base.Fitness, weights=(-1,)) # to minimize the objective (fitness) creator.create("Individual", gep.Chromosome, fitness=creator.FitnessMin, a=float, b=float)
h = 7 # head length n_genes = 2 # number of genes in a chromosome
toolbox = gep.Toolbox() toolbox.register('gene_gen', gep.Gene, pset=pset, head_length=h) toolbox.register('individual', creator.Individual, gene_gen=toolbox.gene_gen, n_genes=n_genes, linker=operator.add) toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register('compile', gep.compile_, pset=pset)
def evaluate(individual): """Evalute the fitness of an individual: MSE (mean squared error)""" func = toolbox.compile(individual) yp = np.array(list(map(func,I1,I2,I3,T1,T2,T3))) # predictions with the GEP model a=0 b=0 c=0 for i in range(size): Yp=np.zeros(6)
if np.shape(yp)!=(size,6):
# print(yp,np.shape(yp[i]))
Yp=yp[i]*np.array([1,1,1,0,0,0])
else:
Yp=yp[i]
#print(Yp)
#print("------------")
#print(np.shape(yp[i]))
#print("*************")
Ri=np.array([[bij[i,0],bij[i,1],bij[i,2]],
[bij[i,1],bij[i,3],bij[i,4]],
[bij[i,2],bij[i,4],bij[i,5]]])
Rp_i=np.array([[Yp[0],Yp[1],Yp[2]],
[Yp[1],Yp[3],Yp[4]],
[Yp[2],Yp[4],Yp[5]]])
a=a+np.tensordot(Rp_i,Ri)
b=a+np.tensordot(Ri,Ri.T)
c=c+np.tensordot(Rp_i,Rp_i.T)
#print(a,b,c)
#print (a,b,c,a/(b*c))
return a/(b*c),
def evaluate_linear_scaling(individual): """Evaluate the fitness of an individual with linearly scaled MSE. Get a and b by minimizing (a*Yp + b - Y)""" func = toolbox.compile(individual) Yp = np.array(list(map(func,I1,I2,I3,T1,T2,T3))) # predictions with the GEP model
# special cases: (1) individual has only a terminal
# (2) individual returns the same value for all test cases, like 'x - x + 10'. np.linalg.lstsq will fail in such cases.
if isinstance(Yp, np.ndarray):
Q = np.hstack((np.reshape(Yp, (-1, 1)), np.ones((len(Yp), 1))))
(individual.a, individual.b), residuals, _, _ = np.linalg.lstsq(Q, bij, rcond=None)
# residuals is the sum of squared errors
if residuals.size > 0:
return residuals[0] / len(Y), # MSE
# for the above special cases, the optimal linear scaling is just the mean of true target values
individual.a = 0
individual.b = np.mean(Y)
return np.mean((Y - individual.b) ** 2),
if LINEAR_SCALING: toolbox.register('evaluate', evaluate_linear_scaling) else: toolbox.register('evaluate', evaluate)
toolbox.register('select', tools.selTournament, tournsize=3)
toolbox.register('mut_uniform', gep.mutate_uniform, pset=pset, ind_pb=0.05, pb=1) toolbox.register('mut_invert', gep.invert, pb=0.1) toolbox.register('mut_is_transpose', gep.is_transpose, pb=0.1) toolbox.register('mut_ris_transpose', gep.ris_transpose, pb=0.1) toolbox.register('mut_gene_transpose', gep.gene_transpose, pb=0.1) toolbox.register('cx_1p', gep.crossover_one_point, pb=0.4) toolbox.register('cx_2p', gep.crossover_two_point, pb=0.2) toolbox.register('cx_gene', gep.crossover_gene, pb=0.1) toolbox.register('mut_ephemeral', gep.mutate_uniform_ephemeral, ind_pb='1p') # 1p: expected one point mutation in an individual toolbox.pbs['mut_ephemeral'] = 1 # we can also give the probability via the pbs property
stats = tools.Statistics(key=lambda ind: ind.fitness.values[0]) stats.register("avg", np.mean) stats.register("std", np.std) stats.register("min", np.min) stats.register("max", np.max)
n_pop = 10 n_gen = 200
pop = toolbox.population(n=n_pop) hof = tools.HallOfFame(3) # only record the best three individuals ever found in all generations
pop, log = gep.gep_simple(pop, toolbox, n_generations=n_gen, n_elites=1, stats=stats, hall_of_fame=hof, verbose=True)
print(hof[0])
for i in range(3): ind = hof[i] symplified_model = gep.simplify(ind) if LINEAR_SCALING: symplified_model = ind.a * symplified_model + ind.b print('Symplified best individual {}: '.format(i)) print(symplified_model)
rename_labels = {'add': '+', 'sub': '-', 'mul': '*', 'protected_div': '/'}
best_ind = hof[0]
gep.export_expression_tree(best_ind, rename_labels, 'data/numerical_expression_tree.png')
from IPython.display import Image Image(filename='data/numerical_expression_tree.png')`
(1) Do you always require your individual output to be a vector? If not, your Yp
would be a 1d array and Rp_i=np.array([[Yp[i,1],Yp[i,2],Yp[i,3]],[Yp[i,2],Yp[i,4],Yp[i,5]],[Yp[i,3],Yp[i,5],Yp[i,:6]]])
will not work.
(2) From
n_genes = 2
toolbox.register('individual', creator.Individual, gene_gen=toolbox.gene_gen, n_genes=n_genes, linker=operator.add)
, it seems your linker just adds up the two genes' outputs. If the genes output a vector (or a list), your final output of the individual would be a vector as well. Otherwise, it may be a scalar, and we come back to (1).
(3) Note that the expression tree in GEP is evolved randomly and you must consider all possible cases to make your output legal. Your primitive set contains four operators. The combination of these operators and your input may finally generate a scalar. You should handle these special cases in your linker
or your evaluate
functions.
Hi Yes I am using GEP to predict a tensor Bij as a function of tensors T1,T2,... and scalars I1,I2,I3, so I think individual output to be a tensor(vector). One thing I am not very sure is that since there are scalars in the input as you mentioned in (3), it could be possible to generator scalar in the solution. I would like to know how to play with this situation, any suggestion?
Let's take linker
as an example.
You can define your own linking function instead of the operator.add
. Since you have two genes in one chromosome, your linker should accept two arguments, say a
and b
. In your custom linker function, check whether a
(b
) is a vector or a scalar and do some proper pre-processing. Just guarantee that the final output of your custom linker is always a 1d vector. (This idea is similar to what has been done in protected_div
.)
Do I have to have two gene?
You can have any number of genes as you like. I just gave an example.
ok,Again, Could I calculate couples functions Gi, so that the inputs are scalar I1,I2,I3..., So that Gi is G1(I1,I2,I3,..) and out puts are sum(Gi*Ti) so I dont need to runs into a function of tensor?
Hello Again @ShuhuaGao @bolz213
I am trying geppy to fit N 3X3 tensor Bij (it is actually a symmetrical tensor, so I wrote it like a vector of size 6)with two scalar list I1,I2 of size N and two 3X3 tensor list V1,V2 with fitness defined as tensorDot(Bij, PBij)/(tensordot(Bij,Bji)*tensordot(PBij, PBji)). but I get errors in my evaluation function like this: [[ATraceback (most recent call last): File "testGEPPY_DNS.py", line 109, in stats=stats, hall_of_fame=hof, verbose=True) File "/Users/weizhang/software/backup/geppy-master/geppy/algorithms/basic.py", line 100, in gep_simple for ind, fit in zip(invalid_individuals, fitnesses): File "testGEPPY_DNS.py", line 71, in evaluate Rp_i=np.array([[Yp[i,1],Yp[i,2],Yp[i,3]],[Yp[i,2],Yp[i,4],Yp[i,5]],[Yp[i,3],Yp[i,5],Yp[i,:6]]]) IndexError: too many indices for array
here is my evaluation function: `def evaluate(individual): """Evalute the fitness of an individual: MSE (mean squared error)""" func = toolbox.compile(individual) Yp = np.array(list(map(func,T1,T2,T3))) # predictions with the GEP model
print (np.shape(Yp),Yp)
a=0 b=0 c=0 for i in range(size): Ri=np.array([[bij[i,1],bij[i,2],bij[i,3]],[bij[i,2],bij[i,4],bij[i,5]],[bij[i,3],bij[i,5],bij[i,6]]]) Rp_i=np.array([[Yp[i,1],Yp[i,2],Yp[i,3]],[Yp[i,2],Yp[i,4],Yp[i,5]],[Yp[i,3],Yp[i,5],Yp[i,6]]])
print (Ri,np.shape(Ri))
a=a+np.tensordot(Rp_i,Ri) b=a+np.tensordot(Ri,Ri.T) c=c+np.tensordot(Rp_i,Rp_i.T)
return a/(b*c),`
I tried to print the PB, seems give me a array of(N,1) rather than expected (N,3,3) can you please give me some suggestion