Imposing balance on unbalanced designs

Yes, below is some example code (below) for imposing balance. The get_random_balanced function should be called a number of times, to yield a number of random datasets, to ensure that the results do not qualitatively change.

from itertools import product
import numpy as np
import matplotlib.pyplot as plt
import spm1d
from spm1d import rft1d

def get_random_balanced(y, A, B):
    # find combinations of the A and B factor levels:
    uA      = np.unique(A)
    uB      = np.unique(B)
    combs   = list(  product( uA , uB )  )  # combinations of all A & B levels
    # find minimum number of observations across combinations:
    n       = [((A==a) & (B==b)).sum()  for a,b in combs]
    nmin    = min( n )  # minimum group size
    # select random group subsets:
    yb      = []       # holder for balanced observations
    Ab,Bb   = [],[]    # holders for balanced factor vectors
    for a,b in combs:
        yy  = y[(A==a) & (B==b)]
        nn  = yy.shape[0]  # group size
        if nn != nmin:
            ind = np.random.permutation( nn )[:nmin]
            yy  = yy[ind]
        yb.append( yy )
        Ab += [a]*nmin
        Bb += [b]*nmin
    yb      = np.vstack( yb )
    Ab      = np.array( Ab )
    Bb      = np.array( Bb )
    return yb, Ab, Bb

# create an unbalanced dataset:
np.random.seed(0)
y    = rft1d.randn1d(38, 101, 20)
a    = [0]*5 + [1]*8 + [2]*6
A    = np.array( [a]*2 ).flatten()
B    = np.array( [0]*19 + [1]*19 )

# impose balance:
yb,Ab,Bb = get_random_balanced(y, A, B)

# conduct ANOVA:
alpha        = 0.05
# FF           = spm1d.stats.anova2(y, A, B, equal_var=True)  # this will produce a balance error
FF           = spm1d.stats.anova2(yb, Ab, Bb, equal_var=True)
FFi          = FF.inference(alpha)
print( FFi )

# plot results:
plt.close('all')
FFi.plot(plot_threshold_label=True, plot_p_values=True)
plt.show()

0todd0000 / spm1d

Imposing balance on unbalanced designs #195