Open mortonjt opened 3 years ago
I already have both methods implemented, see code snippet below -- it'll be just a matter of refactoring and merging in.
from scipy import stats
from skbio.stats.composition import ilr
from statsmodels.regression.linear_model import OLS
def regression_test(Bxy, Bxyr, iAr):
# Bxy : n x 1
# Bxyr : n x 1
# Bxy : n x r
intr = np.ones((len(Bxy), 1))
Bxy = Bxy.reshape(-1, 1)
Bxyr = Bxyr.reshape(-1, 1)
X = np.hstack((intr, Bxyr, iAr))
Y = Bxy.reshape(-1, 1)
return OLS(Y.ravel(), X)
def proportionality(table, pairs, references):
""" Adapted from `Linear Association in Compositional Data Analysis`
This is the Regression test from 5.2.
Parameters
----------
table : biom.Table
Table of abundances
pairs : list of tuple of str
List of pairs to test
references : list of str
Reference Features
Return
------
tests : list of OLS results
"""
# Extract reference features
Ar = np.vstack([table.data(r, axis='observation') for r in references]).T
Ar[Ar==0] = 0.65
gAr = np.mean(np.log(Ar), axis=1)
lrA = ilr(Ar)
r, s = 2, len(references)
results = []
for i, (x, y) in enumerate(pairs):
print(i)
Ax, Ay = table.data(x, axis='observation'), table.data(y, axis='observation')
idx = np.logical_and(Ax > 0, Ay > 0)
Ax, Ay = np.log(Ax[idx]), np.log(Ay[idx])
n = np.sum(idx)
df = n - 2
# Compute balances
Bxyr = np.sqrt( r * s / (r + s)) * (0.5 * (Ax + Ay) - gAr[idx])
Bxy = Ax - Ay
try:
res = regression_test(Bxy, Bxyr, lrA[idx])
fit = res.fit()
summ = fit.summary()
fval = float(summ.tables[0].data[2][3])
pval = float(summ.tables[0].data[3][3])
results.append((x, y, fval, pval))
except:
continue
return results
We will want to add some support for proportionality