rs-station / careless

Merge X-ray diffraction data with Wilson's priors, variational inference, and metadata
MIT License
16 stars 6 forks source link

Frame Correlation Coefficients #123

Closed kmdalton closed 1 month ago

kmdalton commented 1 year ago

I have found it useful to compute the correlation coefficients between iobs and ipred on a per frame basis. Careless should either put these info into the predictions files directly or offer a command line program to calculate and plot them. This is an example of a script I've been using this week:

from pylab import *
from glob import glob
import reciprocalspaceship as rs

file_dict = {
    0  : 'merge/dj1_predictions_0.mtz',
    3  : 'merge/dj1_predictions_1.mtz',
    5  : 'merge/dj1_predictions_2.mtz',
    10 : 'merge/dj1_predictions_3.mtz',
    20 : 'merge/dj1_predictions_4.mtz',
    30 : 'merge/dj1_predictions_5.mtz',
}
files = list(file_dict.values())

print(files)

ds = rs.concat([rs.read_mtz(i) for i in files], check_isomorphous=False)
ds['BATCH'] = ds['image_id'] - ds.groupby("asu_id").transform("min")["image_id"] + 1
method = 'Pearson'
ykey = f"Correlation Coefficient ({method})"
g = ds.groupby(["file_id", "BATCH"], as_index=False)

def ccfunc(df):
    x = df.Iobs.to_numpy('float32')
    y = df.Ipred.to_numpy('float32')
    w = np.reciprocal(
        np.square(df.SigIpred) + np.square(df.SigIobs)
    ).to_numpy('float32')
    return rs.utils.weighted_pearsonr(x, y, w)

data = g.apply(
    lambda x: rs.DataSeries(
        {ykey : ccfunc(x)}
    )
)
data['file_id'] = data['file_id'].astype('int')
data['BATCH'] = data['BATCH'].astype('int')
data['File'] = np.array(files)[data['file_id']]
timekey = 'Time (s)'
data[timekey] = np.array(list(file_dict.keys()))[data['file_id']]
data.to_csv('image_cc.csv')

import seaborn as sns
sns.lineplot(
    data=data,
    x = 'BATCH',
    y = ykey,
    hue = timekey,
    palette = 'Dark2',
    marker='.',
    linestyle='none',
)
plt.savefig("image_cc_plot.png", dpi=300)

plt.figure()
sns.histplot(
    data=data, 
    x=ykey, 
    hue=timekey,
    palette='Dark2'
)
plt.savefig("image_cc_hist.png", dpi=300)

plt.show()
kmdalton commented 1 month ago

this had been done since #129