markovmodel / PyEMMA

🚂 Python API for Emma's Markov Model Algorithms 🚂
http://pyemma.org
GNU Lesser General Public License v3.0
311 stars 119 forks source link

RuntimeError: requested more output dimensions (10) than dimension of input data (1) #1484

Closed sbhakat closed 3 years ago

sbhakat commented 3 years ago

Hi I am trying to do a VAMP scoring but getting the following error RuntimeError: requested more output dimensions (10) than dimension of input data (1)

My Code looks like:

torsions_feat = pyemma.coordinates.featurizer(topfile)
torsions_feat.add_backbone_torsions(cossin=True, periodic=False)
torsions_feat.describe()[:10]
torsions_data = pyemma.coordinates.source(trajfile, features=torsions_feat, stride=10)
labels = ['bb\ntorsions']

positions_feat = pyemma.coordinates.featurizer(topfile)
#positions_feat.add_selection(positions_feat.select_Backbone())
positions_feat.add_all(atom_indices=bb)
positions_feat.describe()[:10]
positions_data = pyemma.coordinates.source(trajfile, features=positions_feat, stride=10)
labels += ['bb atom\npositions']

distances_feat = pyemma.coordinates.featurizer(topfile)
distances_feat.add_distances(bb, periodic=False)
#distances_feat.pairs(bb, excluded_neighbors=2)
#distances_feat.add_distances_ca(periodic=False, excluded_neighbors=2)
distances_feat.describe()[:10]
distances_data = pyemma.coordinates.source(trajfile, features=distances_feat, stride=10)
labels += ['bb atom\ndistances']

print('number of dimension = ',torsions_data.dimension())
# VAMP scoring
def score_cv(data, dim, lag, number_of_splits=10, validation_fraction=0.5):
    # we temporarily suppress very short-lived progress bars
    with pyemma.util.contexts.settings(show_progress_bars=False):
        nval = int(data.trajectory_length(0) * validation_fraction)
        scores = np.zeros(number_of_splits)
        for n in range(number_of_splits):
            ival = np.random.choice(data.trajectory_length(0), size=nval, replace=False)
            vamp = pyemma.coordinates.vamp(
                [d for i, d in enumerate(data) if i not in ival], lag=lag, dim=dim)
            scores[n] = vamp.score([d for i, d in enumerate(data) if i in ival])
    return scores

dim = 10

fig, axes = plt.subplots(1, 2, figsize=(12, 3), sharey=True)
for ax, lag in zip(axes.flat, [1, 10, 20]):
    torsions_scores = score_cv(torsions_data, lag=lag, dim=dim)
    scores = [torsions_scores.mean()]
    errors = [torsions_scores.std()]
    positions_scores = score_cv(positions_data, lag=lag, dim=dim)
    scores += [positions_scores.mean()]
    errors += [positions_scores.std()]
    #distances_scores = score_cv(distances_data, lag=lag, dim=dim)
    #scores += [distances_scores.mean()]
    #errors += [distances_scores.std()]
    ax.bar(labels, scores, yerr=errors, color=['C0', 'C1'])
    ax.set_title(r'lag time $\tau$={:.1f}Frames'.format(lag))
    if lag == 1:
        # save for later
        vamp_bars_plot = dict(
            labels=labels, scores=scores, errors=errors, dim=dim, lag=lag)
axes[0].set_ylabel('VAMP2 score')
fig.tight_layout()

I have changed len(data) to data.trajectory_length(0) otherwise it was giving an error TypeError: object of type 'FeatureReader' has no len() and I traced back to FeatureReader which does have no len()

Any help?

thempel commented 3 years ago

Your data has 1 dimension, but you are requesting the VAMP-score associated to the first 10 eigenprocesses of the Koopman operator (which in your case is a 1x1 matrix).