dattalab / keypoint-moseq

https://keypoint-moseq.readthedocs.io
Other
68 stars 28 forks source link

Syllable Statistics Graphing fails with only one group #54

Closed calebweinreb closed 1 year ago

calebweinreb commented 1 year ago

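When only one group is present, `plot_syll_stats_with_sem` fails with the error below: with `plot_sig` enabled it calls `run_kruskal`, which ends up calling `scipy.stats.kruskal`, and that test requires at least two groups. It would probably be better to generate a warning explaining why multiple groups are needed for this step.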

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[33], line 18
     15 # significance threshold
     16 thresh=0.05
---> 18 kpms.plot_syll_stats_with_sem(stats_df, progress_paths, plot_sig = plot_sig, thresh=thresh, stat=stat, ordering=ordering, 
     19                          groups=groups, ctrl_group=ctrl_group, exp_group=exp_group, join=join, figsize=(10, 5))                          

File ~/Dropbox (HMS)/caleb weinreb/PROJECTS/MOUSE_SURVEILANCE/22_3_1_keypoint_MOSEQ/official_pipelines/keypoint-moseq/keypoint_moseq/analysis.py:936, in plot_syll_stats_with_sem(stats_df, progress_paths, plot_sig, thresh, stat, ordering, groups, ctrl_group, exp_group, colors, join, figsize)
    933 sig_sylls = None
    934 if plot_sig:
    935     # run kruskal wallis and dunn's test
--> 936     _, _, sig_pairs = run_kruskal(stats_df, statistic=stat, thresh=thresh)
    937     # plot significant syllables for control and experimental group
    938     if ctrl_group is not None and exp_group is not None:
    939         # check if the group pair is in the sig pairs dict

File ~/Dropbox (HMS)/caleb weinreb/PROJECTS/MOUSE_SURVEILANCE/22_3_1_keypoint_MOSEQ/official_pipelines/keypoint-moseq/keypoint_moseq/analysis.py:716, in run_kruskal(stats_df, statistic, n_perm, seed, thresh, mc_method)
    713 N_m, N_s = syllable_data.shape
    715 # Run KW and return H-stats
--> 716 h_all, real_ranks, X_ties = run_manual_KW_test(
    717     df_usage=df_only_stats,
    718     merged_usages_all=syllable_data,
    719     num_groups=num_groups,
    720     n_per_group=n_per_group,
    721     cum_group_idx=cum_group_idx,
    722     n_perm=n_perm,
    723     seed=seed,
    724 )
    726 # find the real k_real
    727 df_k_real = pd.DataFrame(
    728     [
    729         stats.kruskal(
   (...)
    733     ]
    734 )

File ~/Dropbox (HMS)/caleb weinreb/PROJECTS/MOUSE_SURVEILANCE/22_3_1_keypoint_MOSEQ/official_pipelines/keypoint-moseq/keypoint_moseq/analysis.py:545, in run_manual_KW_test(df_usage, merged_usages_all, num_groups, n_per_group, cum_group_idx, n_perm, seed)
    543 p_i = np.random.randint(n_perm)
    544 s_i = np.random.randint(N_s)
--> 545 kr = stats.kruskal(
    546     *np.array_split(
    547         merged_usages_all[perm[p_i, :], s_i], np.cumsum(n_per_group[:-1])
    548     )
    549 )
    550 assert (kr.statistic == h_all[p_i, s_i]) & (
    551     kr.pvalue == p_vals[p_i, s_i]
    552 ), "manual KW is incorrect"
    554 return h_all, real_ranks, X_ties

File ~/miniconda3/envs/keypoint_moseq/lib/python3.9/site-packages/scipy/stats/_axis_nan_policy.py:502, in _axis_nan_policy_factory.<locals>.axis_nan_policy_decorator.<locals>.axis_nan_policy_wrapper(***failed resolving arguments***)
    500 if sentinel:
    501     samples = _remove_sentinel(samples, paired, sentinel)
--> 502 res = hypotest_fun_out(*samples, **kwds)
    503 res = result_to_tuple(res)
    504 res = _add_reduced_axes(res, reduced_axes, keepdims)

File ~/miniconda3/envs/keypoint_moseq/lib/python3.9/site-packages/scipy/stats/_stats_py.py:8584, in kruskal(nan_policy, *samples)
   8582 num_groups = len(samples)
   8583 if num_groups < 2:
-> 8584     raise ValueError("Need at least two groups in stats.kruskal()")
   8586 for sample in samples:
   8587     if sample.size == 0:

ValueError: Need at least two groups in stats.kruskal()

versey-sherry commented 1 year ago

Handled in commit 399af019ac852b34ce342b811de3df999f34a6ba.