jmschrei / tfmodisco-lite

A lite implementation of tfmodisco, a motif discovery algorithm for genomics experiments.
MIT License
56 stars 16 forks source link

Could not construct partition: Cannot accept NaN weights. #12

Open avantikalal opened 1 year ago

avantikalal commented 1 year ago

Hi, using the following command, I get an error:

pos_patterns, neg_patterns = modiscolite.tfmodisco.TFMoDISco(
        hypothetical_contribs=attrs,
        one_hot=inputs,
        max_seqlets_per_metacluster=2000,
        sliding_window_size=20,
        flank_size=5,
        target_seqlet_fdr=0.05,
        n_leiden_runs=2,
    )

The error message is below:

/opt/conda/lib/python3.10/site-packages/modiscolite/affinitymat.py:238: RuntimeWarning: invalid value encountered in true_divide
  (Y_ / np.linalg.norm(Y_)).ravel())
/opt/conda/lib/python3.10/site-packages/modiscolite/affinitymat.py:237: RuntimeWarning: invalid value encountered in true_divide
  scores_ = np.dot((X / np.linalg.norm(X)).ravel(),
---------------------------------------------------------------------------
BaseException                             Traceback (most recent call last)
Cell In[33], line 1
----> 1 pos_patterns, neg_patterns = modiscolite.tfmodisco.TFMoDISco(
      2         hypothetical_contribs=attrs,
      3         one_hot=inputs,
      4         max_seqlets_per_metacluster=2000,
      5         sliding_window_size=20,
      6         flank_size=5,
      7         target_seqlet_fdr=0.05,
      8         n_leiden_runs=2,
      9     )

File /opt/conda/lib/python3.10/site-packages/modiscolite/tfmodisco.py:310, in TFMoDISco(one_hot, hypothetical_contribs, sliding_window_size, flank_size, min_metacluster_size, weak_threshold_for_counting_sign, max_seqlets_per_metacluster, target_seqlet_fdr, min_passing_windows_frac, max_passing_windows_frac, n_leiden_runs, n_leiden_iterations, min_overlap_while_sliding, nearest_neighbors_to_compute, affmat_correlation_threshold, tsne_perplexity, frac_support_to_trim_to, min_num_to_trim_to, trim_to_window_size, initial_flank_to_add, prob_and_pertrack_sim_merge_thresholds, prob_and_pertrack_sim_dealbreaker_thresholds, subcluster_perplexity, merging_max_seqlets_subsample, final_min_cluster_size, min_ic_in_window, min_ic_windowsize, ppm_pseudocount, verbose)
    307     if verbose:
    308         print("Using {} positive seqlets".format(len(pos_seqlets)))
--> 310     pos_patterns = seqlets_to_patterns(seqlets=pos_seqlets,
    311         track_set=track_set, 
    312         track_signs=1,
    313         min_overlap_while_sliding=min_overlap_while_sliding,
    314         nearest_neighbors_to_compute=nearest_neighbors_to_compute,
    315         affmat_correlation_threshold=affmat_correlation_threshold,
    316         tsne_perplexity=tsne_perplexity,
    317         n_leiden_iterations=n_leiden_iterations,
    318         n_leiden_runs=n_leiden_runs,
    319         frac_support_to_trim_to=frac_support_to_trim_to,
    320         min_num_to_trim_to=min_num_to_trim_to,
    321         trim_to_window_size=trim_to_window_size,
    322         initial_flank_to_add=initial_flank_to_add,
    323         prob_and_pertrack_sim_merge_thresholds=prob_and_pertrack_sim_merge_thresholds,
    324         prob_and_pertrack_sim_dealbreaker_thresholds=prob_and_pertrack_sim_dealbreaker_thresholds,
    325         subcluster_perplexity=subcluster_perplexity,
    326         merging_max_seqlets_subsample=merging_max_seqlets_subsample,
    327         final_min_cluster_size=final_min_cluster_size,
    328         min_ic_in_window=min_ic_in_window,
    329         min_ic_windowsize=min_ic_windowsize,
    330         ppm_pseudocount=ppm_pseudocount)
    331 else:
    332     pos_patterns = None

File /opt/conda/lib/python3.10/site-packages/modiscolite/tfmodisco.py:254, in seqlets_to_patterns(***failed resolving arguments***)
    252 #apply subclustering procedure on the final patterns
    253 for patternidx, pattern in enumerate(patterns):
--> 254     pattern.compute_subpatterns(subcluster_perplexity, 
    255         n_seeds=n_leiden_runs, n_iterations=n_leiden_iterations)
    257 return patterns

File /opt/conda/lib/python3.10/site-packages/modiscolite/core.py:153, in SeqletSet.compute_subpatterns(self, perplexity, n_seeds, n_iterations)
    150 sp_density_adapted_affmat /= np.sum(sp_density_adapted_affmat.data)
    152 #Do Leiden clustering
--> 153 self.subclusters = cluster.LeidenCluster(sp_density_adapted_affmat,
    154     n_seeds=n_seeds, n_leiden_iterations=n_iterations) 
    156 #this method assumes all the seqlets have been expanded so they
    157 # all start at 0
    158 subcluster_to_seqletsandalignments = OrderedDict()

File /opt/conda/lib/python3.10/site-packages/modiscolite/cluster.py:22, in LeidenCluster(affinity_mat, n_seeds, n_leiden_iterations)
     19 best_quality = None
     21 for seed in range(1, n_seeds+1):
---> 22     partition = leidenalg.find_partition(
     23         graph=g,
     24         partition_type=leidenalg.ModularityVertexPartition,
     25         weights=affinity_mat.data,
     26         n_iterations=n_leiden_iterations,
     27         initial_membership=None,
     28         seed=seed*100) 
     30     quality = np.array(partition.quality())
     31     membership = np.array(partition.membership)

File /opt/conda/lib/python3.10/site-packages/leidenalg/functions.py:81, in find_partition(graph, partition_type, initial_membership, weights, n_iterations, max_comm_size, seed, **kwargs)
     79 if not weights is None:
     80   kwargs['weights'] = weights
---> 81 partition = partition_type(graph,
     82                            initial_membership=initial_membership,
     83                            **kwargs)
     84 optimiser = Optimiser()
     86 optimiser.max_comm_size = max_comm_size

File /opt/conda/lib/python3.10/site-packages/leidenalg/VertexPartition.py:456, in ModularityVertexPartition.__init__(self, graph, initial_membership, weights)
    452   else:
    453     # Make sure it is a list
    454     weights = list(weights)
--> 456 self._partition = _c_leiden._new_ModularityVertexPartition(pygraph_t,
    457     initial_membership, weights)
    458 self._update_internal_membership()

BaseException: Could not construct partition: Cannot accept NaN weights.

Can you advise on what this means? I'm running modiscolite 2.0.7 on Python 3.10.8. Both inputs and attrs are NumPy arrays of shape (500, 400, 4), and neither contains NaNs.

jmschrei commented 1 year ago

From those errors, I'm not sure what's happening. I'd need to have access to the underlying data. Are your attributions the full hypothetical contributions?