Statistical quality evaluation of dimensionality reduction algorithms
29
stars
2
forks
source link
PCA Initialisation Error: ValueError: Cannot generate PCA initialization because input data, `X`, is an affinity matrix, not a samples x features matrix. #14
When trying to run EMBEDR.fit() with DRA_params:{'initialization':'pca'}, this error is returned:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [23], in <cell line: 2>()
2 for alg,dat in DR_params: #for alg, param in DR_params:
3 # ## If we're doing t-SNE, then we use the `perplexity` parameter...
4 # if alg.lower() == 'tsne':
(...)
11
12 ## Initialize a new object at each loop.
13 embObj = EMBEDR(DRA=alg,
14 perplexity=30,
15 n_jobs=n_jobs,
(...)
22 project_name=f'{alg}_{dat}',
23 project_dir=project_dir)
---> 25 embObj.fit(input_datasets[dat]) ## Use `fit` to generate the embeddings.
27 embObjs[(alg, dat)] = embObj
File ~\Miniconda3\envs\DRaim2\lib\site-packages\EMBEDR\embedr.py:280, in EMBEDR.fit(self, X)
277 self._validate_with_data(X)
279 ## Finally, we can do the computations!
--> 280 self._fit(null_fit=False)
282 #####################
283 ## Fit to the NULL ##
284 #####################
286 self._fit(null_fit=True)
File ~\Miniconda3\envs\DRaim2\lib\site-packages\EMBEDR\embedr.py:532, in EMBEDR._fit(self, null_fit)
530 ## We then need to get the requested embeddings.
531 if (self.DRA in ['tsne', 't-sne']):
--> 532 dY, dEES = self.get_tSNE_embedding(X=self.data_X,
533 kNN_graph=self.data_kNN,
534 aff_mat=self.data_P)
536 elif (self.DRA in ['umap']):
537 dY, dEES = self.get_UMAP_embedding(X=self.data_X,
538 kNN_graph=self.data_kNN,
539 aff_mat=self.data_P)
File ~\Miniconda3\envs\DRaim2\lib\site-packages\EMBEDR\tsne.py:200, in tSNE_Embed.fit(self, X, aff_kwargs)
196 if self.verbose >= 1:
197 print(f"\nGenerating {self.n_components}-dimensional embedding"
198 f" with t-SNE!")
--> 200 P = self.initialize_embedding(X, aff_kwargs)
202 ## Optimize Early Exaggeration Phase
203 try:
File ~\Miniconda3\envs\DRaim2\lib\site-packages\EMBEDR\tsne.py:327, in tSNE_Embed.initialize_embedding(self, X, **aff_kwargs)
325 err_str += f", X, is an affinity matrix, not a samples x"
326 err_str += f" features matrix."
--> 327 raise ValueError(err_str)
329 ## If we wanted a spectral initialization, do that here.
330 elif self.initialization == 'spectral':
ValueError: Cannot generate PCA initialization because input data, `X`, is an affinity matrix, not a samples x features matrix.
I have been trying to work through the code, but I am unclear on why initialisation of the t-SNE embeddings switches from taking the input data to taking only the affinity matrix, which then produces this error.
When trying to run EMBEDR.fit() with DRA_params:{'initialization':'pca'}, this error is returned:
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Input In [23], in <cell line: 2>() 2 for alg,dat in DR_params: #for alg, param in DR_params: 3 # ## If we're doing t-SNE, then we use the `perplexity` parameter... 4 # if alg.lower() == 'tsne': (...) 11 12 ## Initialize a new object at each loop. 13 embObj = EMBEDR(DRA=alg, 14 perplexity=30, 15 n_jobs=n_jobs, (...) 22 project_name=f'{alg}_{dat}', 23 project_dir=project_dir) ---> 25 embObj.fit(input_datasets[dat]) ## Use `fit` to generate the embeddings. 27 embObjs[(alg, dat)] = embObj
File ~\Miniconda3\envs\DRaim2\lib\site-packages\EMBEDR\embedr.py:280, in EMBEDR.fit(self, X) 277 self._validate_with_data(X) 279 ## Finally, we can do the computations! --> 280 self._fit(null_fit=False) 282 ##################### 283 ## Fit to the NULL ## 284 ##################### 286 self._fit(null_fit=True)
File ~\Miniconda3\envs\DRaim2\lib\site-packages\EMBEDR\embedr.py:532, in EMBEDR._fit(self, null_fit) 530 ## We then need to get the requested embeddings. 531 if (self.DRA in ['tsne', 't-sne']): --> 532 dY, dEES = self.get_tSNE_embedding(X=self.data_X, 533 kNN_graph=self.data_kNN, 534 aff_mat=self.data_P) 536 elif (self.DRA in ['umap']): 537 dY, dEES = self.get_UMAP_embedding(X=self.data_X, 538 kNN_graph=self.data_kNN, 539 aff_mat=self.data_P)
File ~\Miniconda3\envs\DRaim2\lib\site-packages\EMBEDR\embedr.py:1208, in EMBEDR.get_tSNE_embedding(self, X, kNN_graph, aff_mat, null_fit, return_tSNE_objects) 1198 seed_offset = n_embeds_made + ii 1200 embObj = tSNE_Embed(n_components=self.n_components, 1201 perplexity=self.perplexity, 1202 n_jobs=self.n_jobs, 1203 random_state=self._seed + seed_offset, 1204 verbose=self.verbose, 1205 **self.DRA_params) -> 1208 embObj.fit(aff_mat) 1210 tmp_embed_arr[ii] = embObj.embedding[:] 1212 ## Calculate the EES
File ~\Miniconda3\envs\DRaim2\lib\site-packages\EMBEDR\tsne.py:200, in tSNE_Embed.fit(self, X, aff_kwargs) 196 if self.verbose >= 1: 197 print(f"\nGenerating {self.n_components}-dimensional embedding" 198 f" with t-SNE!") --> 200 P = self.initialize_embedding(X, aff_kwargs) 202 ## Optimize Early Exaggeration Phase 203 try:
File ~\Miniconda3\envs\DRaim2\lib\site-packages\EMBEDR\tsne.py:327, in tSNE_Embed.initialize_embedding(self, X, **aff_kwargs) 325 err_str += f", `X`, is an affinity matrix, not a samples x" 326 err_str += f" features matrix." --> 327 raise ValueError(err_str) 329 ## If we wanted a spectral initialization, do that here. 330 elif self.initialization == 'spectral':
ValueError: Cannot generate PCA initialization because input data, `X`, is an affinity matrix, not a samples x features matrix.
I have been trying to work through the code, but I am unclear on why initialisation of the t-SNE embeddings switches from taking the input data to taking only the affinity matrix, which then produces this error.
Please help!!