Closed markroxor closed 6 years ago
import torch as ch
from sklearn.decomposition import PCA
import numpy as np
# Reproduction data: the same n x n_dim matrix is built twice, once through a
# torch tensor (converted to numpy afterwards) and once directly in numpy.
n = 5
n_dim = 3

# Coefficients used to generate the columns.
m = 5
c = 2
d = 5

# Built through torch: the printed result of X.numpy() reports dtype=float32.
X = ch.zeros(n, n_dim)
for i in range(n):
    X[i, 0] = i
    X[i, 1] = m*i + c
    X[i, 2] = (d - 2*X[i, 0] - 3*X[i, 1])/2
X = X.numpy()

# Built directly in numpy with the default dtype (float64).
# Every column is an affine function of i, so the n rows are collinear points.
Xtrain = np.zeros([n, n_dim])
for i in range(n):
    Xtrain[i, 0] = i
    Xtrain[i, 1] = m*i + c
    Xtrain[i, 2] = (d - 2*Xtrain[i, 0] - 3*Xtrain[i, 1])/2
print(X, type(X))
>>> (array([[ 0. , 2. , -0.5],
[ 1. , 7. , -9. ],
[ 2. , 12. , -17.5],
[ 3. , 17. , -26. ],
[ 4. , 22. , -34.5]], dtype=float32), <type 'numpy.ndarray'>)
print(Xtrain, type(Xtrain))
>>> (array([[ 0. , 2. , -0.5],
[ 1. , 7. , -9. ],
[ 2. , 12. , -17.5],
[ 3. , 17. , -26. ],
[ 4. , 22. , -34.5]]), <type 'numpy.ndarray'>)
print(np.allclose(Xtrain, X))
>>> True
# Fit a 3-component PCA on X, the array that was produced via the torch tensor.
pca_x = PCA(n_components=3)
pca_x.fit(X)
# Fit an identically configured PCA on Xtrain, the array built directly in numpy.
pca_xtrain = PCA(n_components=3)
pca_xtrain.fit(Xtrain)
print(pca_x.components_)
>>> [[-1.0088666e-01 -5.0443327e-01 8.5753661e-01]
[-5.8938619e-02 8.6345071e-01 5.0097811e-01]
[-9.9315059e-01 -7.4505806e-09 -1.1684125e-01]]
print(pca_xtrain.components_)
>>> [[-0.10088665 -0.50443327 0.85753656]
[-0.82430398 -0.44025326 -0.35594945]
[-0.55708601 0.74278135 0.37139068]]
Possibly because the dtypes differ: X is float32
while Xtrain is float64.
I think something like X = ch.zeros((n, n_dim), dtype=ch.float64)
should solve my issue. Unfortunately, I couldn't find it in the documentation, as it is still an unstable-build feature.
Fix available in pytorch v0.4.
Xtrain = ch.Tensor(Xtrain)
Xtrain = ch.from_numpy(Xtrain)
Xtrain = Xtrain.numpy()
# Fit PCA, then take the SVD of the mean-centered data directly so the two
# results can be printed side by side.
pca = PCA(n_components=3)
pca.fit(Xtrain)
U, S, VT = np.linalg.svd(Xtrain - Xtrain.mean(0))
# Rows of VT are the right singular vectors of the centered data.
print(VT)
print(pca.components_)
Xtrain = ch.Tensor(Xtrain)
# Round-trip the numpy array through a torch tensor; torch.from_numpy keeps the
# array's dtype, unlike the float32 default seen with ch.zeros above.
Xtrain = ch.from_numpy(Xtrain)
Xtrain = Xtrain.numpy()
# Fit PCA and compute the SVD of the mean-centered data for comparison.
pca = PCA(n_components=3)
pca.fit(Xtrain)
U, S, VT = np.linalg.svd(Xtrain - Xtrain.mean(0))
print(VT)