cyfile / Matlab-miscellanies

各种Matlab代码
0 stars 0 forks source link

Principal Component Analysis (PCA) #3

Open 213cy opened 9 years ago

213cy commented 9 years ago
% Load the "cities" example data set. It provides three variables:
%   categories - string matrix with the names of the rating indices
%   names      - string matrix with the 329 city names
%   ratings    - data matrix with 329 rows (cities) and 9 columns (indices)
load('cities');

% Horizontal box plot of all rating indices, one box per category.
boxplot(ratings, 'labels', categories, 'orientation', 'horizontal');

% From here on only the first three rating indices are analysed.
ratings = ratings(:, 1:3);
%%
% Correlation-based PCA. The next two calls are equivalent: running pca on
% z-scored data is the same as decomposing the correlation matrix.
[wcoeff_z,score_z,latent_z,tsquared_z,explained_z] = pca(zscore(ratings));
[COEFF_r,latent_r,explained_r] = pcacov(corr(ratings));

% Cross-check against a direct SVD and a direct eigendecomposition.
[U,S,V_svd] = svd(zscore(ratings));
[V,D] = eig(corr(ratings));
% eig does not guarantee any particular eigenvalue order, while pca and
% pcacov return components by decreasing variance, so sort explicitly
% instead of comparing V and diag(D) as returned.
[latent_eig_z,order_z] = sort(diag(D),'descend');
V_sorted_z = V(:,order_z);
% wcoeff_z, COEFF_r, V_sorted_z and V_svd describe the same principal axes;
% individual columns may still differ by a factor of -1 because the sign of
% an eigenvector is arbitrary.
% latent_eig_z matches latent_z and latent_r.
explained_eig_z = 100*latent_eig_z/sum(latent_eig_z); % matches explained_z and explained_r
%%
% Covariance-based PCA. The next three calls are equivalent: pca works on the
% covariance of the data by default, so removing the column means beforehand
% does not change the result.
[wcoeff,score,latent,tsquared,explained] = pca(ratings);
[wcoeff_d,score_d,latent_d,tsquared_d,explained_d] = pca(detrend(ratings,'constant'));
[COEFF_c,latent_c,explained_c] = pcacov(cov(ratings));

% Cross-check against a direct SVD of the centered data and a direct
% eigendecomposition of the covariance matrix. The option is spelled out as
% 'constant' (not abbreviated) to match the call above.
[U,S,V_svd] = svd(detrend(ratings,'constant'));
[V,D] = eig(cov(ratings));
% eig does not guarantee any particular eigenvalue order (V often comes back
% column-reversed, i.e. fliplr(V), relative to pca), so sort explicitly.
[latent_eig,order_c] = sort(diag(D),'descend');
V_sorted = V(:,order_c);
% wcoeff, wcoeff_d, COEFF_c, V_sorted and V_svd describe the same principal
% axes; individual columns may still differ by a factor of -1.
% latent_eig matches latent and latent_c.
explained_eig = 100*latent_eig/sum(latent_eig); % matches explained and explained_c
%%
% Weighted PCA using inverse-variance variable weights.
% (Original author's open question: are these the weights of each variable's
% contribution to the variance?)
w = 1./var(ratings);
[wcoeff_w,score_w,latent_w,tsquared_w,explained_w] = pca(ratings,'VariableWeights',w);
% Rescale each row of the weighted coefficients by 1/std of that variable.
% Solved with the backslash operator instead of forming an explicit inverse.
coefforth = diag(std(ratings))\wcoeff_w;
% Manual counterpart: scale every column by sqrt(w) = 1/std and run an
% unweighted pca. wcoeff_m and latent_m are expected to agree with coefforth
% and latent_w (up to column signs) -- compare them to confirm.
[wcoeff_m,score_m,latent_m,tsquared_m,explained_m] = pca(ratings*diag(sqrt(w)));