Open 213cy opened 9 years ago
% Compare several equivalent ways of running principal component analysis
% (PCA) on the first three columns of the "cities" ratings data:
%   1. correlation-based PCA  (pca on z-scores, pcacov on corr, eig on corr)
%   2. covariance-based PCA   (pca on raw / centered data, pcacov on cov,
%                              eig on cov)
%   3. variable-weighted PCA  (inverse-variance weights)
%
% Requires the Statistics and Machine Learning Toolbox (cities.mat, pca,
% pcacov, zscore, boxplot).

load cities
% In total, the cities data set contains three variables:
%   categories, a string matrix containing the names of the indices
%   names, a string matrix containing the 329 city names
%   ratings, the data matrix with 329 rows and 9 columns
boxplot(ratings,'orientation','horizontal','labels',categories)

% Keep only the first three rating columns for the comparisons below.
ratings = ratings(:,1:3);

%% Correlation-based PCA
% The following two statements are equivalent: both perform PCA on the
% correlation matrix.
[wcoeff_z,score_z,latent_z,tsquared_z,explained_z] = pca(zscore(ratings));
[COEFF_r,latent_r,explained_r] = pcacov(corr(ratings));
% [U,S,V_svd] = svd(zscore(ratings));

% eig does not promise any particular ordering of its output, whereas
% pca/pcacov return components in order of decreasing variance. Sort the
% eigenpairs explicitly so they can be compared column by column.
[V,D] = eig(corr(ratings));
[latent_corr_eig,order_corr] = sort(diag(D),'descend');
% wcoeff_z, COEFF_r, V_corr (and V_svd) hold the same loadings; individual
% columns may still differ by a factor of -1.
V_corr = V(:,order_corr);
% latent_corr_eig matches latent_z and latent_r.
% explained_corr_eig matches explained_z and explained_r.
explained_corr_eig = 100*latent_corr_eig/sum(latent_corr_eig);

%% Covariance-based PCA
% The following three statements are equivalent: pca applied to data works
% on the covariance matrix by default (it centers the columns itself, so
% removing the column means beforehand changes nothing).
[wcoeff,score,latent,tsquared,explained] = pca(ratings);
[wcoeff_d,score_d,latent_d,tsquared_d,explained_d] = pca(detrend(ratings,'constant'));
[COEFF_c,latent_c,explained_c] = pcacov(cov(ratings));
% [U,S,V_svd] = svd(detrend(ratings,'c'));

% Note: the raw V returned by eig may have its columns in reverse order
% compared with pca (i.e. fliplr(V)); sorting removes that ambiguity.
[V,D] = eig(cov(ratings));
[latent_cov_eig,order_cov] = sort(diag(D),'descend');
% wcoeff, wcoeff_d, COEFF_c, V_cov (and V_svd) hold the same loadings, up to
% the sign of each column.
V_cov = V(:,order_cov);
% latent_cov_eig matches latent and latent_c.
% explained_cov_eig matches explained and explained_c.
explained_cov_eig = 100*latent_cov_eig/sum(latent_cov_eig);

%% Variable-weighted PCA
% Weights are the inverse of each variable's sample variance.
w = 1./var(ratings);
[wcoeff_w,score_w,latent_w,tsquared_w,explained_w] = pca(ratings,'VariableWeights',w);
% Rescale the weighted coefficients by 1/std to obtain orthonormal
% coefficients. Backslash solves the diagonal system directly instead of
% forming an explicit inverse.
coefforth = diag(std(ratings))\wcoeff_w;
% Same analysis done by scaling the columns by sqrt(w) = 1/std by hand and
% running an unweighted pca on the scaled data.
[wcoeff_m,score_m,latent_m,tsquared_m,explained_m] = pca(ratings*sqrt(diag(w)));