1. Principle
For the full derivation, see: blog.csdn.net/u010376788/…
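In brief: PCA centers the data, takes the eigenvectors of the feature covariance matrix, and projects the centered data onto the eigenvectors with the largest eigenvalues. Using the same quantities as the code below, with $X$ the $m \times n$ data matrix, $X_c$ the column-centered data, and $W_k$ the $n \times k$ matrix of top-$k$ eigenvectors, the transform is:

$$
\Sigma = \frac{1}{m-1} X_c^{\top} X_c, \qquad \Sigma w_i = \lambda_i w_i, \qquad X_{\text{reduced}} = X_c W_k .
$$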
Python code:
import numpy as np

def compute_pca(X, n_components=2):
    """
    Input:
        X: array of shape (m, n) where each row is an observation and each column is a feature (variable)
        n_components: number of principal components to keep
    Output:
        X_reduced: data transformed into the reduced dimension, shape (m, n_components)
    """
    # Center the data: subtract the mean of each feature (column)
    X_demeaned = X - np.mean(X, axis=0)
    # Covariance matrix of the features (rowvar=False treats columns as variables)
    covariance_matrix = np.cov(X_demeaned, rowvar=False)
    # Eigendecomposition of the symmetric covariance matrix
    eigen_vals, eigen_vecs = np.linalg.eigh(covariance_matrix, UPLO='L')
    # Sort eigenvalues (and matching eigenvectors) in decreasing order
    idx_sorted = np.argsort(eigen_vals)
    idx_sorted_decreasing = idx_sorted[::-1]
    eigen_vals_sorted = eigen_vals[idx_sorted_decreasing]
    eigen_vecs_sorted = eigen_vecs[:, idx_sorted_decreasing]
    # Keep only the eigenvectors with the n_components largest eigenvalues
    eigen_vecs_subset = eigen_vecs_sorted[:, 0:n_components]
    # Project the centered data onto the selected eigenvectors
    X_reduced = np.dot(eigen_vecs_subset.transpose(), X_demeaned.transpose()).transpose()
    return X_reduced
# Quick demo: 3 observations with 10 features each, reduced to 2 components
X = np.random.rand(3, 10)
print(X)
X_reduced = compute_pca(X, n_components=2)
print("Your original matrix was " + str(X.shape) + " and it became:")
print(X_reduced)
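As a sanity check (not part of the original post), the result can be compared against scikit-learn's PCA, assuming scikit-learn is installed. The principal directions are only defined up to a sign flip, so the comparison is done on absolute values:

# Hypothetical sanity check against scikit-learn; not from the original post.
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
X_sklearn = pca.fit_transform(X)

# Each component may differ by a sign flip, so compare magnitudes.
# (Assumes the leading eigenvalues are distinct, which holds almost surely for random data.)
print(np.allclose(np.abs(X_reduced), np.abs(X_sklearn)))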