In [1]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
def set_ax_properties(ax, title, proj_type="ortho"):
ax.set_title(title)
ax.set_proj_type(proj_type)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_zlim(zlim)
ax.set_xticks([])
ax.set_yticks([])
ax.set_zticks([])
np.random.seed(777)
n = 500
# ellipse-shaped sphere
A = np.array(
[
[0.5, 0.0, 0.0],
[1.3, 3.0, 0.0],
[0.0, 0.0, 1.5],
]
)
X_ = np.random.rand(n, 3)
X = X_ @ A.T
mu = X.mean(axis=0, keepdims=True)
# centralize X to have zero mean
X = X - mu
assert np.allclose(X.mean(axis=0), 0.0)
# calculate covariance matrix of X
# covariance_matrix = (1 / X.shape[0]) * X.T @ X
covariance_matrix = np.cov(X, rowvar=False, ddof=False)
eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)
# sort eigenvectors in descending
argsort = eigenvalues.argsort()[::-1]
eigenvalues = eigenvalues[argsort]
eigenvectors = eigenvectors.T[argsort]
xlim = (-1.5, 1.5)
ylim = (-1.5, 1.5)
zlim = (-1.5, 1.5)
# visualization
fig = plt.figure(figsize=(25, 10))
######################## ax1: raw data
ax1 = fig.add_subplot(1, 3, 1, projection="3d")
ax1.scatter(X[:, 0], X[:, 1], X[:, 2], s=10, alpha=0.6, color="gray")
set_ax_properties(ax1, title="Centralized Data")
######################## ax1: raw data
######################## ax2: eigenvectors
ax2 = fig.add_subplot(1, 3, 2, projection="3d")
ax2.scatter(X[:, 0], X[:, 1], X[:, 2], s=10, alpha=0.3, color="gray")
# visualize eigenvectors
colors = ["red", "green", "blue"]
for i in range(3):
projection = X @ eigenvectors[i]
arrow = eigenvectors[i] * projection.max()
ax2.quiver(
0,
0,
0,
arrow[0],
arrow[1],
arrow[2],
arrow_length_ratio=0.1 / np.linalg.norm(arrow),
linewidth=2,
color=colors[i],
label=f"PC {i}",
)
ax2.legend()
set_ax_properties(ax2, title="Eigenvectors of Covariance Matrix")
######################## ax2: eigenvectors
######################## ax3: projection
ax3 = fig.add_subplot(1, 3, 3, projection="3d")
pc1_scaled = eigenvectors[0] * 2.2
pc2_scaled = eigenvectors[1] * 2.2
# visualize 2D plane
plane = np.array(
[
-pc1_scaled - pc2_scaled,
pc1_scaled - pc2_scaled,
pc1_scaled + pc2_scaled,
-pc1_scaled + pc2_scaled,
-pc1_scaled - pc2_scaled
]
)
# projecting 3D X onto a 2D plane
projected_2d = X @ eigenvectors[:2].T
projected_3d = projected_2d @ eigenvectors[:2]
ax3.plot(plane[:, 0], plane[:, 1], plane[:, 2], linewidth=1, alpha=0.2, color="black", label="Plane")
ax3.add_collection3d(Poly3DCollection([plane], alpha=0.3, facecolor="white"))
ax3.scatter(X[:, 0], X[:, 1], X[:, 2], s=10, alpha=0.3, color="gray")
ax3.scatter(
projected_3d[:, 0],
projected_3d[:, 1],
projected_3d[:, 2],
s=10,
alpha=0.5,
color="purple",
label="Projected"
)
# visualize 2 principal components
for i in range(2):
projection = X @ eigenvectors[i]
arrow = eigenvectors[i] * projection.max()
ax3.quiver(
0,
0,
0,
arrow[0],
arrow[1],
arrow[2],
arrow_length_ratio=0.1 / np.linalg.norm(arrow),
linewidth=2,
color=colors[i],
label=f"PC {i}",
)
ax3.legend()
set_ax_properties(ax3, title="Projection on Principal Component Plane")
######################## ax3: projection
plt.tight_layout()
plt.show()