function sheet05_02
% Principal Component Analysis
% Demo driver: builds several 2-D point sets, runs the local pca() on each
% and visualizes the result with plot_pca().

% simple data set: Gaussian cloud, scaled per axis, rotated, then shifted
N = 500;
pts = randn(N, 2);
pts = pts * diag([1, 2]);
pts = pts * rotmat(0.3*pi);
pts = pts + repmat([3, -4], N, 1);
plot(pts(:,1), pts(:,2), '.');
axis equal
grid

% first PCA without centering (result is echoed to the command window)
figure(1)
p = pca(pts, 0)
plot_pca(pts, p);
title('PCA without centering')

% now, with proper centering (result is echoed to the command window)
figure(2)
p = pca(pts)
plot_pca(pts, p);
title('PCA with centering')

% and now, we add one further, far-away point
figure(3)
pts = [pts; -50, -50];
p = pca(pts);
plot_pca(pts, p);
title('PCA is not very stable with respect to outliers')

% how about other data? A noisy curve instead of a Gaussian cloud
figure(4)
t = rand(N, 1) * 4 * pi;
curve = [t, cos(t) + (t.^3)/1e2];
pts = curve + 0.5*randn(N, 2);
p = pca(pts);
plot_pca(pts, p);
function R = rotmat(phi)
% ROTMAT  2-by-2 rotation matrix for the angle phi (in radians).
%   R = [cos(phi) sin(phi); -sin(phi) cos(phi)], i.e. the form used when
%   row vectors are multiplied from the left (x * R).
c = cos(phi);
s = sin(phi);
R = [ c, s; ...
     -s, c];
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Fill in your solutions below
% 1. Compute the principal components. If the second argument is omitted or
% non-zero, properly center the data. If it is zero, don't center. Return a
% struct with fields "U", "D", and "M" with
%
% U is the matrix whose columns are the principal components
% D is a vector with the variances
% M is the mean vector
function result = pca(X, center)
% PCA  Principal component analysis of the rows of X.
%
%   result = pca(X)          centers the data first (default).
%   result = pca(X, center)  centers the data only if CENTER is non-zero.
%
%   X       n-by-d matrix, one observation per row.
%   center  optional flag; omitted or non-zero -> subtract the mean,
%           zero -> analyse the raw second moments about the origin.
%
%   result.U  d-by-d matrix whose columns are the principal components,
%             ordered by decreasing variance
%   result.D  d-by-1 vector with the variances along the columns of U
%   result.M  1-by-d mean vector that was subtracted (zeros if not centered)
if nargin == 1
  center = 1;
end

if isempty(X)
  error('pca:emptyInput', 'X must contain at least one observation.');
end

[n, d] = size(X);

if center
  M = mean(X, 1);
  % unbiased covariance estimate; guard against n == 1
  denom = max(n - 1, 1);
else
  % no centering: the "mean" used is the origin, and the second moment
  % about a known point is normalized by n
  M = zeros(1, d);
  denom = n;
end

% repmat keeps this compatible with releases lacking implicit expansion
Xc = X - repmat(M, n, 1);

C = (Xc' * Xc) / denom;
% enforce exact symmetry so that eig returns real eigenpairs
C = (C + C') / 2;

[V, L] = eig(C);
% eig does not guarantee any particular order; sort by decreasing variance
[D, order] = sort(diag(L), 'descend');
U = V(:, order);
% clip tiny negative values caused by round-off
D = max(D, 0);

result.U = U;
result.D = D;
result.M = M;
% 2. Plot the principal components for the PCA. First, plot the data as
% points. Then, draw an ellipse whose radii are given as 2*sqrt(D) such
% that the ellipse contains the data more or less. Add a grid and make sure
% axes are sized equally.
function plot_pca(X, p)
% PLOT_PCA  Scatter plot of 2-D data with the PCA ellipse on top.
%
%   X  n-by-2 data matrix, one point per row.
%   p  struct with fields U (principal components as columns), D
%      (variances) and M (mean vector).
%
%   The ellipse is centered at p.M, its axes point along the columns of
%   p.U and its radii are 2*sqrt(p.D), so it covers most of the data when
%   the PCA fits well.

% remember the hold state so the caller's axes settings are not changed
wasHeld = ishold;

plot(X(:,1), X(:,2), '.');
hold on

% unit circle -> scale by the radii -> rotate into the PCA basis -> shift
t = linspace(0, 2*pi, 200);
radii = 2 * sqrt(p.D(:));
E = p.U * diag(radii) * [cos(t); sin(t)];
E = E + repmat(p.M(:), 1, numel(t));
plot(E(1,:), E(2,:), 'r-', 'LineWidth', 2);

if ~wasHeld
  hold off
end

grid on
axis equal