Attachment 'sheet11.m'
Download 1 function sheet11_solution
2
3 %
4 % Generate some data
5 %
6 [X, Y] = generate_data(50); % training data
7 [XE, YE] = generate_data(1000); % test data
8
9 figure(1);
10 plot(X, Y, 'k+', 'LineWidth', 3, 'MarkerSize', 10);
11
12 Xbase = linspace(-15, 15, 100)';
13
14 % Plot the fit for a number of weights
15 widths = [0.1, 1, 10];
16 STYLES = {'r-', 'b-', 'g-'};
17 LEGENDS = cell(1, length(widths)+1);
18 LEGENDS{1} = 'data';
19 hold on;
20 for wi = 1:length(widths)
21 w = widths(wi);
22 C = learn(w, Xbase, X, Y);
23 YEh = predict(C, XE);
24 plot(XE, YEh, STYLES{wi});
25 LEGENDS{wi+1} = sprintf('width = %.1f', w);
26 end
27 legend(LEGENDS)
28 hold off;
29
30 % in the following, we will use these
31 % candidates for the width
32 widths = logspace(-2, 2, 50);
33
34 % collect all the different errors
35 TRAIN_ERR = zeros(1, length(widths));
36 TEST_ERR = zeros(1, length(widths));
37 DOF = zeros(1, length(widths));
38 CV5_ERR = zeros(5, length(widths));
39 CV10_ERR = zeros(10, length(widths));
40 CP_ERR = zeros(1, length(widths));
41 for wi = 1:length(widths)
42 w = widths(wi);
43 C = learn(w, Xbase, X, Y);
44 Yh = predict(C, X);
45 TRAIN_ERR(wi) = l2err(Y, Yh);
46 YEh = predict(C, XE);
47 TEST_ERR(wi) = l2err(YE, YEh);
48
49 [DOF(wi), CP_ERR(wi)] = cp_statistic(w, Xbase, X, Y);
50 CV5_ERR(:,wi) = cv(5, w, Xbase, X, Y);
51 CV10_ERR(:,wi) = cv(10, w, Xbase, X, Y);
52 end
53
54 figure(2)
55 semilogx(widths, TRAIN_ERR, 'r.-', ...
56 widths, TEST_ERR, 'b.-', ...
57 widths, CP_ERR, 'g.-');
58 legend('train', 'test');
59 grid
60
61 figure(3)
62 semilogx(widths, DOF)
63 title('degrees-of-freedom');
64
65 % generate data from the sinc function
66 function [X, Y] = generate_data(N)
67 X = sort(30*rand(N,1) - 15);
68 Y = sin(X)./X + 0.1*randn(N,1);
69
70 % learn a function comprised of Gaussian bumps
71 % at certain base points
72 function C = learn(width, Xbase, X, Y)
73 PSI = design_matrix(width, Xbase, X);
74 C.W = (PSI*PSI' + 0.1 * eye(length(Xbase)))\(PSI*Y);
75 C.width = width;
76 C.Xbase = Xbase;
77
78 % compute pairwise distances between the rows of X and Y.
79 function D = pwdist(X, Y)
80 D = size(X, 2);
81 N = size(X, 1);
82 M = size(Y, 1);
83
84 XX = sum(X.*X, 2);
85 YY = sum(Y.*Y, 2);
86 D = repmat(XX, 1, M) + repmat(YY', N, 1) - 2*X*Y';
87
88 % Set up the design matrix for a basis function of
89 % Gaussian bumps
90 function PSI = design_matrix(width, Xbase, X)
91 D = pwdist(Xbase, X);
92 PSI = exp(-D/width/width);
93
94 % Predict values for the learned parameters
95 function Yh = predict(C, X)
96 Yh = design_matrix(C.width, C.Xbase, X)'*C.W;
97
98 % compute the l2-error
99 function l = l2err(Y1, Y2)
100 l = mean((Y1 - Y2).^2);
101
102 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
103 %
104 % Your solution below
105
106 % 1. Compute the CP statistics. This includes:
107 %
108 % - computing the training error
109 % - computing the trace of the hat matrix
110 % - estimating the noise level
111 %
112 % Return the degrees-of-freedom and the C_p statistic.
113 function [DOF, CP] = cp_statistic(w, Xbase, X, Y)
114 DOF = 1; % placeholders to make the script run
115 CP = 0;
116 % ...
117
118 % 2. Estimate the cross-validation error. Return all the individual test
119 % errors as a column vector.
120 function ERR = cv(K, w, Xbase, X, Y)
121 ERR = zeros(K, 1); % placeholder to make the script run
122 % ...
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.You are not allowed to attach a file to this page.