Attachment 'sheet08.m'

Download

   1 function sheet08
   2 % Driver for sheet 08: compares the (in-)stability of k-means with a random clusterer.
   3 % Generate data
   4 centers = [0, 0; 7, 3; -2, 4; 0, 10; -5, -5];
   5 % (2): Your centers here
   6 % centers = [...]
   7 X = generate_data(centers, 50);
   8 
   9 MAXK = 8; % maximal number of clusters
  10 ITERS = 10; % how many restarts.
  11 KS = 2:MAXK; % cluster counts on the x-axis; column J of the results belongs to KS(J)
  12 % compute (in-)stability for k-means and a random clustering
  13 DKMEANS = evaluate_clusterer(@k_means_clustering, X, MAXK, ITERS);
  14 DRAND = evaluate_clusterer(@random_clustering, X, MAXK, ITERS);
  15 
  16 % plot the two stability values (mean and std over the ITERS rows; dim 1 also works for ITERS == 1)
  17 figure(1)
  18 errorbar(KS, mean(DKMEANS, 1), std(DKMEANS, 0, 1));
  19 hold on
  20 errorbar(KS, mean(DRAND, 1), std(DRAND, 0, 1), 'r');
  21 hold off
  22 legend('k-means stability', 'random clusterer')
  23 
  24 % normalize the stability by the random clusterer
  25 D = normalize(DKMEANS, DRAND);
  26 
  27 figure(2)
  28 errorbar(KS, mean(D, 1), std(D, 0, 1));
  29 title('normalized k-means')
  30 
  31 % show clustering for most stable number of clusters (smallest mean normalized value)
  32 [dummy, BEST] = min(mean(D, 1)); OPTK = KS(BEST);
  33 
  34 Y = k_means_clustering(OPTK, X);
  35 figure(3);
  36 plot_clustering(X, Y);
  37 
  38 % Sample N points per center: standard-normal noise (randn) added to each row of C, stacked by center.
  39 function X = generate_data(C, N)
  40 X = [];
  41 for ROW = 1:size(C, 1)
  42   X = vertcat(X, ones(N, 1) * C(ROW, :) + randn(N, 2));
  43 end
  44 
  45 % k-means variant that always runs 100 assignment/update rounds (no convergence test).
  46 % K: number of clusters; X: one data point per row. Returns Y, a 1-by-N row of labels in 1..K.
  47 function Y = k_means_clustering(K, X)
  48 N = size(X, 1);
  49 
  50 % randomly select K distinct data points as the initial centers
  51 P = randperm(N);
  52 MEANS = X(P(1:K), :);
  53 
  54 % alternate between nearest-center assignment and mean update
  55 for I = 1:100
  56   % compute nearest neighbor assignments; the explicit dimension keeps Y 1-by-N even when K == 1
  57   dist = pwdist(MEANS, X);
  58   [dummy, Y] = min(dist, [], 1);
  59   
  60   % compute new means; a cluster without members keeps its previous center
  61   for J = 1:K
  62     MEMBERS = (Y == J);
  63     if any(MEMBERS)
  64       MEANS(J, :) = sum(X(MEMBERS, :), 1) / nnz(MEMBERS); % dim 1: a single member must not be summed across columns
  65     end
  66   end
  67 end
  68 
  69 % Scatter-plot the points in X (first column vs. second column), grouped by the labels in Y.
  70 function plot_clustering(X, Y)
  71 XS = X(:, 1); YS = X(:, 2); gscatter(XS, YS, Y);
  72 
  73 % Compute all pairwise SQUARED Euclidean distances quickly.
  74 function D = pwdist(X, Y)
  75 % X is N-by-d and Y is M-by-d (points in rows); D(i, j) = ||X(i, :) - Y(j, :)||^2.
  76 N = size(X, 1);
  77 M = size(Y, 1);
  78 % Uses ||x - y||^2 = ||x||^2 + ||y||^2 - 2*x*y'; round-off can yield tiny negative entries.
  79 XX = sum(X.*X, 2); % squared norm of each row of X (N-by-1)
  80 YY = sum(Y.*Y, 2); % squared norm of each row of Y (M-by-1)
  81 D = repmat(XX, 1, M) + repmat(YY', N, 1) - 2*X*Y';
  82 
  83 % Score a clustering method for every cluster count 2:MAXK, repeated ITERS times.
  84 % D(I, K-1) holds the co-cluster distance between two separate runs of method with K clusters.
  85 function D = evaluate_clusterer(method, X, MAXK, ITERS)
  86 D = zeros(ITERS, MAXK-1);
  87 for COL = 1:MAXK-1
  88   fprintf('K = %d\n', COL + 1);
  89   for REP = 1:ITERS
  90     FIRST = feval(method, COL + 1, X);
  91     SECOND = feval(method, COL + 1, X);
  92     D(REP, COL) = cocluster_distance(FIRST, SECOND);
  93   end
  94 end
  95 
  96 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  97 % Fill in your solutions below
  98 %
  99 
 100 % 1 (a). The random clusterer. It should return random labels from 1..K
 101 % for each data point in X (stored in the rows).
 102 function Y = random_clustering(K, X)
 103 % ... (placeholder to be filled in; Y is not assigned yet)
 104 
 105 % 1 (b). Compute the co-cluster distance for two labelings Y1 and Y2.
 106 function D = cocluster_distance(Y1, Y2)
 107 % ... (placeholder to be filled in; evaluate_clusterer stores the result in one matrix entry, so D must be a scalar)
 108 
 109 % 1 (c). Normalize DKMEANS by dividing the result for a fixed
 110 % K by the mean value of the corresponding values in DRAND.
 111 function D = normalize(DKMEANS, DRAND)
 112 % ... (placeholder to be filled in; both inputs come from evaluate_clusterer, ITERS rows by MAXK-1 columns)

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.
  • [get | view] (2009-03-12 14:03:18, 318.2 KB) [[attachment:BayesianDecisionTheory.pdf]]
  • [get | view] (2009-03-12 14:03:18, 247.3 KB) [[attachment:crossvalidation.pdf]]
  • [get | view] (2009-03-12 14:03:18, 1740.8 KB) [[attachment:mlintro.pdf]]
  • [get | view] (2009-03-12 14:03:18, 665.3 KB) [[attachment:probtheo.pdf]]
  • [get | view] (2009-03-12 14:03:18, 52.7 KB) [[attachment:sheet01.pdf]]
  • [get | view] (2009-03-12 14:03:18, 61.7 KB) [[attachment:sheet02.pdf]]
  • [get | view] (2009-03-12 14:03:18, 65.8 KB) [[attachment:sheet03.pdf]]
  • [get | view] (2009-03-12 14:03:18, 2.1 KB) [[attachment:sheet04.m]]
  • [get | view] (2009-03-12 14:03:18, 80.3 KB) [[attachment:sheet04.pdf]]
  • [get | view] (2009-03-12 14:03:18, 106.7 KB) [[attachment:sheet05.pdf]]
  • [get | view] (2009-03-12 14:03:18, 1.6 KB) [[attachment:sheet05_01.m]]
  • [get | view] (2009-03-12 14:03:18, 1.6 KB) [[attachment:sheet05_02.m]]
  • [get | view] (2009-03-12 14:03:18, 0.8 KB) [[attachment:sheet06.m]]
  • [get | view] (2009-03-12 14:03:18, 93.2 KB) [[attachment:sheet06.pdf]]
  • [get | view] (2009-03-12 14:03:18, 1.4 KB) [[attachment:sheet07.m]]
  • [get | view] (2009-03-12 14:03:18, 100.6 KB) [[attachment:sheet07.pdf]]
  • [get | view] (2009-03-12 14:03:18, 2.7 KB) [[attachment:sheet08.m]]
  • [get | view] (2009-03-12 14:03:18, 85.4 KB) [[attachment:sheet08.pdf]]
  • [get | view] (2009-03-12 14:03:18, 45.2 KB) [[attachment:sheet09.pdf]]
  • [get | view] (2009-03-12 14:03:18, 1.5 KB) [[attachment:sheet10.m]]
  • [get | view] (2009-03-12 14:03:18, 59.4 KB) [[attachment:sheet10.pdf]]
  • [get | view] (2009-03-12 14:03:18, 3.0 KB) [[attachment:sheet11.m]]
  • [get | view] (2009-03-12 14:03:18, 68.6 KB) [[attachment:sheet11.pdf]]
  • [get | view] (2009-03-12 14:03:18, 1.7 KB) [[attachment:sheet12.m]]
  • [get | view] (2009-03-12 14:03:18, 62.6 KB) [[attachment:sheet12.pdf]]
  • [get | view] (2009-03-12 14:03:18, 2.4 KB) [[attachment:sheet12_data.m]]
  • [get | view] (2009-03-12 14:03:18, 63.1 KB) [[attachment:sheet13.pdf]]
  • [get | view] (2009-03-12 14:03:18, 1.6 KB) [[attachment:sheet14.m]]
  • [get | view] (2009-03-12 14:03:18, 73.7 KB) [[attachment:sheet14.pdf]]
  • [get | view] (2009-03-12 14:03:18, 4.8 KB) [[attachment:sheet14_data.m]]
  • [get | view] (2009-03-12 14:03:18, 120.2 KB) [[attachment:summary.pdf]]
  • [get | view] (2009-03-12 14:03:18, 1505.5 KB) [[attachment:vl_ica_tub-08.pdf]]
 All files | Selected Files: delete move to page copy to page

You are not allowed to attach a file to this page.