% [h,m] = makehist(data,w) % makes a histogram (returned in h) of the data in data % with the width of a single bin as w. The minimum point (ie the % point on the left side of the bin with the smallest x value) is % returned in m (the maximum is therefore m+w*(s+1) where s is the % size of h % % Note that this function makes sure that one of the histogram % cell boundaries lines up with the origin -- this is more stable than % lining up the maximum or minimum or average point with a boundary function [h,m] = makehist(data, w) big = max(data); small = min(data); offset = -floor(small/w); l = offset+floor(big/w)+1; h = zeros(l,1); [dl,t] = size(data); inc = 1/(dl*w); for i=1:dl, p = floor(data(i)/w)+offset+1; h(p) = h(p)+inc; end; m = -offset*w; %------------------------------------------------------------------------ % function plothist(h,w,m,min,max) % plots the histogram in h with cell widths of w and a minimum point of m % on the range from min to max function pts = plothist(h,w,m,min,max) [ncells,t] = size(h); pts = zeros(ncells*2+4,2); if (min<=m), pts(1,1) = min; pts(1,2) = 0; pts(2,1) = m; pts(2,2) = 0; else, minp = h(floor((min-m)/w)+1); pts(1,1) = min; pts(1,2) = minp; pts(2,1) = min; pts(2,2) = minp; end; if (max>=ncells*w+m), pts(ncells*2+3,1) = ncells*w+m; pts(ncells*2+3,2) = 0; pts(ncells*2+4,1) = max; pts(ncells*2+4,2) = 0; else, maxp = h(floor((max-m)/w)+1); pts(ncells*2+3,1) = max; pts(ncells*2+3,2) = maxp; pts(ncells*2+4,1) = max; pts(ncells*2+4,2) = maxp; end; for i=1:ncells, y = h(i); minx = (i-1)*w+m; if (minxmax), minx = max; y = maxp; end; maxx = i*w+m; if (maxxmax), maxx = max; end; pts(i*2+1,1) = minx; pts(i*2+1,2) = y; pts(i*2+2,1) = maxx; pts(i*2+2,2) = y; end; plot(pts(:,1),pts(:,2)); %------------------------------------------------------------------------ % function parzengauss(data, var, min, max, res) % plots a parzen density estimate from the 1D data points given % in data (nx1) with var as the variance of the Gaussians % The density is plot from min to max with a resolution of res function parzengauss(data, var, min, max, res) npts = floor((max-min)/res)+1; val = zeros(npts,2); [datasize,t] = size(data); den = sqrt(2*pi*var)*datasize; for i=1:npts, p = min+(i-1)*res; v = 0; for j=1:datasize, v = v+exp(-(p-data(j))^2/(2*var)); end; v = v/den; val(i,1) = p; val(i,2) = v; end; plot(val(:,1),val(:,2)); %------------------------------------------------------------------------ % function parzenbox(data, w, min, max, res) % plots a parzen density estimate from the 1D data points given % in data (nx1) with w as the width of the boxcar function (-w/2 to w/2) % The density is plot from min to max with a resolution of res function parzenbox(data, w, min, max, res) % Note: This function *could* be plotted exactly instead of sampling npts = floor((max-min)/res)+1; val = zeros(npts,2); [datasize,t] = size(data); inc = 1/(datasize*w); for i=1:npts, p = min+(i-1)*res; v = 0; for j=1:datasize, if (p-data(j)-w/2), v = v + inc; end; end; val(i,1) = p; val(i,2) = v; end; plot(val(:,1),val(:,2)); %------------------------------------------------------------------------ % function [prior, mean, var] = emmixture(data, n, itt) % estimates the density of the distribution which produced data by % n Gaussians using EM clustering. The results (priors, means and variances) % are given as output. For 1D data only. % The estimation is run for itt itteration (it would probably be better % to put a stopping condition -- like the log likelihood increases by % less than a threshold) function [prior, mean, var] = emmixture(data, n, itt) minx = min(data); maxx = max(data); % just to get some random distibution that is in the right range: mean = rand(n,1)*(maxx-minx)+minx; var = repmat((maxx-minx)/(2*n),n,1); prior = ones(n,1)/n; [ds,t] = size(data); D = repmat(data,1,n); % This is needed to prevent the variance or prior probablity of % a particular distribution from going to zero (since we don't have % an infinite amount of data, this could happen) minval = 1e-300; for i=1:itt, % SD = squared differences SD = D-repmat(mean',ds,1); SD = SD.*SD; % W = weights to the weighted averaged W = -SD./(2*repmat(var',ds,1)); W = exp(W).*repmat((prior./sqrt(2*pi*var))',ds,1); W = W + minval; % W at this point is p(x|j)p(j) % normalize the weights % which makes W p(x|j)p(j) / p(x) % or p(j|x) W = W./repmat(sum(W,2),1,n); temp = sum(W,1); % now renormalize the weights just once in the other % direction so that the weights are % p(j|x) / sum(p(k|x) W = W./repmat(temp,ds,1); prior = temp'./sum(temp); var = sum(SD.*W,1)'; mean = sum(D.*W,1)'; end; %------------------------------------------------------------------------ % function plotmixture(prior, mean, var, min, max, res) % plots a Gaussian mixture density model (given by prior, mean, and var) % The density is plot from min to max with a resolution of res function plotmixture(prior, mean, var, min, max, res) npts = floor((max-min)/res)+1; val = zeros(npts,2); [n,t] = size(prior); mult = prior./sqrt(2*pi*var); for i=1:npts, p = min+(i-1)*res; sq = repmat(p,n,1)-mean; v = sum(exp((-sq.*sq)./(2*var)).*mult); val(i,1) = p; val(i,2) = v; end; plot(val(:,1),val(:,2)); %------------------------------------------------------------------------ % function plotgamma(alpha,lambda,min,max,res) % plots the gamma distribution from min to max with a resolution of res % (works only for integer alpha) % Not required for the problem set function plotgamma(alpha,lambda,min,max,res) npts = floor((max-min)/res)+1; val = zeros(npts,2); mult = lambda^alpha; for i=1:alpha-1, mult = mult/i; end; for i=1:npts, p = min+(i-1)*res; if (p>=0), v = mult*p^(alpha-1)*exp(-lambda*p); else, v = 0; end; val(i,1) = p; val(i,2) = v; end; plot(val(:,1),val(:,2)); %---------------------------------------------------------------------------- function F = flagmax(X) % F = flagmax(X) % returns f which is the matrix of all zeros except for one 1 in each % row in the place of the maximum element of that row in X F = zeros(size(X)); [Y,I] = max(X,[],2); for i=1:size(X,1), F(i,I(i)) = 1; end; %---------------------------------------------------------------------------- function [means, sigmas, priors] = fit_gaussians(input, target) % function [means sigmas priors] = fit_gaussians(input, target) % % For each target class fit a multi-dimensional gaussian. % % Assumes the targets are stored one column per class. When the i'th target % column is greater than 0 it identifies that row as being an element of % class i. % % Returns the means, the covariance matrices, and the priors. % The ith row of means is the mean of the i'th class % invsigmas is a 3D array. The third index ranges of the classes. % priors is a vector. numclasses = size(target, 2); for i = 1:numclasses select = target(:,i) > 0; size(select) data = input(select,:); means(i,:) = mean(data); sigmas(:,:,i) = cov(data) priors(i,1) = size(data, 1) / size(input, 1); end %---------------------------------------------------------------------------- function [means, sigmas, priors, ss] = fit_sphere(data, target) % function [means, sigmas, priors, ss] = fit_sphere(data, target) % returns the estimates Gaussians for each class (their mean vectors, % covariance matrices, and priors) with the constraint that the % covariance matrices must be spherical. The ss returned variable % is a list of the scaling of each covariance matrix from the identity % matrix nc = size(target,2); dim = size(data,2); for i= 1:nc, s = target(:,i) > 0; size(s); d = data(s,:); means(i,:) = mean(d); diff = d-repmat(means(i,:),size(d,1),1); ss(i) = sum(sum(diff.*diff))/(size(d,1)*dim); sigmas(:,:,i) = eye(dim)*ss(i); ss(i) = sqrt(ss(i)); priors(i) = size(d,1) / size(data,1); end %---------------------------------------------------------------------------- function [class] = gaussian_classify(DATA, MEANS, SIGS, PRIORS) % % Computes the density of each datapoint under each of the Gaussian classes. % % ---- Preconditions ---- % DATA: rows contain multidimensional datapoints (d columns) % MEANS: the ith row is the mean of the ith model % SIGS: array of covariance matrices sigs(:,:,i) is the cov of ith model % PRIOR: column vector with the prior of the ith model in the ith row. % % ---- Postconditions ---- % class: a column vector where the nth row contains the classes of % the datapoint in the nth row of data. % % ----- Description ----- % This function computes a density from a multigaussian model, where each % gaussian has a different mean, covariance, and prior probability. In the % case of one gaussian, the function will also work, naturally. dens = gaussian_density(DATA, MEANS, SIGS, PRIORS); class = zeros(size(dens)); [val ind] = max(dens'); for i = 1:size(class,1) class(i,ind(i)) = 1; end %---------------------------------------------------------------------------- function [dens] = gaussian_density(DATA, MEANS, SIGS, PRIORS) % % Computes the density of each datapoint under each of the Gaussian classes. % % ---- Preconditions ---- % DATA: rows contain multidimensional datapoints (d columns) % MEANS: the ith row is the mean of the ith model % SIGS: array of covariance matrices sigs(:,:,i) is the cov of ith model % PRIOR: column vector with the prior of the ith model in the ith row. % % ---- Postconditions ---- % dens: a column vector where the nth row contains the density associated % with the datapoint in the nth row of data. % % ----- Description ----- % This function computes a density from a multigaussian model, where each % gaussian has a different mean, covariance, and prior probability. In the % case of one gaussian, the function will also work, naturally. [N,D] = size(DATA); % N <- number of datapoints, D <- dimensionality M = size(MEANS, 1); dens = zeros(N,M); for m=1:M, const = 1 / (sqrt((2*pi)^D) * sqrt(det(SIGS(:,:,m)))); % Tricky.. we want the sqrt of the inverse. invsig = SIGS(:,:,m)^-0.5; % Subtract mean cdata = DATA - ones(N,1)*MEANS(m,:); % Multiply each row by inv. cov. ndata = cdata * invsig; % Take the componentwise product pdata = ndata .* ndata; % Take the row sum of the componentwise products dots = sum(pdata')'; dens(:,m) = const * PRIORS(m) * exp(-0.5 * dots); end; %---------------------------------------------------------------------------- function output = nearest(input, data, target) % output = nearest(input, data, target) % returns the classification of the input points based on the training % data in data and target % compute the distances (well, the squared distances) dis = repmat(sum(data.*data,2)',size(input,1),1) - ... 2*input*data' + repmat(sum(input.*input,2),1,size(data,1)); % find the minimum for each input [val, ind] = min(dis,[],2); % pull out the corresponding targets and return them output = target(ind,:); %---------------------------------------------------------------------------- function output = parzen_classify(input, data, target, sigma) % output = parzen_classify(input,data,target,sigma) % returns the probability of each class (which yeilds a hard classification % when run through flagmax) of the input points in input based on the % training data in data and target and the sigma value. nc = size(target,2); ni = size(input,1); in2 = sum(input.*input,2); s2 = sigma*sigma; for i=1:nc, s = target(:,i) > 0; d = data(s,:); nd = size(d,1); % note that (x-y)'*(x-y) = x'*x - 2x'*y + y'*y output(:,i) = sum(exp(-repmat(sum(d.*d,2)',ni,1) + ... 2*input*d' - repmat(sum(input.*input,2),1,nd)/(2*s2)),2)/nd; end; %---------------------------------------------------------------------------- function plot_gaussian(mean, sigma) % % function plot_gaussian(mean, sigma) % % ---- Preconditions ---- % MEAN: row % SIGMA: the covariance matrix % % ---- Postconditions ---- % % ----- Description ----- % Plots the eigenvectors of the Covariance matrix. % [u, s, v] = svd(sigma); drawvec1 = [mean; mean+(u(:,1)*sqrt(s(1,1)))'; mean-1*(u(:,1)*sqrt(s(1,1)))']; drawvec2 = [mean; mean+(u(:,2)*sqrt(s(2,2)))'; mean-1*(u(:,2)*sqrt(s(2,2)))']; drawvec = [drawvec1 ; drawvec2]; plot(drawvec(:,1), drawvec(:,2), 'r'); %---------------------------------------------------------------------------- function plotc(f,npts,varargin) % plotc(f,npts,param1,param2,param3,...) % This function takes in a string with the name of a function that will % classify a matrix of points (each example is one row), f. npts is % the resolution at which to plot and the remaining parameters (any number) % are passed on without modification to the function to be plotted % % This function assumes that a graph has already been plotted and proceedes % to plot the decision boundries inside the axes of the current graph (thus % drawing over the current graph). For this to work, f needs to be a % function which takes a matrix of points (one per row) as the first % argument followed by the parameters given to plotc. f should return % a matrix of classification (one target vector per row) of 0's and 1's. % grab current dimensions of the plot: xr = get(gca,'XLim'); yr = get(gca,'YLim'); maxx = xr(2); minx = xr(1); maxy = yr(2); miny = yr(1); xstep = (maxx-minx)/(npts-1); ystep = (maxy-miny)/(npts-1); xvec = zeros(npts,1); yvec = zeros(npts,1); % evaluated the function f with parameters varargin at each point: i = 0; for y=0:npts-1, yvec(y+1) = ystep*y+miny; end; for x=0:npts-1, xvec(x+1) = xstep*x+minx; for y=0:npts-1, i = i+1; X(i,:) = [xvec(x+1) yvec(y+1)]; end; end; Z = feval(f,X,varargin{:}); if (size(Z,2)==1), Z = Z>0.5; else, Z = flagmax(Z); end; Z = permute(Z,[1 3 2]); Z = reshape(Z,npts,npts,size(Z,3)); % plot all of the contours and color in the regions nbound = size(Z,3); classcolor(1,:) = [0.7 0.3 0.3]; % red (r) classcolor(2,:) = [0.3 0.3 0.7]; % blue (b) classcolor(3,:) = [0.7 0.7 0.7]; % black (k) classcolor(4,:) = [0.3 0.7 0.7]; % cyan (c) classcolor(5,:) = [0.3 0.7 0.3]; % green (g) classcolor(6,:) = [0.7 0.3 0.7]; % magenta (m) classcolor(7,:) = [0.7 0.7 0.3]; % yellow/brown (y) linecolor(1) = 'r'; linecolor(2) = 'b'; linecolor(3) = 'k'; linecolor(4) = 'c'; linecolor(5) = 'g'; linecolor(6) = 'm'; linecolor(7) = 'y'; for j=1:nbound, [c,h]=contourf(xvec,yvec,Z(:,:,j),[0.5 0.5],linecolor(j)); hold on; for i=1:length(h), if (length(get(h(i),'FaceColor')) ~=3 | ... get(h(i),'FaceColor') ~= [1 1 1]), set(h(i),'FaceColor',classcolor(j,:)); end; end; end; % reset axes just to make sure they didn't get scaled set(gca,'XLim',xr); set(gca,'YLim',yr); hold off; %---------------------------------------------------------------------------- function plotlines(w), % function plotlines(w) % for 2D classification, this function plots the lines implied by the % weights in w xr = get(gca,'XLim'); yr = get(gca,'YLim'); px = []; py = []; for x=0:1:6, y = (-w(1,:)-w(2,:)*x)./w(3,:); px = [px x]; py = [py y']; end; plot(px',py(1,:)','r-'); hold on; if (size(py,1)>1), plot(px',py(2,:)','b-'); end; if (size(py,1)>2), plot(px',py(3,:)','k-'); end; set(gca,'XLim',xr); set(gca,'YLim',yr); hold off; %---------------------------------------------------------------------------- function plot_gaussians(means, invsigmas) % % function plot_gaussians(means, sigmas) % % ---- Preconditions ---- % MEANS: each row is a different mean % INVSIGMAS: 3D matrix, invsigmas(:,:,1) is the first inv. covariance % % ---- Postconditions ---- % % ----- Description ----- % Plots the eigenvectors of the Covariance matrix. % hold on for i = 1:size(means,1) plot_gaussian(means(i,:), invsigmas(:,:,i)); end hold off %---------------------------------------------------------------------------- function sigma = trainparzen(data, target, sigma0, n, mindiv) % sigma = trainparzen(data, target, sigma0, n, mindiv) % finds the best value of sigma for a parzan estimator using cross- % validation. sigma0 is the highest value of sigma to be checked % and mindiv is the resolution at which to find the solution % % This function could be faster if you note that the pair-wise % distances needed do not change and can be pre-computed. However, % this breaks up the nice decomposition I have going here. % (if you do do this, this function takes no time at all to compute) npts = size(data,1); ppn = npts/n; rp = randperm(npts); d = data(rp,:); % shuffle the data t = target(rp,:); % and the targets! high = sigma0; div = sigma0/5; low = div; tries = []; % the basic algorithm is to divide up the range into a few intervals % and evaluate the estimated error at points inside of each. Then, % we find the best value found so far and perform a similar division % of the area around it. We plot the function known thus far at each % step for amusement purposes while(div>mindiv) for s = low:div:high, error = 0; for k=1:n, out = parzan_classify(d((k-1)*ppn+1:k*ppn,:), ... d([1:(k-1)*ppn k*ppn+1:npts],:), ... t([1:(k-1)*ppn k*ppn+1:npts],:), s); out = flagmax(out); ematrix = out-t((k-1)*ppn+1:k*ppn,:); error = error + sum(sum(ematrix.*ematrix)); end; tries = [tries [s error]']; [tt,i] = sort(tries,2); tries = tries(:,i(1,:)); plot(tries(1,:),tries(2,:),'k-'); pause(0.01); end; [newmin,p] = min(tries(2,:)); if (p==size(tries,2)), high = sigma0; low = tries(1,size(tries,2)-2); else, if (p==1), high = tries(1,2); low = tries(1,1)/2; else, high = tries(1,p+1); low = tries(1,p-1); end; end; div = (high-low)/5; end; sigma = tries(1,p); %------------------------------------------------------------------------ %------------------------------------------------------------------------ % script to run all of ps2 solutions in order: res=50 % Problem #3 load ps2a load ps2b % First the histograms figure; subplot(3,2,1); [h,m] = makehist(ps2a,0.5); plothist(h,0.5,m,-15,15); title('Histogram of ps2a with width of 0.5'); subplot(3,2,3); [h,m] = makehist(ps2a,1); plothist(h,1,m,-15,15); title('Histogram of ps2a with width of 1'); subplot(3,2,5); [h,m] = makehist(ps2a,5); plothist(h,5,m,-15,15); title('Histogram of ps2a with width of 5'); subplot(3,2,2); [h,m] = makehist(ps2b,0.5); plothist(h,0.5,m,0,30); title('Histogram of ps2b with width of 0.5'); subplot(3,2,4); [h,m] = makehist(ps2b,1); plothist(h,1,m,0,30); title('Histogram of ps2b with width of 1'); subplot(3,2,6); [h,m] = makehist(ps2b,5); plothist(h,5,m,0,30); title('Histogram of ps2b with width of 5'); % now the parzen window estimates: % with Gaussians: figure subplot(3,2,1); parzengauss(ps2a,0.5,-15,15,res); title('Parzen window estimate of ps2a with Guassians of variance 0.5'); subplot(3,2,3); parzengauss(ps2a,1,-15,15,res); title('Parzen window estimate of ps2a with Gaussians of variance 1'); subplot(3,2,5); parzengauss(ps2a,5,-15,15,res); title('Parzen window estimate of ps2a with Gaussians of variance 5'); subplot(3,2,2); parzengauss(ps2b,0.5,0,30,res); title('Parzen window estimate of ps2b with Gaussians of variance 0.5'); subplot(3,2,4); parzengauss(ps2b,1,0,30,res); title('Parzen window estimate of ps2b with Gaussians of variance 1'); subplot(3,2,6); parzengauss(ps2b,5,0,30,res); title('Parzen window estimate of ps2b with Gaussians of variance 5'); % with box cars: figure subplot(3,2,1); parzenbox(ps2a,0.5,-15,15,res); title('Parzen window estimate of ps2a with a box car of width 0.5'); subplot(3,2,3); parzenbox(ps2a,1,-15,15,res); title('Parzen window estimate of ps2a with a box car of width 1'); subplot(3,2,5); parzenbox(ps2a,5,-15,15,res); title('Parzen window estimate of ps2a with a box car of width 5'); subplot(3,2,2); parzenbox(ps2b,0.5,0,30,res); title('Parzen window estimate of ps2b with a box car of width 0.5'); subplot(3,2,4); parzenbox(ps2b,1,0,30,res); title('Parzen window estimate of ps2b with a box car of width 1'); subplot(3,2,6); parzenbox(ps2b,5,0,30,res); title('Parzen window estimate of ps2b with a box car of width 5'); % now estimate using EM with mixture of Gaussians % the itteration setting of 20 should be *well* more than enough! figure subplot(3,2,1); [p,m,v] = emmixture(ps2a,2,20); plotmixture(p,m,v,-15,15,res); title('ps2a estimated with a mixture of 2 Gaussians'); disp('ps2a 2 Gaussian:'); disp('priors:'); disp(p'); disp('means:'); disp(m'); disp('variances:'); disp(v'); subplot(3,2,3); [p,m,v] = emmixture(ps2a,3,20); plotmixture(p,m,v,-15,15,res); title('ps2a estimated with a mixture of 3 Gaussians'); disp('ps2a 3 Gaussian:'); disp('priors:'); disp(p'); disp('means:'); disp(m'); disp('variances:'); disp(v'); subplot(3,2,5); [p,m,v] = emmixture(ps2a,10,20); plotmixture(p,m,v,-15,15,res); title('ps2a estimated with a mixture of 10 Gaussians'); disp('ps2a 10 Gaussian:'); disp('priors:'); disp(p'); disp('means:'); disp(m'); disp('variances:'); disp(v'); subplot(3,2,2); [p,m,v] = emmixture(ps2b,2,20); plotmixture(p,m,v,0,30,res); title('ps2b estimated with a mixture of 2 Gaussians'); disp('ps2b 2 Gaussian:'); disp('priors:'); disp(p'); disp('means:'); disp(m'); disp('variances:'); disp(v'); subplot(3,2,4); [p,m,v] = emmixture(ps2b,3,20); plotmixture(p,m,v,0,30,res); title('ps2b estimated with a mixture of 3 Gaussians'); disp('ps2b 3 Gaussian:'); disp('priors:'); disp(p'); disp('means:'); disp(m'); disp('variances:'); disp(v'); subplot(3,2,6); [p,m,v] = emmixture(ps2b,10,20); plotmixture(p,m,v,0,30,res); title('ps2b estimated with a mixture of 10 Gaussians'); disp('ps2b 10 Gaussian:'); disp('priors:'); disp(p'); disp('means:'); disp(m'); disp('variances:'); disp(v'); % and now we plot the true distributions (since I happen % to know what they were -- this obviously wasn't part of % the assignment) figure; subplot(1,2,1); plotmixture([0.3 0.5 0.2]',[-3 2 6]',[4 1 9]',-15,15,res); title('true underlying distribution for ps2a'); subplot(1,2,2); plotgamma(2,0.3,0,30,res); title('true underlying distribution for ps2b'); % first we'll do all of the aritificial dataset problems: load artificial; clf; % Problem #4: [m,s,p] = fit_gaussians(input,target); pred_p2_d1 = flagmax(gaussian_classify(test,m,s,p)); figure; plot_data(input,target); hold on; plotc('gaussian_classify',res,m,s,p); hold on; plot_data(input,target); hold on; plot_gaussians(m,s); title('Problem 4'); hold off; pause(0.01); % Problem #5: [m,s,p,sig] = fit_sphere(input,target); pred_p3_d1 = flagmax(gaussian_classify(test,m,s,p)); figure; plot_data(input,target); hold on; plotc('gaussian_classify',res,m,s,p); hold on; plot_data(input,target); hold on; plot_gaussians(m,s); title('Problem 5'); hold off; pause(0.01); % Problem #6: figure; sigma = trainparzen(input, target, max(sig), 4, max(sig)/20); title('Cross Validation Parzen Estimates'); xl = sprintf('minimum found at sigma = %f',sigma); xlabel(xl); hold off; pause(0.01); pred_p4_d1 = flagmax(parzen_classify(test, input, target, sigma)); figure; plot_data(input,target); hold on; plotc('parzen_classify',res,input,target,sigma); hold on; plot_data(input,target); title('Problem 6'); hold off; pause(0.01); % Problem #7: % no training needed pred_p5_d1 = flagmax(nearest(test,input,target)); figure; plot_data(input,target); hold on; plotc('nearest',res,input,target); hold on; plot_data(input,target); title('Problem 7'); hold off; pause(0.01);