%-------------------------------------------------------------------
function dopca(data,testdata)

mu = mean(data);
data = data - repmat(mu,size(data,1),1);
sig = data'*data/size(data,1);
[u,s,v] = svd(sig);
ev = diag(s);

figure;
hold on;
title('eigenvalues of data');
plot(ev);
axis tight;
hold off;

% compute the sum of the eigenvalues less than a given value
% as the predicted squared reconstruction error 
esterr = cumsum(ev(size(data,2):-1:1));
esterr = esterr(size(data,2):-1:1);

figure;
hold on;
title('predicted average reconstruction error');
plot(esterr);
axis tight;
hold off;

xverr = zeros(size(data,2),1);
% the squared reconstruction error is the sum of the squares of the
% projections of the data onto the axes *not* included (ie the n-i+1)
% smallest eigenvectors
proj = (testdata-repmat(mu,size(testdata,1),1))*v;
for i=1:size(data,2)-1,
	xverr(i) = sum(sum(proj(:,i+1:size(data,2)).^2))/size(testdata,1);
end;
figure;
hold on;
title('average reconstruction error on test data');
plot(xverr);
axis tight;
hold off;

npics = 30;
ndim = 20;
pos = floor(linspace(1,size(proj,1),npics));
dim = ceil(logspace(0,log10(size(proj,2)),ndim));
pic = ones(ndim*16,npics*16);
for i=1:ndim,
for j=1:npics,
	pic((i-1)*16+1:i*16,(j-1)*16+1:j*16) = ...
		reshape((proj(pos(j),1:dim(i))*v(:,1:dim(i))'+mu),16,16)';
end;
end;
figure;
image(pic*64);
axis off;
axis image;
colormap gray;
for i=1:ndim,
	text(-8,8+(i-1)*16,sprintf('%d',dim(i)),'HorizontalAlignment','right');
end;

%-------------------------------------------------------------------
function testclassifier(trainx,trainy,testx,testy,ndim,classifier,varargin)

mu = mean(trainx);
d = trainx - repmat(mu,size(trainx,1),1);
sig = d'*d/size(trainx,1);
[u,s,v] = svd(sig);
% notice that I'm not subtracting off of the mean, this just means
% that the projected data won't be zero mean (otherwise the same),
% which should not effect the classifier
trainp = trainx*v;
testp = testx*v;
sqerr = zeros(size(ndim));
figure;
for i=1:size(ndim),
	outy = feval(classifier,testp(:,1:ndim(i)),...
		trainp(:,1:ndim(i)),trainy,varargin{:});
	sqerr(i) = sum(sum((testy-outy).^2));
	% just plot each time to convince me things are still happening
	plot(ndim,sqerr/200);
	pause(0.01);
end;
clf;
% note that sqerr/200 is the average miss rate (since a miss will count 
%  as 2 and a hit as 0)
title('Error rate on test data verses number of eigenvectors');
plot(ndim,sqerr/200);


%-------------------------------------------------------------------
function outy = testmlp(input,data,target)

[w,b] = trainmlp(data,target,[15 size(target,2)],1/size(data,2));
outy = flagmax(mlp(input,w,b));


%-------------------------------------------------------------------
function [w,b] = trainmlp(input, target, layers, scale),
% this is the same function as from the solutions of problem
% set 2, except that the training rates and stopping conditions
% have been modified to work better with this problem set

% initialize the network:
nex = size(input,1);
nin = size(input,2);
nlayers = size(layers,2);
layers = [nin , layers];
for i=1:nlayers,
	w{i} = scale*randn(layers(i),layers(i+1));
	b{i} = scale*randn(1,layers(i+1));
end;

% now train:
eta = 10/nex;
j = 1;
wch = 2;
while(j<10 | wch>1),
	for i=1:nlayers,
		wold{i} = w{i};
	end;
	for i=1:100,
		z = mlpforward(input,w,b); % get all of the outputs
		delta = mlpbackward(target,w,b,z);
		[w,b] = mlpupdate(input,w,b,z,delta,eta);
	end;
	wch = 0;
	for i=1:nlayers,
		wch = wch + sum(sum((wold{i}-w{i}).^2));
	end;
	wch
	if (isinf(wch) | isnan(wch)),
		for i=1:nlayers,
			w{i} = scale*randn(layers(i),layers(i+1));
			b{i} = scale*randn(1,layers(i+1));
		end;
	end;
	eta = eta*0.8;
	error = sum((z{nlayers}-target).^2)
	j = j+1;
end;


%-------------------------------------------------------------------
% script to run problem 2

load all_chars;

dopca(chars,tchars);

nd=[1 2 4 8 12 16 20 25 30 35 40 45 50 55 60 65 70 80 90 100 125 150 200 256]';
% now try a nearest-neighbor classifier
testclassifier(chars,target,tchars,correct,nd,'nearest');

testclassifier(chars,target,tchars,correct,nd,'testmlp');