Matlab code:
%example 1: univariate data, heights of women
if 1
%generate random data
D = 64 + 3*randn(20,1);
N = length(D);
figure(1); clf; set(gcf,’color’,’w’,’position’,[93 330 1267 420]);
%display raw data points
subplot(1,3,1); set(gca,’fontsize’,18);
plot(D,ones(N,1),’*r’);
box off
xlabel(‘height in inches’);
axis([50 80 .9 1.1]);
set(gca,’ytick’,[]);
title(‘Raw data’);
%display data as a histogram
subplot(1,3,2); set(gca,’fontsize’,18);
hist(D,[50:.1:80]);
box off
xlabel(‘height in inches’);
ylabel(‘frequency’);
axis([50 80 0 10]);
title(‘Histogram of data’);
%display mean with errorbar representing 2 std errors
subplot(1,3,3); set(gca,’fontsize’,18);
mybar(mean(D),2*std(D)/sqrt(N));
axis([.5 1.5 50 80]);
set(gca,’xticklabel’,[]);
box off
ylabel(‘height in inches’);
title(‘Mean +/- 2*SEM’);
%bootstrap samples
%take a single bootstrap sample by sampling N items from D with replacement
figure(2); clf; set(gcf,’color’,’w’,’position’,[93 330 900 420]);
subplot(1,2,1);
set(gca,’fontsize’,18);
plot(D,ones(N,1),’*r’);
box off
xlabel(‘height in inches’);
axis([50 80 .9 1.1]);
set(gca,’ytick’,[]);
title(‘Raw data’);
hold on
%superimpose bootstrap sample on D
% d = randsample(D,N);
d = randsample(D,N);
subplot(1,2,1); set(gca,’fontsize’,18);
plot(d,1.02*ones(N,1),’or’);
%mark position of mean
subplot(1,2,2); set(gca,’fontsize’,18);
plot(mean(d),1, ‘.r’,’markersize’,25);
axis([60 70 .9 1.1]); box off; set(gca,’ytick’,[]);
hold on
%do it again 5 times
for i=1:5
d = randsample(D,N);
subplot(1,2,1);
plot(d,(1.02+i/100)*ones(N,1),’or’);
%mark position of mean
subplot(1,2,2);
plot(mean(d),1, ‘.r’,’markersize’,25);
axis([60 70 .9 1.1]);
end
%compute bootstrap standard error of sample mean
ms = [];
for i=1:10000
d = randsample(D,N);
ms(i) = mean(d);
end
%plot histogram of bootstrap means
hist(ms,[60:.5:70]);
axis([60 70 0 5000]);
box off
%find and display 95% CI
ci1 = prctile(ms,2.5);
ci2 = prctile(ms,97.5);
hold on
plot([ci1 ci1],[0 3000],’k–‘,’linewidth’,5);
plot([ci2 ci2],[0 3000],’k–‘,’linewidth’,5);
title(‘Distribution of bootstrap means’);
end
%comparing two samples (heights of women and men)
if 1
%generate random data
WH = 64 + 3*randn(10,1);
N = length(WH);
MH = 70 + 4*randn(N,1);
figure(3); clf; set(gcf,’color’,’w’,’position’,[93 330 1267 420]);
%display raw data points
subplot(1,2,1); set(gca,’fontsize’,18);
plot(WH,ones(N,1),’*r’);
hold on
plot(MH,1.03*ones(N,1),’*b’);
box off
xlabel(‘height in inches’);
axis([50 80 .9 1.1]);
set(gca,’ytick’,[]);
title(‘Raw data’);
%display mean with errorbar representing 2 std errors
subplot(1,2,2); set(gca,’fontsize’,18);
mybar([mean(WH) mean(MH)],[2*std(WH) 2*std(MH)]/sqrt(N),[],[],[1 0 0; 0 0 1]);
axis([.5 2.5 50 80]);
set(gca,’xticklabel’,[]);
box off
ylabel(‘height in inches’);
title(‘Mean +/- 2*SEM’);
%difference in means
DM = mean(MH) – mean(WH)
%how do we compute a confidence interval for this?
%bootstrap the samples many times and compute the difference of means
%each time, then look at the distribution
dms = [];
for i=1:10000
d1 = randsample(WH,N);
d2 = randsample(MH,N);
dms(i) = mean(d2) – mean(d1);
end
%plot histogram of bootstrap difference of means
hist(dms);
box off
%find and display 95% CI
ci1 = prctile(dms,2.5);
ci2 = prctile(dms,97.5);
hold on
plot([ci1 ci1],[0 3000],’k–‘,’linewidth’,5);
plot([ci2 ci2],[0 3000],’k–‘,’linewidth’,5);
title(‘Distribution of bootstrap difference of means’);
end
%example 3: correlation of height & weight of 20 men
H = 70 + 4*randn(100,1);
W = 2.3*H + 5*randn(100,1);
figure(4); clf; set(gcf,’color’,’w’,’position’,[93 330 1267 420]);
%display raw data points
subplot(1,2,1); set(gca,’fontsize’,18);
plot(H,W,’*b’);
box off
xlabel(‘height in inches’);
ylabel(‘weight in pounds’);
axis([50 100 100 250]);
% set(gca,’ytick’,[]);
title(‘Raw data’);
%calculate sample correlation (pearson’s r)
r = corr(H,W)
%is it significant? what is the 95% ci for r?
rs = [];
for i=1:1000
d = randsample(1:N,N);
d1 = H(d);
d2 = W(d);
rs(i) = corr(d1,d2);
end
%plot histogram of bootstrap difference of means
subplot(1,2,2); set(gca,’fontsize’,18);
hist(rs,[-1:.1:1]);
box off
%find and display 95% CI
ci1 = prctile(rs,2.5);
ci2 = prctile(rs,97.5);
hold on
plot([ci1 ci1],[0 3000],’k–‘,’linewidth’,5);
plot([ci2 ci2],[0 3000],’k–‘,’linewidth’,5);
title(‘Distribution of bootstrap correlation values’);
%
% % %display data as a histogram
% % subplot(1,3,2); set(gca,’fontsize’,18);
% % hist(D,[50:2.5:80]);
% % box off
% % xlabel(‘height in inches’);
% % ylabel(‘frequency’);
% % axis([50 80 0 10]);
% % title(‘Histogram of data’);
% %
% % %display mean with errorbar representing 2 std errors
% % subplot(1,3,3); set(gca,’fontsize’,18);
% % mybar(mean(D),2*std(D)/sqrt(N));
% % axis([.5 1.5 50 80]);
% % set(gca,’xticklabel’,[]);
% % box off
% % ylabel(‘height in inches’);
% % title(‘Mean +/- 2*SEM’);
% %