function DD_predict

%%%% This matlab function calculates the predictions of a statistical
%%%% model for a DD experiment. For details and notation compare
%%%% Meintrup, Reisinger, "A statistical model providing comprehensive predictions for the mRNA differential 
%%%% display", Bioinformatics, 2005.

%%%% If you want to use this function for your own DD experiment you have to adjust the next 4
%%%% variables: number_of_subexps, predict_exp, v and y.

number_of_subexps = 28;  % number of subexperiments that you performed
predict_exp = 135;       % number of subexperiemnts you want to have predicted 

%%%%%% y is the vector of sample sizes of your DD subexperiments 

y=[41,48,41,47,40,34,55,48,38,48,47,45,45,48,49,60,51,51,47,57,48,44,48,69,62,62,69,65];

%%%%%%%% the following matrix v contains the result of your subexperiements. note
%%%%%%%% that you have to adjust the number of zeros to add so that v is a
%%%%%%%% matrix. 

v(1,:) = [[1,2,3,4,5], zeros(1,19)];
v(2,:) = [[6,7,8,9,10, zeros(1,19)]];
v(3,:) = [[9,11,12,13,14, zeros(1,19)]];
v(4,:) = [[15,16,17,18,19,20,21,22,23,24], zeros(1,14)];
v(5,:) = [[19,25,26,27,28,29,30,31,32], zeros(1,24-9)];
v(6,:) = [[19,27,33,34,35,36,37,38], zeros(1,24-8)];
v(7,:) = [[39,40,41,42,43], zeros(1,24-5)];
v(8,:) = [[43,44,45,46,47,48,49], zeros(1,24-7)];
v(9,:) = [[43,50,51], zeros(1,24-3)];
v(10,:) =[[4,15,51,52,53,54,55,56,57,58,59,60,61], zeros(1,24-13)];
v(11,:) = [[62,63], zeros(1,24-2)];
v(12,:) = [[64,65,66,67,68,69], zeros(1,24-6)];
v(13,:) = [[35,70,71,72,73], zeros(1,24-5)];
v(14,:) = [[19,34,35,74,75,76], zeros(1,24-6)];
v(15,:) = [[77,78,79,80,81], zeros(1,24-5)];
v(16,:) = [35,77,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102];
v(17,:) = [[43,47,103,104,105,106,107,108,109,110,111], zeros(1,24-11)];
v(18,:) = [[75,103,112,113], zeros(1,24-4)];
v(19,:) = [[39,115,116,117,118,119,120,121], zeros(1,24-8)];
v(20,:) = [[83,93,94,97,122,123,124,125,126,127,128], zeros(1,24-11)];
v(21,:) = [[75,108,129,130,131], zeros(1,24-5)];
v(22,:) = [[75,132,133,134], zeros(1,24-4)];
v(23,:) = [[135], zeros(1,24-1)];
v(24,:) = [[67,83,92,97,99,101,136,137,138,139,140,141,142,143,144], zeros(1,24-15)];
v(25,:) = [[6,75,97,145,146,147,148,149,150,151,152,153,154,155], zeros(1,24-14)];
v(26,:) = [[75,156,157,158,159,160], zeros(1,24-6)];
v(27,:) = [[5,13,34,103,145,161,162,163,164,165,166,167], zeros(1,12)];
v(28,:) = [[82,97,99,123,145,163,168,169,170,171,172,173,174,175], zeros(1,24-14)];


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%% determines the number x of differantial bands

for i=1:number_of_subexps
    x(i) = length(find(v(i,:)));
end

%%%%%%%%% the next lines determine the non-redundant bands u and the
%%%%%%%%% redundancy r

A=v;
[m,n]=size(A);
for i=1:m
    u(i) = length(find(A(i,:)));
    r(i) = 0;
end

for i=2:m
    for j=1:u(i)
        find(A(1:i-1,:)-A(i,j)*ones(i-1,n));
        d=length(find(A(1:i-1,:)-A(i,j)*ones(i-1,n)));
        if d ~= (i-1)*n
            r(i)= r(i) +1;
        end
    end
    u(i) = u(i) - r(i);
end

%%%%%%%%%% z and d are calculated %%%%%%%%%%%%%%%%%%%%%%%%%%%

for i=1:number_of_subexps
   z(i) = sum(u(1:i));
   d(i) = sum(x(1:i)); 
end
  
%%%%%%%%% estimator for p %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

p=sum(x)/sum(y);


%%%%%%%%%%% here the estimator for N and N_1 is calculated %%%%%

for i=1:1:8000
    N=1500+1*(i-1);
    Ni(i) = N;
    xi(i)=i;
     fi(i)=1;
     fi(i) = bino(y(1),p,z(1));
     for j=2:length(u)
        fi(i) = fi(i)*bino(y(j),p-z(j-1)/N,u(j));
    end
 end
[Y,I] = max(fi);
N_hat=Ni(I);

N_1_hat = p*N_hat;

%%%%%%%%% calculates y and its standard deviation %%%%%%%%%%%%%%%%%%

y_hat = mean(y);
sigma_y=std(y);

%%%%%%%%%%%%%%%% calculates the expected values for d,z and c  %%%%%%

for i=1:predict_exp
    d_hat(i) = i*p *y_hat;
    z_hat(i) = p*N_hat*(1-(1-y_hat/N_hat)^i);
    c_hat(i) = z_hat(i)/N_1_hat;
end

%%%%%%%%% plots the results %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

e=[1:predict_exp];
e_short = [1:number_of_subexps];
N_1_plot = N_1_hat*ones(1,predict_exp);
subplot(1,2,1);
plot(e_short,z,e_short, z_hat(1:number_of_subexps),e_short, d, e_short, d_hat(1:number_of_subexps));
legend('non-red. bands (exper.)','non-red. bands (expected)','all diff. bands (exper.)','all diff. bands (expected)',2);
xlabel('subexperiments');
ylabel('differential bands');
grid on;
subplot(1,2,2);
plot(e,z_hat,e,d_hat,e,N_1_plot,'k--');
legend('non-red. bands (expected)','all diff. bands (expected)','N_1 (number of diff. bands)',2);
xlabel('subexperiments');
ylabel('differential bands');
grid on;

%%%%%%%%%%%%%%%%%%%%%%%%%%%% display values of interest %%%%%%%%%%%%%%%%%%%%%%%%%%

sprintf('Estimator for p: %.4f',p)

sprintf('Estimator for N: %i',N_hat)

sprintf('Estimator for N_1: %.0f', N_1_hat)

sprintf('average of sample size: %.2f',y_hat')

sprintf('standard deviation of sample size: %.2f', sigma_y)

sprintf('experimentally reached non-redundant bands: %i',z(number_of_subexps))

sprintf('experimentally reached differential bands: %i',d(number_of_subexps)')

sprintf('experimentally reached coverage: %.2f',z(number_of_subexps)/N_1_hat)

sprintf('for %i subexperiments predicted non-redundant bands: %.2f',predict_exp,z_hat(predict_exp))

sprintf('for %i subexperiments predicted differential bands: %.2f',predict_exp,d_hat(predict_exp))

sprintf('for %i subexperiments predicted coverage: %.2f',predict_exp,z_hat(predict_exp)/N_1_hat)

%%%%%%%%%%%% subfunction  %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function s = bino(n,p,k)  %% calculates B(n,p)(k) for the binomial distribution

koeff = 1;
for i=1 : k
    koeff =koeff*(n-i+1)/i;
end
s = koeff*p^k*(1-p)^(n-k);


%%%%%%%%%%%%%%% END %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
