Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
cluster Mass-spec peptides step
  • Loading branch information
MPIBR-tushevg committed Sep 20, 2016
1 parent 3d30e77 commit ff26052
Show file tree
Hide file tree
Showing 10 changed files with 1,033 additions and 0 deletions.
43 changes: 43 additions & 0 deletions cluster_peptides/CalculateMeasures.m
@@ -0,0 +1,43 @@
function [ ary_cor, ary_iAB ] = CalculateMeasures( data )
% calculate measures

count = size(data.seqaa,1);
ary_rep = zeros(count,1);
ary_exp = zeros(count,1);
ary_smp = zeros(count,1);

ary_iAB = zeros(count,1);
ary_cor = zeros(count,1);

for k = 1 : count

% get each experiment ICC
EI = data.imtx(k,data.idx_exp == 1)';
EII = data.imtx(k,data.idx_exp == 2)';
ary_exp(k) = ICC([EI,EII], '1-1');

% get each sample ICC
SA = data.imtx(k,data.idx_smp == 'A')';
SB = data.imtx(k,data.idx_smp == 'B')';
ary_smp(k) = -ICC([SA,SB],'1-1');

% get each replica ICC
EIRI = data.imtx(k,data.idx_exp == 1 & data.idx_rep == 1)';
EIRII = data.imtx(k,data.idx_exp == 1 & data.idx_rep == 2)';
EIIRI = data.imtx(k,data.idx_exp == 2 & data.idx_rep == 1)';
EIIRII = data.imtx(k,data.idx_exp == 2 & data.idx_rep == 2)';
ary_rep(k) = ICC([EIRI,EIRII,EIIRI,EIIRII], '1-1');

ary_cor(k) = ary_exp(k) * ary_rep(k);% ary_smp(k);

% difference
iA = mean(data.imtx(k,data.idx_smp == 'A'));
iB = mean(data.imtx(k,data.idx_smp == 'B'));
%iPeak = data.intensity(k);
iPeak = max(data.imtx(k,:));
ary_iAB(k) = (iA - iB)/iPeak;
%}
end

end

189 changes: 189 additions & 0 deletions cluster_peptides/ICC.m
@@ -0,0 +1,189 @@
function [r, LB, UB, F, df1, df2, p] = ICC(M, type, alpha, r0)
% Intraclass correlation
% [r, LB, UB, F, df1, df2, p] = ICC(M, type, alpha, r0)
%
% M is matrix of observations. Each row is an object of measurement and
% each column is a judge or measurement.
%
% 'type' is a string that can be one of the six possible codes for the desired
% type of ICC:
% '1-1': The degree of absolute agreement among measurements made on
% randomly seleted objects. It estimates the correlation of any two
% measurements.
% '1-k': The degree of absolute agreement of measurements that are
% averages of k independent measurements on randomly selected
% objects.
% 'C-1': case 2: The degree of consistency among measurements. Also known
% as norm-referenced reliability and as Winer's adjustment for
% anchor points. case 3: The degree of consistency among measurements maded under
% the fixed levels of the column factor. This ICC estimates the
% corrlation of any two measurements, but when interaction is
% present, it underestimates reliability.
% 'C-k': case 2: The degree of consistency for measurements that are
% averages of k independent measurements on randomly selected
% onbjectgs. Known as Cronbach's alpha in psychometrics. case 3:
% The degree of consistency for averages of k independent
% measures made under the fixed levels of column factor.
% 'A-1': case 2: The degree of absolute agreement among measurements. Also
% known as criterion-referenced reliability. case 3: The absolute
% agreement of measurements made under the fixed levels of the column factor.
% 'A-k': case 2: The degree of absolute agreement for measurements that are
% averages of k independent measurements on randomly selected objects.
% case 3: he degree of absolute agreement for measurements that are
% based on k independent measurements maded under the fixed levels
% of the column factor.
%
% ICC is the estimated intraclass correlation. LB and UB are upper
% and lower bounds of the ICC with alpha level of significance.
%
% In addition to estimation of ICC, a hypothesis test is performed
% with the null hypothesis that ICC = r0. The F value, degrees of
% freedom and the corresponding p-value of the this test are
% reported.
%
% (c) Arash Salarian, 2008
%
% Reference: McGraw, K. O., Wong, S. P., "Forming Inferences About
% Some Intraclass Correlation Coefficients", Psychological Methods,
% Vol. 1, No. 1, pp. 30-46, 1996
%

if nargin < 3
alpha = .05;
end

if nargin < 4
r0 = 0;
end

[n, k] = size(M);
[p, table] = anova_rm(M, 'off');

SSR = table{3,2};
SSE = table{4,2};
SSC = table{2,2};
SSW = SSE + SSC;

MSR = SSR / (n-1);
MSE = SSE / ((n-1)*(k-1));
MSC = SSC / (k-1);
MSW = SSW / (n*(k-1));

switch type
case '1-1'
[r, LB, UB, F, df1, df2, p] = ICC_case_1_1(MSR, MSE, MSC, MSW, alpha, r0, n, k);
case '1-k'
[r, LB, UB, F, df1, df2, p] = ICC_case_1_k(MSR, MSE, MSC, MSW, alpha, r0, n, k);
case 'C-1'
[r, LB, UB, F, df1, df2, p] = ICC_case_C_1(MSR, MSE, MSC, MSW, alpha, r0, n, k);
case 'C-k'
[r, LB, UB, F, df1, df2, p] = ICC_case_C_k(MSR, MSE, MSC, MSW, alpha, r0, n, k);
case 'A-1'
[r, LB, UB, F, df1, df2, p] = ICC_case_A_1(MSR, MSE, MSC, MSW, alpha, r0, n, k);
case 'A-k'
[r, LB, UB, F, df1, df2, p] = ICC_case_A_k(MSR, MSE, MSC, MSW, alpha, r0, n, k);
end


%----------------------------------------
function [r, LB, UB, F, df1, df2, p] = ICC_case_1_1(MSR, MSE, MSC, MSW, alpha, r0, n, k)
r = (MSR - MSW) / (MSR + (k-1)*MSW);

F = (MSR/MSW) * (1-r0)/(1+(k-1)*r0);
df1 = n-1;
df2 = n*(k-1);
p = 1-fcdf(F, df1, df2);

FL = F / finv(1-alpha/2, n-1, n*(k-1));
FU = F * finv(1-alpha/2, n*(k-1), n-1);

LB = (FL - 1) / (FL + (k-1));
UB = (FU - 1) / (FU + (k-1));

%----------------------------------------
function [r, LB, UB, F, df1, df2, p] = ICC_case_1_k(MSR, MSE, MSC, MSW, alpha, r0, n, k)
r = (MSR - MSW) / MSR;

F = (MSR/MSW) * (1-r0);
df1 = n-1;
df2 = n*(k-1);
p = 1-fcdf(F, df1, df2);

FL = F / finv(1-alpha/2, n-1, n*(k-1));
FU = F * finv(1-alpha/2, n*(k-1), n-1);

LB = 1 - 1 / FL;
UB = 1 - 1 / FU;

%----------------------------------------
function [r, LB, UB, F, df1, df2, p] = ICC_case_C_1(MSR, MSE, MSC, MSW, alpha, r0, n, k)
r = (MSR - MSE) / (MSR + (k-1)*MSE);

F = (MSR/MSE) * (1-r0)/(1+(k-1)*r0);
df1 = n - 1;
df2 = (n-1)*(k-1);
p = 1-fcdf(F, df1, df2);

FL = F / finv(1-alpha/2, n-1, (n-1)*(k-1));
FU = F * finv(1-alpha/2, (n-1)*(k-1), n-1);

LB = (FL - 1) / (FL + (k-1));
UB = (FU - 1) / (FU + (k-1));

%----------------------------------------
function [r, LB, UB, F, df1, df2, p] = ICC_case_C_k(MSR, MSE, MSC, MSW, alpha, r0, n, k)
r = (MSR - MSE) / MSR;

F = (MSR/MSE) * (1-r0);
df1 = n - 1;
df2 = (n-1)*(k-1);
p = 1-fcdf(F, df1, df2);

FL = F / finv(1-alpha/2, n-1, (n-1)*(k-1));
FU = F * finv(1-alpha/2, (n-1)*(k-1), n-1);

LB = 1 - 1 / FL;
UB = 1 - 1 / FU;

%----------------------------------------
function [r, LB, UB, F, df1, df2, p] = ICC_case_A_1(MSR, MSE, MSC, MSW, alpha, r0, n, k)
r = (MSR - MSE) / (MSR + (k-1)*MSE + k*(MSC-MSE)/n);

a = (k*r0) / (n*(1-r0));
b = 1 + (k*r0*(n-1))/(n*(1-r0));
F = MSR / (a*MSC + b*MSE);
df1 = n - 1;
df2 = (a*MSC + b*MSE)^2/((a*MSC)^2/(k-1) + (b*MSE)^2/((n-1)*(k-1)));
p = 1-fcdf(F, df1, df2);

a = k*r/(n*(1-r));
b = 1+k*r*(n-1)/(n*(1-r));
v = (a*MSC + b*MSE)^2/((a*MSC)^2/(k-1) + (b*MSE)^2/((n-1)*(k-1)));

Fs = finv(1-alpha/2, n-1, v);
LB = n*(MSR - Fs*MSE)/(Fs*(k*MSC + (k*n - k - n)*MSE) + n*MSR);

Fs = finv(1-alpha/2, v, n-1);
UB = n*(Fs*MSR-MSE)/(k*MSC + (k*n - k - n)*MSE + n*Fs*MSR);

%----------------------------------------
function [r, LB, UB, F, df1, df2, p] = ICC_case_A_k(MSR, MSE, MSC, MSW, alpha, r0, n, k)
r = (MSR - MSE) / (MSR + (MSC-MSE)/n);

c = r0/(n*(1-r0));
d = 1 + (r0*(n-1))/(n*(1-r0));
F = MSR / (c*MSC + d*MSE);
df1 = n - 1;
df2 = (c*MSC + d*MSE)^2/((c*MSC)^2/(k-1) + (d*MSE)^2/((n-1)*(k-1)));
p = 1-fcdf(F, df1, df2);

a = r/(n*(1-r));
b = 1+r*(n-1)/(n*(1-r));
v = (a*MSC + b*MSE)^2/((a*MSC)^2/(k-1) + (b*MSE)^2/((n-1)*(k-1)));

Fs = finv(1-alpha/2, n-1, v);
LB = n*(MSR - Fs*MSE)/(Fs*(MSC-MSE) + n*MSR);

Fs = finv(1-alpha/2, v, n-1);
UB = n*(Fs*MSR - MSE)/(MSC - MSE + n*Fs*MSR);

17 changes: 17 additions & 0 deletions cluster_peptides/LogPeptideList.m
@@ -0,0 +1,17 @@
function [] = LogPeptideList( idx, pep_data, ref_ann, label)
% Write out lists
gid_list = cat(1,pep_data.gid_list{idx});
gid_unq = unique(gid_list);
[~,~,idx_R] = intersect(gid_unq,ref_ann.gid);
sym_unq = unique(ref_ann.sym(idx_R));

fW = fopen(sprintf('resultSet_GI_%s.txt',label),'w');
fprintf(fW,'%s\n',gid_unq{:});
fclose(fW);

fW = fopen(sprintf('resultSet_SYM_%s.txt',label),'w');
fprintf(fW,'%s\n',sym_unq{:});
fclose(fW);

end

18 changes: 18 additions & 0 deletions cluster_peptides/MatchIDList.m
@@ -0,0 +1,18 @@
function [ idx ] = MatchIDList(map, data, qry)
% get symbols that match

qry_sym = unique(qry);

idx_sym = cellfun(@(x) any(strcmp(x,qry_sym)), map.sym_unq);

qry_pep = unique(map.gid(idx_sym(map.idx_sym_rev)));

[gid_unq,~,gid_unq_rev] = unique(data.gid_ary);

idx_pep = cellfun(@(x) any(strcmp(x,qry_pep)), gid_unq);
pos_pep = data.gid_ary_rev(idx_pep(gid_unq_rev));
idx = false(size(data.length,1),1);
idx(unique(pos_pep)) = true;

end

0 comments on commit ff26052

Please sign in to comment.