diff --git a/cluster_peptides/CalculateMeasures.m b/cluster_peptides/CalculateMeasures.m
new file mode 100755
index 0000000..68d1697
--- /dev/null
+++ b/cluster_peptides/CalculateMeasures.m
@@ -0,0 +1,43 @@
+function [ ary_cor, ary_iAB ] = CalculateMeasures( data )
+    % calculate measures
+
+	count = size(data.seqaa,1);
+	ary_rep = zeros(count,1);
+	ary_exp = zeros(count,1);
+    ary_smp = zeros(count,1);
+ 
+    ary_iAB = zeros(count,1);
+    ary_cor = zeros(count,1);
+    
+    for k = 1 : count
+     
+        % get each experiment ICC
+        EI = data.imtx(k,data.idx_exp == 1)';
+        EII = data.imtx(k,data.idx_exp == 2)';
+        ary_exp(k) = ICC([EI,EII], '1-1');
+        
+        % get each sample ICC
+        SA = data.imtx(k,data.idx_smp == 'A')';
+        SB = data.imtx(k,data.idx_smp == 'B')';
+        ary_smp(k) = -ICC([SA,SB],'1-1');
+        
+        % get each replica ICC
+        EIRI = data.imtx(k,data.idx_exp == 1 & data.idx_rep == 1)';
+        EIRII = data.imtx(k,data.idx_exp == 1 & data.idx_rep == 2)';
+        EIIRI = data.imtx(k,data.idx_exp == 2 & data.idx_rep == 1)';
+        EIIRII = data.imtx(k,data.idx_exp == 2 & data.idx_rep == 2)';
+        ary_rep(k) = ICC([EIRI,EIRII,EIIRI,EIIRII], '1-1');
+        
+        ary_cor(k) = ary_exp(k) * ary_rep(k);% ary_smp(k);
+        
+        % difference
+        iA = mean(data.imtx(k,data.idx_smp == 'A'));
+        iB = mean(data.imtx(k,data.idx_smp == 'B'));
+        %iPeak = data.intensity(k);
+        iPeak = max(data.imtx(k,:));
+        ary_iAB(k) = (iA - iB)/iPeak;
+        %}
+	end
+
+end
+
diff --git a/cluster_peptides/ICC.m b/cluster_peptides/ICC.m
new file mode 100755
index 0000000..386a065
--- /dev/null
+++ b/cluster_peptides/ICC.m
@@ -0,0 +1,189 @@
+function [r, LB, UB, F, df1, df2, p] = ICC(M, type, alpha, r0)
+% Intraclass correlation
+%   [r, LB, UB, F, df1, df2, p] = ICC(M, type, alpha, r0)
+%
+%   M is matrix of observations. Each row is an object of measurement and
+%   each column is a judge or measurement.
+%
+%   'type' is a string that can be one of the six possible codes for the desired
+%   type of ICC:
+%       '1-1': The degree of absolute agreement among measurements made on
+%         randomly seleted objects. It estimates the correlation of any two
+%         measurements.
+%       '1-k': The degree of absolute agreement of measurements that are
+%         averages of k independent measurements on randomly selected
+%         objects.
+%       'C-1': case 2: The degree of consistency among measurements. Also known
+%         as norm-referenced reliability and as Winer's adjustment for
+%         anchor points. case 3: The degree of consistency among measurements maded under
+%         the fixed levels of the column factor. This ICC estimates the
+%         corrlation of any two measurements, but when interaction is
+%         present, it underestimates reliability.
+%       'C-k': case 2: The degree of consistency for measurements that are
+%         averages of k independent measurements on randomly selected
+%         onbjectgs. Known as Cronbach's alpha in psychometrics. case 3:  
+%         The degree of consistency for averages of k independent
+%         measures made under the fixed levels of column factor.
+%       'A-1': case 2: The degree of absolute agreement among measurements. Also
+%         known as criterion-referenced reliability. case 3: The absolute 
+%         agreement of measurements made under the fixed levels of the column factor.
+%       'A-k': case 2: The degree of absolute agreement for measurements that are
+%         averages of k independent measurements on randomly selected objects.
+%         case 3: he degree of absolute agreement for measurements that are
+%         based on k independent measurements maded under the fixed levels
+%         of the column factor.
+%
+%       ICC is the estimated intraclass correlation. LB and UB are upper
+%       and lower bounds of the ICC with alpha level of significance. 
+%
+%       In addition to estimation of ICC, a hypothesis test is performed
+%       with the null hypothesis that ICC = r0. The F value, degrees of
+%       freedom and the corresponding p-value of the this test are
+%       reported.
+%
+%       (c) Arash Salarian, 2008
+%
+%       Reference: McGraw, K. O., Wong, S. P., "Forming Inferences About
+%       Some Intraclass Correlation Coefficients", Psychological Methods,
+%       Vol. 1, No. 1, pp. 30-46, 1996
+%
+
+if nargin < 3
+    alpha = .05;
+end
+
+if nargin < 4
+    r0 = 0;
+end
+
+[n, k] = size(M);
+[p, table] = anova_rm(M, 'off');
+
+SSR = table{3,2};
+SSE = table{4,2};
+SSC = table{2,2};
+SSW = SSE + SSC;
+
+MSR = SSR / (n-1);
+MSE = SSE / ((n-1)*(k-1));
+MSC = SSC / (k-1);
+MSW = SSW / (n*(k-1));
+
+switch type
+    case '1-1'
+        [r, LB, UB, F, df1, df2, p] = ICC_case_1_1(MSR, MSE, MSC, MSW, alpha, r0, n, k);
+    case '1-k'
+        [r, LB, UB, F, df1, df2, p] = ICC_case_1_k(MSR, MSE, MSC, MSW, alpha, r0, n, k);
+    case 'C-1'
+        [r, LB, UB, F, df1, df2, p] = ICC_case_C_1(MSR, MSE, MSC, MSW, alpha, r0, n, k);
+    case 'C-k'
+        [r, LB, UB, F, df1, df2, p] = ICC_case_C_k(MSR, MSE, MSC, MSW, alpha, r0, n, k);
+    case 'A-1'
+        [r, LB, UB, F, df1, df2, p] = ICC_case_A_1(MSR, MSE, MSC, MSW, alpha, r0, n, k);
+    case 'A-k'
+        [r, LB, UB, F, df1, df2, p] = ICC_case_A_k(MSR, MSE, MSC, MSW, alpha, r0, n, k);
+end
+
+
+%----------------------------------------
+function [r, LB, UB, F, df1, df2, p] = ICC_case_1_1(MSR, MSE, MSC, MSW, alpha, r0, n, k)
+r = (MSR - MSW) / (MSR + (k-1)*MSW);
+
+F = (MSR/MSW) * (1-r0)/(1+(k-1)*r0);
+df1 = n-1;
+df2 = n*(k-1);
+p = 1-fcdf(F, df1, df2);
+
+FL = F / finv(1-alpha/2, n-1, n*(k-1));
+FU = F * finv(1-alpha/2, n*(k-1), n-1);
+
+LB = (FL - 1) / (FL + (k-1));
+UB = (FU - 1) / (FU + (k-1));
+
+%----------------------------------------
+function [r, LB, UB, F, df1, df2, p] = ICC_case_1_k(MSR, MSE, MSC, MSW, alpha, r0, n, k)
+r = (MSR - MSW) / MSR;
+
+F = (MSR/MSW) * (1-r0);
+df1 = n-1;
+df2 = n*(k-1);
+p = 1-fcdf(F, df1, df2);
+
+FL = F / finv(1-alpha/2, n-1, n*(k-1));
+FU = F * finv(1-alpha/2, n*(k-1), n-1);
+
+LB = 1 - 1 / FL;
+UB = 1 - 1 / FU;
+
+%----------------------------------------
+function [r, LB, UB, F, df1, df2, p] = ICC_case_C_1(MSR, MSE, MSC, MSW, alpha, r0, n, k)
+r = (MSR - MSE) / (MSR + (k-1)*MSE);
+
+F = (MSR/MSE) * (1-r0)/(1+(k-1)*r0);
+df1 = n - 1;
+df2 = (n-1)*(k-1);
+p = 1-fcdf(F, df1, df2);
+
+FL = F / finv(1-alpha/2, n-1, (n-1)*(k-1));
+FU = F * finv(1-alpha/2, (n-1)*(k-1), n-1);
+
+LB = (FL - 1) / (FL + (k-1));
+UB = (FU - 1) / (FU + (k-1));
+
+%----------------------------------------
+function [r, LB, UB, F, df1, df2, p] = ICC_case_C_k(MSR, MSE, MSC, MSW, alpha, r0, n, k)
+r = (MSR - MSE) / MSR;
+
+F = (MSR/MSE) * (1-r0);
+df1 = n - 1;
+df2 = (n-1)*(k-1);
+p = 1-fcdf(F, df1, df2);
+
+FL = F / finv(1-alpha/2, n-1, (n-1)*(k-1));
+FU = F * finv(1-alpha/2, (n-1)*(k-1), n-1);
+
+LB = 1 - 1 / FL;
+UB = 1 - 1 / FU;
+
+%----------------------------------------
+function [r, LB, UB, F, df1, df2, p] = ICC_case_A_1(MSR, MSE, MSC, MSW, alpha, r0, n, k)
+r = (MSR - MSE) / (MSR + (k-1)*MSE + k*(MSC-MSE)/n);
+
+a = (k*r0) / (n*(1-r0));
+b = 1 + (k*r0*(n-1))/(n*(1-r0));
+F = MSR / (a*MSC + b*MSE);
+df1 = n - 1;
+df2 = (a*MSC + b*MSE)^2/((a*MSC)^2/(k-1) + (b*MSE)^2/((n-1)*(k-1)));
+p = 1-fcdf(F, df1, df2);
+
+a = k*r/(n*(1-r));
+b = 1+k*r*(n-1)/(n*(1-r));
+v = (a*MSC + b*MSE)^2/((a*MSC)^2/(k-1) + (b*MSE)^2/((n-1)*(k-1)));
+
+Fs = finv(1-alpha/2, n-1, v);
+LB = n*(MSR - Fs*MSE)/(Fs*(k*MSC + (k*n - k - n)*MSE) + n*MSR);
+
+Fs = finv(1-alpha/2, v, n-1);
+UB = n*(Fs*MSR-MSE)/(k*MSC + (k*n - k - n)*MSE + n*Fs*MSR);
+
+%----------------------------------------
+function [r, LB, UB, F, df1, df2, p] = ICC_case_A_k(MSR, MSE, MSC, MSW, alpha, r0, n, k)
+r = (MSR - MSE) / (MSR + (MSC-MSE)/n);
+
+c = r0/(n*(1-r0));
+d = 1 + (r0*(n-1))/(n*(1-r0));
+F = MSR / (c*MSC + d*MSE);
+df1 = n - 1;
+df2 = (c*MSC + d*MSE)^2/((c*MSC)^2/(k-1) + (d*MSE)^2/((n-1)*(k-1)));
+p = 1-fcdf(F, df1, df2);
+
+a = r/(n*(1-r));
+b = 1+r*(n-1)/(n*(1-r));
+v = (a*MSC + b*MSE)^2/((a*MSC)^2/(k-1) + (b*MSE)^2/((n-1)*(k-1)));
+
+Fs = finv(1-alpha/2, n-1, v);
+LB = n*(MSR - Fs*MSE)/(Fs*(MSC-MSE) + n*MSR);
+
+Fs = finv(1-alpha/2, v, n-1);
+UB = n*(Fs*MSR - MSE)/(MSC - MSE + n*Fs*MSR);
+
diff --git a/cluster_peptides/LogPeptideList.m b/cluster_peptides/LogPeptideList.m
new file mode 100755
index 0000000..7e25860
--- /dev/null
+++ b/cluster_peptides/LogPeptideList.m
@@ -0,0 +1,17 @@
+function [] = LogPeptideList( idx, pep_data, ref_ann, label)
+% Write out lists
+gid_list = cat(1,pep_data.gid_list{idx});
+gid_unq = unique(gid_list);
+[~,~,idx_R] = intersect(gid_unq,ref_ann.gid);
+sym_unq = unique(ref_ann.sym(idx_R));
+
+    fW = fopen(sprintf('resultSet_GI_%s.txt',label),'w');
+    fprintf(fW,'%s\n',gid_unq{:});
+    fclose(fW);
+    
+    fW = fopen(sprintf('resultSet_SYM_%s.txt',label),'w');
+    fprintf(fW,'%s\n',sym_unq{:});
+    fclose(fW);
+
+end
+
diff --git a/cluster_peptides/MatchIDList.m b/cluster_peptides/MatchIDList.m
new file mode 100755
index 0000000..f8ff2db
--- /dev/null
+++ b/cluster_peptides/MatchIDList.m
@@ -0,0 +1,18 @@
+function [ idx ] = MatchIDList(map, data, qry)
+% get symbols that match
+
+qry_sym = unique(qry);
+
+idx_sym = cellfun(@(x) any(strcmp(x,qry_sym)), map.sym_unq);
+
+qry_pep = unique(map.gid(idx_sym(map.idx_sym_rev)));
+
+[gid_unq,~,gid_unq_rev] = unique(data.gid_ary);
+
+idx_pep = cellfun(@(x) any(strcmp(x,qry_pep)), gid_unq);
+pos_pep = data.gid_ary_rev(idx_pep(gid_unq_rev));
+idx = false(size(data.length,1),1);
+idx(unique(pos_pep)) = true;
+
+end
+
diff --git a/cluster_peptides/PeptideAnalysis.m b/cluster_peptides/PeptideAnalysis.m
new file mode 100755
index 0000000..6a62f9f
--- /dev/null
+++ b/cluster_peptides/PeptideAnalysis.m
@@ -0,0 +1,201 @@
+% PeptideAnalysis
+clc
+clear all
+close all
+
+%%% --- calculate peptides metrics --- %%%
+if exist('PeptidesMetrics.mat','file') ~= 2
+    fprintf('Calculate metrics ...\n');
+    % read data
+    pep_data = ReadPeptideData('./data/865_peptides_refseq_07Sep2015.txt');
+    ref_ann = ReadReferenceAnnotation('./data/proteinRefSeq_rat_14Sep2015.txt');
+    
+    % calculate metrics
+    [ary_icc, ary_dab] = CalculateMeasures(pep_data);
+    
+    % save data file
+    save('PeptidesMetrics.mat','pep_data','ref_ann','ary_icc','ary_dab');
+else
+    fprintf('Load metrics ...\n');
+    load('PeptidesMetrics.mat');
+end
+
+%%% --- simulation --- %%%
+if exist('PeptidesSimulation.mat','file') ~= 2
+    fprintf('Simulate thresholds ... \n');
+    
+    % define initial thresholds
+    thresh_dab = 0.5;
+    thresh_icc = 0.75;
+    thresh_scr = prctile(pep_data.score, 5);
+    
+    Nsim = 10000;
+    Ndrw = 100;
+    
+    % index of training sets
+    idx_train_pos = (ary_icc > thresh_icc) & (ary_dab > thresh_dab) & (pep_data.score > thresh_scr);
+    idx_train_neg = (~idx_train_pos);
+    
+    % position of trained peptides
+    psn_train_pos = find(idx_train_pos);
+    psn_train_neg = find(idx_train_neg);
+    
+    % count of sets
+    cnt_train_pos = sum(idx_train_pos);
+    cnt_train_neg = sum(idx_train_neg);
+    
+    % define training matrix (ICC,dAB, Score)
+    mtx_train = [ary_icc, ary_dab, pep_data.score];
+    
+    % accumulate training choice
+    idx_used_pos = zeros(size(pep_data.score,1),1);
+    idx_used_neg = zeros(size(pep_data.score,1),1);
+    
+    % define training groups
+    grp_train = true(2*Ndrw,1);
+    grp_train(Ndrw+1:end) = false;
+    
+    % allocate class matrix
+    mtx_class = false(size(pep_data.score,1),Nsim);
+    for k = 1 : Nsim
+        
+        % shuffle random sampling position 
+        psn_smp_pos = psn_train_pos(randperm(cnt_train_pos));
+        psn_smp_neg = psn_train_neg(randperm(cnt_train_neg));
+        
+        % draw random sampling position
+        psn_smp_pos = psn_smp_pos(1:Ndrw);
+        psn_smp_neg = psn_smp_neg(1:Ndrw);
+        
+        % accumulate choice
+        idx_used_pos(psn_smp_pos) = idx_used_pos(psn_smp_pos) + 1;
+        idx_used_neg(psn_smp_neg) = idx_used_neg(psn_smp_neg) + 1;
+        
+        % build training matrix
+        mtx_train_pos = mtx_train(psn_smp_pos,:);
+        mtx_train_neg = mtx_train(psn_smp_neg,:);
+        
+        % train svm classifier
+        svm = svmtrain([mtx_train_pos;mtx_train_neg],...
+                       grp_train,...
+                       'kernel_function','quadratic');
+       
+        % classify matrix
+        mtx_class(:,k) = svmclassify(svm, mtx_train);
+                   
+    end
+    
+    % calculate simulation result
+    idx_sim_pos = sum(mtx_class,2)/Nsim >= 0.95;
+    idx_sim_neg = sum(mtx_class,2)/Nsim <= 0.05;
+    idx_sim_buf = ~(idx_sim_pos | idx_sim_neg);
+    fprintf('Positive: %d\n',sum(idx_sim_pos));
+    fprintf('Buffer: %d\n',sum(idx_sim_buf));
+    fprintf('Negative: %d\n',sum(idx_sim_neg));
+    fprintf('Total: %d\n',size(idx_sim_pos,1));
+    
+    save('PeptidesSimulation.mat','idx_sim_pos','idx_sim_neg','idx_sim_buf','Nsim','Ndrw','thresh_icc','thresh_dab','thresh_scr');
+    %}
+else
+    fprintf('Load simulation ... \n');
+    %load('PeptidesSimulation.mat');
+    idx_sim_pos = (ary_icc > 0.9) & (ary_dab > 0.8);
+    idx_sim_neg = ~idx_sim_pos;
+end
+
+%%% --- write out lists --- %%%
+LogPeptideList( idx_sim_pos, pep_data, ref_ann, 'CoreGlyco_13Oct2015');
+LogPeptideList( idx_sim_neg, pep_data, ref_ann, 'Filtered_13Oct2015');
+%LogPeptideList( idx_sim_buf, pep_data, ref_ann, 'Buffer_13Oct2015');
+
+%%% --- plot data --- %%%
+% control genes
+lbl_pos = {'Cdh2';'Gabra1';'Gabrb3';'Gabrg2';'Gria1';'Gria2';'Gria3';'Gria4';'Grin1';'Grin2a';'Grin2b'};
+lbl_neg = {'Cacng8','Nlgn2'};
+
+% index controls
+idx_ctrl_pos = MatchIDList(ref_ann, pep_data, lbl_pos);
+idx_ctrl_neg = MatchIDList(ref_ann, pep_data, lbl_neg);
+
+x_lim = [-0.2,1];
+y_lim = [-1,1];
+X = ary_icc;
+Y = ary_dab;
+figure('Color','w');
+hold on;
+plot(x_lim,[0,0],'k-.','LineWidth',0.1);
+plot([0,0],y_lim,'k-.','LineWidth',0.1);
+%plot(x_lim,[thresh_dab,thresh_dab],'k:','LineWidth',0.1);
+%plot([thresh_icc,thresh_icc],y_lim,'k:','LineWidth',0.1);
+
+plot(X(idx_sim_neg),Y(idx_sim_neg),'.','Color',[.7,.7,.7],'MarkerSize',3);
+%plot(X(idx_sim_buf),Y(idx_sim_buf),'.','Color',[.9,.9,.9],'MarkerSize',3);
+plot(X(idx_sim_pos),Y(idx_sim_pos),'.','Color',[.25,.25,.25],'MarkerSize',3);
+
+plot(X(idx_ctrl_neg),Y(idx_ctrl_neg),'.','Color',[255,69,0]./255,'MarkerSize',7.5);
+plot(X(idx_ctrl_pos),Y(idx_ctrl_pos),'.','Color',[50,205,50]./255,'MarkerSize',7.5);
+
+%{
+text(0.6,-0.2,...
+    sprintf('SVM clustering\n%d simulations\n %d peptides',Nsim,size(idx_sim_pos,1)),...
+    'VerticalAlignment','top','HorizontalAlignment','left','FontSize',8,'BackgroundColor','w');
+%}
+
+plot(0.6,-0.6,'.','Color',[.7,.7,.7],'MarkerSize',12);
+text(0.625,-0.6,sprintf('%d "bad" peptides',sum(idx_sim_neg)),...
+    'VerticalAlignment','middle','HorizontalAlignment','left','FontSize',8,'BackgroundColor','w');
+
+%{
+plot(0.6,-0.6,'.','Color',[.9,.9,.9],'MarkerSize',12);
+text(0.625,-0.6,sprintf('%d "buffer" peptides',sum(idx_sim_buf)),...
+    'VerticalAlignment','middle','HorizontalAlignment','left','FontSize',8,'BackgroundColor','w');
+%}
+    
+plot(0.6,-0.7,'.','Color',[.25,.25,.25],'MarkerSize',12);
+text(0.625,-0.7,sprintf('%d "good" peptides',sum(idx_sim_pos)),...
+    'VerticalAlignment','middle','HorizontalAlignment','left','FontSize',8,'BackgroundColor','w');
+
+plot(0.6,-0.8,'.','Color',[255,69,0]./255,'MarkerSize',12);
+text(0.625,-0.8,sprintf('%d neg.controls',sum(idx_ctrl_neg)),...
+    'VerticalAlignment','middle','HorizontalAlignment','left','FontSize',8,'BackgroundColor','w');
+
+plot(0.6,-0.9,'.','Color',[50,205,50]./255,'MarkerSize',12);
+text(0.625,-0.9,sprintf('%d pos.controls',sum(idx_ctrl_pos)),...
+    'VerticalAlignment','middle','HorizontalAlignment','left','FontSize',8,'BackgroundColor','w');
+hold off;
+
+xlabel('reproducibility [intraclass correlation]','FontSize',12);
+ylabel('normalized expression [(A-B)/max(A,B)]','FontSize',12);
+%print(gcf,'-dpsc2','-r300','figureX_Clustering_22Sep2015.eps');
+
+figure('Color','w');
+[frq_all, x_bins] = hist(ary_dab(~idx_ctrl_pos), 100);
+frq_cor = hist(ary_dab(idx_ctrl_pos), x_bins);
+hold on;
+plot(x_bins,cumsum(frq_all./sum(frq_all)),'k');
+plot(x_bins,cumsum(frq_cor./sum(frq_cor)),'g');
+plot([-0.9,-0.8],[.9,.9],'k');
+text(-0.7,0.9,'overall peptides','FontSize',12,'VerticalALignment','middle','HorizontalALignment','left');
+plot([-0.9,-0.8],[.8,.8],'g');
+text(-0.7,0.8,'pos.control peptides','FontSize',12,'VerticalALignment','middle','HorizontalALignment','left');
+hold off;
+set(gca,'Box','off','YLim',[-0.05,1.05],'YTick',0:.2:1);
+xlabel('normalized expression [(A-B)/max(A,B)]','FontSize',12);
+ylabel('cumulative ratio','FontSize',12);
+print(gcf,'-dpsc2','-r300','figureX_Difference_11Nov2015.eps');
+
+figure('Color','w');
+[frq_all, x_bins] = hist(ary_icc(~idx_ctrl_pos), 100);
+frq_cor = hist(ary_icc(idx_ctrl_pos), x_bins);
+hold on;
+plot(x_bins,cumsum(frq_all./sum(frq_all)),'k');
+plot(x_bins,cumsum(frq_cor./sum(frq_cor)),'g');
+plot([-0.9,-0.8],[.9,.9],'k');
+text(-0.7,0.9,'overall peptides','FontSize',12,'VerticalALignment','middle','HorizontalALignment','left');
+plot([-0.9,-0.8],[.8,.8],'g');
+text(-0.7,0.8,'pos.control peptides','FontSize',12,'VerticalALignment','middle','HorizontalALignment','left');
+hold off;
+set(gca,'Box','off','YLim',[-0.05,1.05],'YTick',0:.2:1);
+xlabel('reproducibility [intraclass correlation]','FontSize',12);
+ylabel('cumulative ratio','FontSize',12);
+print(gcf,'-dpsc2','-r300','figureX_ICC_11Nov2015.eps');
\ No newline at end of file
diff --git a/cluster_peptides/PeptideClustering.m b/cluster_peptides/PeptideClustering.m
new file mode 100755
index 0000000..e8a6b16
--- /dev/null
+++ b/cluster_peptides/PeptideClustering.m
@@ -0,0 +1,80 @@
+% PeptideClustering
+clc
+close all
+%
+clear all
+
+%%% --- calculate peptides metrics --- %%%
+if exist('PeptidesMetrics.mat','file') ~= 2
+    fprintf('Calculate metrics ...\n');
+    % read data
+    pep_data = ReadPeptideData('./data/865_peptides_refseq_07Sep2015.txt');
+    ref_ann = ReadReferenceAnnotation('./data/proteinRefSeq_rat_14Sep2015.txt');
+    
+    % calculate metrics
+    [ary_icc, ary_dab] = CalculateMeasures(pep_data);
+    
+    % save data file
+    save('PeptidesMetrics.mat','pep_data','ref_ann','ary_icc','ary_dab');
+else
+    fprintf('Load metrics ...\n');
+    load('PeptidesMetrics.mat');
+end
+%%% --- --- --- %%%
+%}
+%clearvars -except ary_icc ary_dab;
+x = ary_icc;
+y = ary_dab;
+
+% hierarchical clustering
+D = pdist([x,y],'euclidean');
+Z = linkage(D,'average');
+idx = cluster(Z,'cutoff',0.1,'criterion','distance');
+%c = cophenet(Z,D);
+
+% cluster centers
+ctr_x = accumarray(idx,x,[max(idx),1],@mean);
+ctr_y = accumarray(idx,y,[max(idx),1],@mean);
+
+cls_weight = sqrt((ctr_y + 1).^2 + (ctr_x +1).^2); 
+
+clr_mtx = (cls_weight - min(cls_weight))./(max(cls_weight)-min(cls_weight));
+clr_mtx = [clr_mtx,rand(max(idx),1),1-clr_mtx];
+% thresholds
+%
+thresh_icc = 0.5;
+thresh_dab = 0.5;
+
+cls_max_x = accumarray(idx,x,[max(idx),1],@max);
+cls_max_y = accumarray(idx,y,[max(idx),1],@max);
+
+cls_min_x = accumarray(idx,x,[max(idx),1],@min);
+cls_min_y = accumarray(idx,y,[max(idx),1],@min);
+
+
+
+T = (cls_min_x >= thresh_icc) & (cls_min_y >= thresh_dab);
+%P = T(idx);
+P = idx == max(idx);
+
+%LogPeptideList( P, pep_data, ref_ann, 'CoreGlyco_15Oct2015');
+%LogPeptideList( ~P, pep_data, ref_ann, 'Filtered_15Oct2015');
+
+figure('Color','w');
+hold on;
+plot(x(~P),y(~P),'.','Color',[.7,.7,.7]);
+plot(x(P),y(P),'.','Color',[.0,.9,.0]);
+hold off;
+%}
+%
+
+figure('Color','w');
+hold on;
+for k = 1 : max(idx)
+    
+    plot(x(idx==k),y(idx==k),'.','Color',clr_mtx(k,:));
+end
+hold off;
+xlabel('reproducibility [intra class correlation]','fontsize',10);
+ylabel('expression [(A-B)/max(A,B)]','fontsize',10);
+%}
\ No newline at end of file
diff --git a/cluster_peptides/PeptideSupplementary.m b/cluster_peptides/PeptideSupplementary.m
new file mode 100755
index 0000000..2bb9af4
--- /dev/null
+++ b/cluster_peptides/PeptideSupplementary.m
@@ -0,0 +1,98 @@
+% PeptideSupplementary
+clc
+close all
+%
+clear all
+load PeptidesMetrics;
+%}
+clearvars -except ary_dab ary_icc pep_data ref_ann;
+
+idx_good = (ary_icc > 0.9) & (ary_dab > 0.8);
+
+gid_good = unique(cat(1,pep_data.gid_list{idx_good}));
+
+
+cnt = size(pep_data.gid_list,1);
+idx_good_all = false(cnt,1);
+k = 1;
+tic
+for k = 1 : cnt
+    i = intersect(pep_data.gid_list{k},gid_good);
+    idx_good_all(k) = ~isempty(i);
+end  
+toc
+
+
+figure('Color','w');
+hold on;
+plot(ary_icc(~idx_good_all),ary_dab(~idx_good_all),'.','Color',[.75,.75,.75],'MarkerSize',3);
+%plot(ary_icc(idx_good_all),ary_dab(idx_good_all),'k.','MarkerSize',3);
+hold off;
+set(gca,'Box','Off',...
+        'XLim',[-0.2,1.05],...
+        'YLim',[-1.05,1.05]);
+xlabel('reproducibility [intraclass correlation]','FontSize',12);
+ylabel('normalized expression [(A-B)/max(A,B)]','FontSize',12);
+print(gcf,'-dpsc2','-r300','figureX_Clustering_NoGlyco_12Nov2015.eps');
+
+figure('Color','w');
+hold on;
+%plot(ary_icc(~idx_good_all),ary_dab(~idx_good_all),'.','Color',[.75,.75,.75],'MarkerSize',3);
+plot(ary_icc(idx_good_all),ary_dab(idx_good_all),'k.','MarkerSize',3);
+hold off;
+set(gca,'Box','Off',...
+        'XLim',[-0.2,1.05],...
+        'YLim',[-1.05,1.05]);
+xlabel('reproducibility [intraclass correlation]','FontSize',12);
+ylabel('normalized expression [(A-B)/max(A,B)]','FontSize',12);
+print(gcf,'-dpsc2','-r300','figureX_Clustering_Glyco_12Nov2015.eps');
+
+
+idx_ctrl_pos = idx_good_all;
+disp(sum(idx_ctrl_pos));
+disp(sum(~idx_ctrl_pos));
+figure('Color','w');
+[frq_all, x_bins] = hist(ary_dab(~idx_ctrl_pos), 100);
+frq_cor = hist(ary_dab(idx_ctrl_pos), x_bins);
+y_all = cumsum(frq_all./sum(frq_all));
+y_crr = cumsum(frq_cor./sum(frq_cor));
+[~,p] = kstest2(y_all,y_crr,'Alpha',0.01);
+hold on;
+plot(x_bins,cumsum(frq_all./sum(frq_all)),'k');
+plot(x_bins,cumsum(frq_cor./sum(frq_cor)),'g');
+plot([-0.9,-0.8],[.9,.9],'k');
+text(-0.7,0.9,'"bad" peptides','FontSize',12,'VerticalALignment','middle','HorizontalALignment','left');
+plot([-0.9,-0.8],[.8,.8],'g');
+text(-0.7,0.8,'core-glyco peptides','FontSize',12,'VerticalALignment','middle','HorizontalALignment','left');
+text(-0.7,0.7,sprintf('KSTest %.2E',p),'FontSize',12,'VerticalAlignment','middle','HorizontalAlignment','left');
+hold off;
+
+
+set(gca,'Box','off','YLim',[-0.05,1.05],'YTick',0:.2:1);
+xlabel('normalized expression [(A-B)/max(A,B)]','FontSize',12);
+ylabel('cumulative ratio','FontSize',12);
+print(gcf,'-dpsc2','-r300','figureX_Difference_12Nov2015.eps');
+
+figure('Color','w');
+[frq_all, x_bins] = hist(ary_icc(~idx_ctrl_pos), 100);
+frq_cor = hist(ary_icc(idx_ctrl_pos), x_bins);
+x_all = cumsum(frq_all./sum(frq_all));
+x_crr = cumsum(frq_cor./sum(frq_cor));
+[~,p] = kstest2(x_all,x_crr,'Alpha',0.01);
+hold on;
+plot(x_bins,cumsum(frq_all./sum(frq_all)),'k');
+plot(x_bins,cumsum(frq_cor./sum(frq_cor)),'g');
+plot([-0.9,-0.8],[.9,.9],'k');
+text(-0.7,0.9,'"bad" peptides','FontSize',12,'VerticalALignment','middle','HorizontalALignment','left');
+plot([-0.9,-0.8],[.8,.8],'g');
+text(-0.7,0.8,'core-glyco peptides','FontSize',12,'VerticalALignment','middle','HorizontalALignment','left');
+text(-0.7,0.7,sprintf('KSTest %.2E',p),'FontSize',12,'VerticalAlignment','middle','HorizontalAlignment','left');
+hold off;
+set(gca,'Box','off','YLim',[-0.05,1.05],'YTick',0:.2:1);
+xlabel('reproducibility [intraclass correlation]','FontSize',12);
+ylabel('cumulative ratio','FontSize',12);
+print(gcf,'-dpsc2','-r300','figureX_ICC_12Nov2015.eps');    
+    
+
+
+
diff --git a/cluster_peptides/ReadPeptideData.m b/cluster_peptides/ReadPeptideData.m
new file mode 100755
index 0000000..ea29523
--- /dev/null
+++ b/cluster_peptides/ReadPeptideData.m
@@ -0,0 +1,153 @@
+function [ data ] = ReadPeptideData(query_file)
+% read peptide information
+
+    %%% --- read raw data --- %%%
+    data = ReadRawData(query_file);
+    
+    %%% --- clean peptide naming --- %%%
+    data = CleanPeptideLabels(data);
+    
+    %%% --- clean empty intensity --- %%%
+    data = CleanEmptyIntensity(data);
+
+    %%% --- set peptide list and index --- %%%
+    data = GetPeptideList(data);
+    
+    %%% --- set experiment, sample, replica index --- %%%
+    data = SetConfigColumns(data);
+    
+end
+
+function [data] = SetConfigColumns(data)
+
+    %%% --- set experiment --- %%%
+    idx_exp = ones(24,1);
+    idx_exp(13:end) = 2;
+    data.idx_exp = idx_exp;
+    
+    %%% --- set sample --- %%%
+    idx_smp = [repmat('A',6,1);repmat('B',6,1);repmat('A',6,1);repmat('B',6,1)];
+    data.idx_smp = idx_smp;
+    
+    %%% --- set replica --- %%%
+    idx_rep = ones(6,1);
+    idx_rep(4:end) = 2;
+    idx_rep = repmat(idx_rep, 4,1);
+    data.idx_rep = idx_rep;
+end
+
+function [data] = GetPeptideList(data)
+    
+    %%% --- update gid list with razor --- %%%
+    gid_list = cellfun(@(x,y) {unique([x;y])},data.gid_razor,data.gid_list);
+    data.gid_list = gid_list;
+    
+    %%% --- symbol dictionary --- %%%
+    data.gid_cnt = cellfun('size',gid_list,1);
+    data.gid_ary = cat(1,gid_list{:});
+    
+    %%% --- order matrix index --- %%%
+    ary_idx = false(sum(data.gid_cnt,1),1);
+    ary_idx(cumsum([1;data.gid_cnt(1:end-1)])) = true;
+    data.gid_ary_rev = cumsum(ary_idx);
+    
+    %%% --- order razor ids --- %%%
+    idx_raz = cellfun(@(x,y) {strcmp(x,y)},data.gid_razor,gid_list);
+    data.gid_ary_razor = cat(1,idx_raz{:});
+%}    
+end
+
+function [data] = CleanEmptyIntensity(data)
+
+    % get empty index
+    idx_empty = all(data.intensity == 0, 2);
+    idx_list = strcmp('',data.gid_list);
+    idx_razor = strcmp('',data.gid_razor);
+    idx_empty = idx_empty | (idx_list & idx_razor);
+    
+    % remove from table
+    data.seqaa(idx_empty) = [];
+    data.length(idx_empty) = [];
+    data.gid_list(idx_empty) = [];
+    data.gid_razor(idx_empty) = [];
+    data.mass(idx_empty) = [];
+    data.score(idx_empty) = [];
+    data.intensity(idx_empty) = [];
+    data.imtx(idx_empty,:) = [];
+    
+    % put back razor to list
+    idx_list = strcmp('',data.gid_list);
+    data.gid_list(idx_list) = data.gid_razor(idx_list);
+    data.gid_list = regexp(data.gid_list,'\;','split');
+    data.gid_list = cellfun(@(x) {unique(x')},data.gid_list);
+    
+end
+
+function [data] = CleanPeptideLabels(data)
+
+    list = data.gid_list;
+    razor = data.gid_razor;
+
+    %%% --- clean " --- %%%
+    list = regexprep(list,'\"','');
+    razor = regexprep(razor,'\"','');
+    
+    %%% --- clean gi| --- %%%
+    list = regexprep(list,'gi\|','');
+    razor = regexprep(razor,'gi\|','');
+    
+    %%% --- clean CON__ --- %%%
+    list = regexprep(list,'CON\_\_','');
+    razor = regexprep(razor,'CON\_\_','');
+    
+    %%% --- clean REV__ --- %%%
+    list = regexprep(list,'REV\_\_','');
+    razor = regexprep(razor,'REV\_\_','');
+    
+    
+    %%% --- clean ENSEMBL: --- %%%
+    list = regexprep(list,'ENSEMBL\:','');
+    razor = regexprep(razor,'ENSEMBL\:','');
+    
+    %%% --- clean REFSEQ: --- %%%
+    list = regexprep(list,'REFSEQ\:','');
+    razor = regexprep(razor,'REFSEQ\:','');
+    %{
+    
+    %%% --- clean all numbers --- %%%
+    for k = 1 : 20
+        peps = regexprep(peps,sprintf('\\-%d',k),'');
+        razor = regexprep(razor,sprintf('\\-%d',k),'');
+    end
+    %}
+    
+    %%% --- update data --- %%%
+    data.gid_list = list;
+    data.gid_razor = razor;
+    
+end
+
+
+function [data] = ReadRawData(query_file)
+    % define text format
+    fmt = repmat({'%*s'},92,1);
+    fmt([2,9,58,59:82]) = {'%n'};
+    fmt([1,3,4]) = {'%s'};
+    fmt = sprintf('%s ',fmt{:});
+    fmt(end) = [];
+
+    % read file
+    fRead = fopen(query_file,'r');
+    fgetl(fRead);
+    txt = textscan(fRead,fmt,'delimiter','\t');
+    fclose(fRead);
+    data.seqaa = txt{1};
+    data.length = cellfun('size',data.seqaa,2);
+    data.gid_list = txt{3};
+    data.gid_razor = txt{4};
+    data.mass = txt{2};
+    data.score = txt{5};
+    data.intensity = log10(txt{6}+1);
+    data.imtx = log10([txt{7:end}]+1);
+    
+end
diff --git a/cluster_peptides/ReadReferenceAnnotation.m b/cluster_peptides/ReadReferenceAnnotation.m
new file mode 100755
index 0000000..9504d13
--- /dev/null
+++ b/cluster_peptides/ReadReferenceAnnotation.m
@@ -0,0 +1,27 @@
+function [ map ] = ReadReferenceAnnotation( file_map )
+% read id map
+
+    fRead = fopen(file_map, 'r');
+    fgetl(fRead);
+    txt = textscan(fRead,'%s %s %s %s %n %s %*s %n','delimiter','\t');
+    fclose(fRead);
+    map.protid = txt{1};
+    map.gid = txt{2};
+    map.sym = txt{3};
+    map.rnaid = txt{4};
+    map.entrez = txt{5};
+    map.description = txt{6};
+    map.length = txt{7};
+    
+    % unique symbols
+    [sym_unq,~,idx_sym_rev] = unique(map.sym);
+    map.idx_sym_rev = idx_sym_rev;
+    map.sym_unq = sym_unq;
+    
+    % unique protein gids
+    [pids_unq,~,idx_pids_rev] = unique(map.protid);
+    map.idx_pids_rev = idx_pids_rev;
+    map.pids_unq = pids_unq;
+    
+end
+
diff --git a/cluster_peptides/anova_rm.m b/cluster_peptides/anova_rm.m
new file mode 100755
index 0000000..10e38cd
--- /dev/null
+++ b/cluster_peptides/anova_rm.m
@@ -0,0 +1,207 @@
+function [p, table] = anova_rm(X, displayopt)
+%   [p, table] = anova_rm(X, displayopt)
+%   Single factor, tepeated measures ANOVA.
+%
+%   [p, table] = anova_rm(X, displayopt) performs a repeated measures ANOVA
+%   for comparing the means of two or more columns (time) in one or more
+%   samples(groups). Unbalanced samples (i.e. different number of subjects 
+%   per group) is supported though the number of columns (followups)should 
+%   be the same. 
+%
+%   DISPLAYOPT can be 'on' (the default) to display the ANOVA table, or 
+%   'off' to skip the display. For a design with only one group and two or 
+%   more follow-ups, X should be a matrix with one row for each subject. 
+%   In a design with multiple groups, X should be a cell array of matrixes.
+% 
+%   Example: Gait-Cycle-times of a group of 7 PD patients have been
+%   measured 3 times, in one baseline and two follow-ups:
+%
+%   patients = [
+%    1.1015    1.0675    1.1264
+%    0.9850    1.0061    1.0230
+%    1.2253    1.2021    1.1248
+%    1.0231    1.0573    1.0529
+%    1.0612    1.0055    1.0600
+%    1.0389    1.0219    1.0793
+%    1.0869    1.1619    1.0827 ];
+%
+%   more over, a group of 8 controls has been measured in the same protocol:
+%
+%   controls = [
+%     0.9646    0.9821    0.9709
+%     0.9768    0.9735    0.9576
+%     1.0140    0.9689    0.9328
+%     0.9391    0.9532    0.9237
+%     1.0207    1.0306    0.9482
+%     0.9684    0.9398    0.9501
+%     1.0692    1.0601    1.0766
+%     1.0187    1.0534    1.0802 ];
+%
+%   We are interested to see if the performance of the patients for the
+%   followups were the same or not:
+%  
+%   p = anova_rm(patients);
+%
+%   By considering the both groups, we can also check to see if the 
+%   follow-ups were significantly different and also check two see that the
+%   two groups had a different performance:
+%
+%   p = anova_rm({patients controls});
+%
+%
+%   ref: Statistical Methods for the Analysis of Repeated Measurements, 
+%     C. S. Daivs, Springer, 2002
+%
+%   Copyright 2008, Arash Salarian
+%   mailto://arash.salarian@ieee.org
+%
+
+if nargin < 2
+    displayopt = 'on';
+end
+
+if ~iscell(X)
+    X = {X};
+end
+
+%number of groups
+s = size(X,2);  
+
+%subjects per group 
+n_h = zeros(s, 1);
+for h=1:s
+    n_h(h) = size(X{h}, 1);    
+end
+n = sum(n_h);
+
+%number of follow-ups
+t = size(X{1},2);   
+
+% overall mean
+y = 0;
+for h=1:s
+    y = y + sum(sum(X{h}));
+end
+y = y / (n * t);
+
+% allocate means
+y_h = zeros(s,1);
+y_j = zeros(t,1);
+y_hj = zeros(s,t);
+y_hi = cell(s,1);
+for h=1:s
+    y_hi{h} = zeros(n_h(h),1);
+end
+
+% group means
+for h=1:s
+    y_h(h) = sum(sum(X{h})) / (n_h(h) * t);
+end
+
+% follow-up means
+for j=1:t
+    y_j(j) = 0;
+    for h=1:s
+        y_j(j) = y_j(j) + sum(X{h}(:,j));
+    end
+    y_j(j) = y_j(j) / n;
+end
+
+% group h and time j mean
+for h=1:s
+    for j=1:t
+        y_hj(h,j) = sum(X{h}(:,j) / n_h(h));
+    end
+end
+
+% subject i'th of group h mean
+for h=1:s
+    for i=1:n_h(h)
+        y_hi{h}(i) = sum(X{h}(i,:)) / t;
+    end
+end
+
+% calculate the sum of squares
+ssG = 0;
+ssSG = 0;
+ssT = 0;
+ssGT = 0;
+ssR = 0;
+
+for h=1:s
+    for i=1:n_h(h)
+        for j=1:t
+            ssG  = ssG  + (y_h(h) - y)^2;
+            ssSG = ssSG + (y_hi{h}(i) - y_h(h))^2;
+            ssT  = ssT  + (y_j(j) - y)^2;
+            ssGT = ssGT + (y_hj(h,j) - y_h(h) - y_j(j) + y)^2;
+            ssR  = ssR  + (X{h}(i,j) - y_hj(h,j) - y_hi{h}(i) + y_h(h))^2;
+        end
+    end
+end
+
+% calculate means
+if s > 1
+    msG  = ssG  / (s-1);
+    msGT = ssGT / ((s-1)*(t-1));
+end
+msSG = ssSG / (n-s);
+msT  = ssT  / (t-1);
+msR  = ssR  / ((n-s)*(t-1));
+
+
+% calculate the F-statistics
+if s > 1
+    FG  = msG  / msSG;
+    FGT = msGT / msR;
+end
+FT  = msT  / msR;
+FSG = msSG / msR;
+
+
+% single or multiple sample designs?
+if s > 1
+    % case for multiple samples
+    pG  = 1 - fcdf(FG, s-1, n-s);
+    pT  = 1 - fcdf(FT, t-1, (n-s)*(t-1));
+    pGT = 1 - fcdf(FGT, (s-1)*(t-1), (n-s)*(t-1));
+    pSG = 1 - fcdf(FSG, n-s, (n-s)*(t-1));
+
+    p = [pT, pG, pSG, pGT];
+
+    table = { 'Source' 'SS' 'df' 'MS' 'F' 'Prob>F'
+        'Time'  ssT t-1 msT FT pT
+        'Group' ssG s-1 msG FG pG
+        'Ineratcion' ssGT (s-1)*(t-1) msGT FGT pGT
+        'Subjects (matching)' ssSG n-s msSG FSG pSG
+        'Error' ssR (n-s)*(t-1) msR  [] []
+        'Total' [] [] [] [] []
+        };
+    table{end, 2} = sum([table{2:end-1,2}]);
+    table{end, 3} = sum([table{2:end-1,3}]);
+
+    if (isequal(displayopt, 'on'))
+        digits = [-1 -1 0 -1 2 4];
+        statdisptable(table, 'multi-sample repeated measures ANOVA', 'ANOVA Table', '', digits);
+    end
+else
+    % case for only one sample
+    pT  = 1 - fcdf(FT, t-1, (n-s)*(t-1));
+    pSG = 1 - fcdf(FSG, n-s, (n-s)*(t-1));
+
+    p = [pT, pSG];
+
+    table = { 'Source' 'SS' 'df' 'MS' 'F' 'Prob>F'
+        'Time'  ssT t-1 msT FT pT
+        'Subjects (matching)' ssSG n-s msSG FSG pSG
+        'Error' ssR (n-s)*(t-1) msR  [] []
+        'Total' [] [] [] [] []
+        };
+    table{end, 2} = sum([table{2:end-1,2}]);
+    table{end, 3} = sum([table{2:end-1,3}]);
+
+    if (isequal(displayopt, 'on'))
+        digits = [-1 -1 0 -1 2 4];
+        statdisptable(table, 'repeated measures ANOVA', 'ANOVA Table', '', digits);
+    end
+end