-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Updated README and uploaded all analysis scripts and figures.
- Loading branch information
1 parent
5ae4cd8
commit 69d9bb9
Showing
23 changed files
with
524 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,4 @@ | ||
.DS_Store | ||
*~ | ||
bin | ||
data |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
clc | ||
clear variables; | ||
close all; | ||
|
||
% get records | ||
fRead = fopen('../gene_counts/gene_counts_180117_only_mapped_with_header.txt','r'); | ||
% read first line (header) | ||
header = fgetl(fRead); | ||
header = strsplit(header); | ||
header(2) = []; % remove Length from header, not necessary for output file | ||
|
||
% read counts | ||
txt = textscan(fRead,'%s %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n','delimiter','\t'); | ||
fclose(fRead); | ||
symbols = txt{1}; % gene names | ||
length = txt{2}; % gene lengths | ||
counts = [txt{3:end}] + 1; % counts for each sample | ||
|
||
% remove entries with rpkm values lower than 2 | ||
idx_threshold = calculate_rpkm(counts, length); | ||
counts(idx_threshold,:) = []; | ||
symbols(idx_threshold) = []; | ||
|
||
% create figures for each experiment time range (5min, 30min, 60min) | ||
stats05 = NBinFitTest(counts(:, [2,5]), counts(:, [1,4]), 'Constant', true); | ||
stats30 = NBinFitTest(counts(:, [7,10]), counts(:, [6,9]), 'Constant', true); | ||
stats60 = NBinFitTest(counts(:, [12,15]), counts(:, [11,14,16]), 'Constant', true); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
clc | ||
clear variables; | ||
close all; | ||
|
||
% get records | ||
fRead = fopen('../gene_counts/gene_counts_180117_only_mapped_with_header.txt','r'); | ||
% read first line (header) | ||
header = fgetl(fRead); | ||
header = strsplit(header); | ||
header(1:2) = []; % remove Length from header, not necessary for output file | ||
|
||
% read counts | ||
txt = textscan(fRead,'%s %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n','delimiter','\t'); | ||
fclose(fRead); | ||
symbols = txt{1}; % gene names | ||
length = txt{2}; % gene lengths | ||
counts = [txt{3:end}] + 1; % counts for each sample | ||
|
||
% remove entries with rpkm values lower than 2 | ||
idx_threshold = calculate_rpkm(counts, length); | ||
counts(idx_threshold,:) = []; | ||
symbols(idx_threshold) = []; | ||
|
||
% create figures for each experiment time range (5min, 30min, 60min) | ||
stats05 = BinFoldChangeTest(counts(:, [2,5]), counts(:, [1,4]), 100, false); | ||
stats30 = BinFoldChangeTest(counts(:, [7,10]), counts(:, [6,9]), 100, false); | ||
stats60 = BinFoldChangeTest(counts(:, [12,15]), counts(:, [11,14,16]), 100, false); | ||
|
||
% idx_x contains genes meaninfully underexpressed | ||
% idx_y contains genes meaninfully overexpressed | ||
idx_common = stats05.idx_x | stats05.idx_y | stats30.idx_x | stats30.idx_y | stats60.idx_x | stats60.idx_y; | ||
M = [stats05.M, stats30.M, stats60.M]; | ||
M = M(idx_common,:); | ||
|
||
R = tiedrank(M)./size(M,1); | ||
|
||
clustergram(R,'Standardize', 2,'cluster',1, 'ColumnLabels', {'05 min', '30 min', '60 min'}); | ||
|
||
%{ | ||
mtx = calculate_deSeq(counts); | ||
mtx = mtx(idx_common,:); | ||
idx_remove = [3,8,13]; | ||
mtx(:,idx_remove) = []; | ||
header(idx_remove) = []; | ||
% disp(symbols(stats05.idx_y)); | ||
R = tiedrank(mtx)./size(mtx,1); | ||
clustergram(R,'Standardize',2,'cluster',1) | ||
%} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
clc | ||
clear variables; | ||
close all; | ||
|
||
% get records | ||
fRead = fopen('../gene_counts/gene_counts_180117_only_mapped_with_header.txt','r'); | ||
% read first line (header) | ||
header = fgetl(fRead); | ||
header = strsplit(header); | ||
header(1:2) = []; % remove Length from header, not necessary for output file | ||
|
||
% read counts | ||
txt = textscan(fRead,'%s %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n','delimiter','\t'); | ||
fclose(fRead); | ||
symbols = txt{1}; % gene names | ||
length = txt{2}; % gene lengths | ||
counts = [txt{3:end}] + 1; % counts for each sample | ||
|
||
% remove entries with rpkm values lower than 2 | ||
idx_threshold = calculate_rpkm(counts, length); | ||
counts(idx_threshold,:) = []; | ||
symbols(idx_threshold) = []; | ||
|
||
% create genes background list | ||
fileID = fopen('data/gene_ontology/background_list.txt','w'); | ||
formatSpec = '%s\n'; | ||
[nrows,ncols] = size(symbols); | ||
for row = 1:nrows | ||
fprintf(fileID,formatSpec,symbols{row,:}); | ||
end | ||
fclose(fileID); | ||
|
||
% create figures for each experiment time range (5min, 30min, 60min) | ||
stats05 = BinFoldChangeTest(counts(:, [2,5]), counts(:, [1,4]), 100, false); | ||
stats30 = BinFoldChangeTest(counts(:, [7,10]), counts(:, [6,9]), 100, false); | ||
stats60 = BinFoldChangeTest(counts(:, [12,15]), counts(:, [11,14,16]), 100, false); | ||
|
||
M = [stats05.M, stats30.M, stats60.M]; | ||
|
||
% get all gene expression combinations over time | ||
% 'd' means significantly downregulated | ||
% 'n' means no significant change in expression (normal) | ||
% 'u' means significantly upregulated | ||
% i.e.: u,n,n means gene upregulated in sample01, normal in sample02, normal in sample03 | ||
|
||
% http://stackoverflow.com/questions/4165859/generate-all-possible-combinations-of-the-elements-of-some-vectors-cartesian-pr#4169488 | ||
x = ['d', 'n', 'u']; % Set of possible gene expression states | ||
K = 3; % Length of each permutation | ||
s = struct('d', 'idx_x', 'n', 'idx_none', 'u', 'idx_y'); % reference of each state to corresponding index from fold change stats | ||
|
||
% Create all possible permutations (with repetition) of states stored in x | ||
C = cell(K, 1); % Preallocate a cell array | ||
[C{:}] = ndgrid(x); % Create K grids of values | ||
y = cellfun(@(x){x(:)}, C); % Convert grids to column vectors | ||
y = [y{:}]; % Obtain all permutations | ||
|
||
[nrows,ncols] = size(y); | ||
exp = []; | ||
exp_header = {}; | ||
for row = 1:nrows | ||
exp_idx = (stats05.(s.(y(row,1))) & stats30.(s.(y(row,2))) & stats60.(s.(y(row,3)))); | ||
exp_geneIds = symbols(exp_idx, :); | ||
combination_label = y(row,:); | ||
% write gene IDs to file | ||
fileName = sprintf('data/gene_ontology/%s.txt', combination_label); | ||
fileHandler = fopen(fileName,'w'); | ||
formatSpec = '%s\n'; | ||
fprintf(fileHandler,formatSpec,exp_geneIds{:}); | ||
fclose(fileHandler); | ||
% append expression combination label to header | ||
exp_header{end+1} = combination_label; | ||
% write index for all genes | ||
exp = [exp exp_idx]; | ||
end | ||
% create table with row and column labels | ||
exp_table = array2table(exp, 'RowNames', symbols, 'VariableNames', exp_header); | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
clc | ||
clear variables; | ||
close all; | ||
|
||
% get records | ||
fRead = fopen('../gene_counts/gene_counts_180117_only_mapped_with_header.txt','r'); | ||
% read first line (header) | ||
header = fgetl(fRead); | ||
header = strsplit(header); | ||
header(1:2) = []; % remove Length from header, not necessary for output file | ||
|
||
% read counts | ||
txt = textscan(fRead,'%s %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n','delimiter','\t'); | ||
fclose(fRead); | ||
symbols = txt{1}; % gene names | ||
length = txt{2}; % gene lengths | ||
counts = [txt{3:end}] + 1; % counts for each sample | ||
|
||
% remove entries with rpkm values lower than 2 | ||
idx_threshold = calculate_rpkm(counts, length); | ||
counts(idx_threshold,:) = []; | ||
symbols(idx_threshold) = []; | ||
|
||
% create figures for each experiment time range (5min, 30min, 60min) | ||
stats05 = BinFoldChangeTest(counts(:, [2,5]), counts(:, [1,4]), 100, true); | ||
stats30 = BinFoldChangeTest(counts(:, [7,10]), counts(:, [6,9]), 100, true); | ||
stats60 = BinFoldChangeTest(counts(:, [12,15]), counts(:, [11,14,16]), 100, true); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
clc | ||
clear variables; | ||
close all; | ||
|
||
% get all unique GO terms | ||
fRead = fopen('data/gene_ontology/all_go_terms.txt','r'); | ||
ref_go_terms = textscan(fRead,'%s','delimiter','\t'); | ||
ref_go_terms = ref_go_terms{1}; | ||
fclose(fRead); | ||
|
||
% get go annotation files | ||
annot_files = dir('data/gene_ontology/go_*.txt'); | ||
|
||
go_matrix = zeros(size(ref_go_terms, 1), size(annot_files, 1)); | ||
for i = 1:size(annot_files, 1) | ||
file = annot_files(i); | ||
fRead = fopen(sprintf('data/gene_ontology/%s',file.name),'r'); | ||
content = textscan(fRead,'%s %s %s %n %n %n %n %n','delimiter','\t'); | ||
qry_go_terms = content{1}; | ||
p_values = content{end}; | ||
fclose(fRead); | ||
|
||
[~, idx_ref, idx_qry] = intersect(ref_go_terms, qry_go_terms); | ||
|
||
go_matrix(idx_ref, i) = p_values(idx_qry); | ||
|
||
end | ||
|
||
go_table = array2table(go_matrix, 'RowNames', ref_go_terms, 'VariableNames', cellfun(@(s) strtok(s, '.') ,{annot_files.name}, 'UniformOutput', false)); | ||
|
||
clustergram(go_matrix,'Standardize', 2,'cluster',1, 'ColumnLabels', cellfun(@(s) strtok(s, '.') ,{annot_files.name}, 'UniformOutput', false)); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
clc | ||
clear variables; | ||
close all; | ||
|
||
% get records | ||
fRead = fopen('../gene_counts/gene_counts_180117_only_mapped_with_header.txt','r'); | ||
% read first line (header) | ||
header = fgetl(fRead); | ||
header = strsplit(header); | ||
header(2) = []; % remove Length from header, not necessary for output file | ||
|
||
% read counts | ||
txt = textscan(fRead,'%s %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n','delimiter','\t'); | ||
fclose(fRead); | ||
symbols = txt{1}; % gene names | ||
length = txt{2}; % gene lengths | ||
counts = [txt{3:end}]; % counts for each sample | ||
|
||
% remove entries with rpkm values lower than 2 | ||
idx_threshold = calculate_rpkm(counts, length); | ||
counts(idx_threshold,:) = []; | ||
symbols(idx_threshold) = []; | ||
|
||
% apply deSeq normalization to counts | ||
normalized_counts = calculate_deSeq(counts); | ||
|
||
% get normalized list of gene counts | ||
normalized_cell = [symbols num2cell(normalized_counts)]; | ||
% print list to text file | ||
normalized_table = cell2table(normalized_cell, 'VariableNames', header); | ||
% create file | ||
writetable(normalized_table, 'rpkm_and_deSeq_normalized_data.txt', 'Delimiter','\t'); | ||
|
||
% plot correlation matrix | ||
header(1) = []; % remove gene ids column from header | ||
plotCorrelationMatrix(normalized_counts, header, [16,16], 'correlation_matrix_rpkmBigger2_and_deSeq'); | ||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
function [stats] = BinFoldChangeTest(X,Y,nbins,draw) | ||
|
||
nrm = calculate_deSeq([X,Y]); | ||
Xnrm = nrm(:,1:size(X,2)); | ||
Ynrm = nrm(:,(size(X,2)+1):end); | ||
A = log10(mean(nrm, 2)); | ||
M = log2(mean(Ynrm,2)) - log2(mean(Xnrm,2)); | ||
|
||
% create bins | ||
idx_bin = zeros(size(A,1),1); | ||
idx_bin(1:nbins:end) = 1; | ||
idx_bin = cumsum(idx_bin); | ||
|
||
% resort | ||
[~, idx_sort] = sort(A,'descend'); | ||
idx_rev(idx_sort) = (1:size(idx_sort,1))'; | ||
idx_res = false(size(A,1),1); | ||
Z = zeros(size(A,1),1); | ||
for k = 1 : max(idx_bin) | ||
idx_now = idx_bin == k; | ||
M_now = M(idx_now(idx_rev)); | ||
Z_now = zscore(M_now); | ||
idx_good = abs(Z_now) >= 1.65; | ||
idx_res(idx_now(idx_rev)) = idx_good; | ||
Z(idx_now(idx_rev)) = Z_now; | ||
end | ||
|
||
idx_x = idx_res & (M < median(M)) & (A >= 1); | ||
idx_y = idx_res & (M >= median(M)) & (A >= 1); | ||
idx_none = ~(idx_x | idx_y); | ||
|
||
if draw | ||
|
||
figure('Color','w'); | ||
hold(gca,'on'); | ||
plot(A(idx_none),M(idx_none), '.', 'color', [.65,.65,.65]); | ||
plot(A(idx_x), M(idx_x), '.', 'color', [30,144,255]./255); | ||
plot(A(idx_y), M(idx_y), '.', 'color', [148,0,211]./255); | ||
plot([min(A),max(A)],[median(M),median(M)],'k'); | ||
hold(gca,'off'); | ||
|
||
set(gca, 'box','off',... | ||
'xlim',[min(A), max(A)],... | ||
'ylim',[-max(abs(M)),max(abs(M))]); | ||
|
||
end | ||
|
||
% pack in structure | ||
stats.nrm = nrm; | ||
stats.A = A; | ||
stats.M = M; | ||
stats.Z = Z; | ||
stats.idx_x = idx_x; | ||
stats.idx_y = idx_y; | ||
stats.idx_none = idx_none; | ||
|
||
end |
Oops, something went wrong.