Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
clc
clear variables;
close all;
% add helpers folder to path
addpath(genpath('helpers'));
% get records
fRead = fopen('../data/gene_counts/gene_counts_180117_only_mapped_with_header.txt','r');
% read first line (header)
header = fgetl(fRead);
header = strsplit(header);
header(1:2) = []; % remove Length from header, not necessary for output file
% read counts
txt = textscan(fRead,'%s %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n','delimiter','\t');
fclose(fRead);
symbols = txt{1}; % gene names
length = txt{2}; % gene lengths
counts = [txt{3:end}] + 1; % counts for each sample
% remove entries with rpkm values lower than 2
idx_threshold = calculate_rpkm(counts, length);
counts(idx_threshold,:) = [];
symbols(idx_threshold) = [];
% create genes background list
fileID = fopen('data/gene_ontology/background_list.txt','w');
formatSpec = '%s\n';
[nrows,ncols] = size(symbols);
for row = 1:nrows
fprintf(fileID,formatSpec,symbols{row,:});
end
fclose(fileID);
% create figures for each experiment time range (5min, 30min, 60min)
stats05 = BinFoldChangeTest(counts(:, [2,5]), counts(:, [1,4]), 100, false);
stats30 = BinFoldChangeTest(counts(:, [7,10]), counts(:, [6,9]), 100, false);
stats60 = BinFoldChangeTest(counts(:, [12,15]), counts(:, [11,14,16]), 100, false);
M = [stats05.M, stats30.M, stats60.M];
% get all gene expression combinations over time
% 'd' means significantly downregulated
% 'n' means no significant change in expression (normal)
% 'u' means significantly upregulated
% i.e.: u,n,n means gene upregulated in sample01, normal in sample02, normal in sample03
% http://stackoverflow.com/questions/4165859/generate-all-possible-combinations-of-the-elements-of-some-vectors-cartesian-pr#4169488
x = ['d', 'n', 'u']; % Set of possible gene expression states
K = 3; % Length of each permutation
s = struct('d', 'idx_x', 'n', 'idx_none', 'u', 'idx_y'); % reference of each state to corresponding index from fold change stats
% Create all possible permutations (with repetition) of states stored in x
C = cell(K, 1); % Preallocate a cell array
[C{:}] = ndgrid(x); % Create K grids of values
y = cellfun(@(x){x(:)}, C); % Convert grids to column vectors
y = [y{:}]; % Obtain all permutations
[nrows,ncols] = size(y);
exp = [];
exp_header = {};
for row = 1:nrows
exp_idx = (stats05.(s.(y(row,1))) & stats30.(s.(y(row,2))) & stats60.(s.(y(row,3))));
exp_geneIds = symbols(exp_idx, :);
combination_label = y(row,:);
% write gene IDs to file
fileName = sprintf('data/gene_ontology/%s.txt', combination_label);
fileHandler = fopen(fileName,'w');
formatSpec = '%s\n';
fprintf(fileHandler,formatSpec,exp_geneIds{:});
fclose(fileHandler);
% append expression combination label to header
exp_header{end+1} = combination_label;
% write index for all genes
exp = [exp exp_idx];
end
% create table with row and column labels
exp_table = array2table(exp, 'RowNames', symbols, 'VariableNames', exp_header);
% remove helpers folder from path
rmpath('helpers');