Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
LTP-transcriptomics/analysis_expression_development.m
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
82 lines (70 sloc)
2.96 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
clc | |
clear variables; | |
close all; | |
% add helpers folder to path | |
addpath(genpath('helpers')); | |
% get records | |
fRead = fopen('../data/gene_counts/gene_counts_180117_only_mapped_with_header.txt','r'); | |
% read first line (header) | |
header = fgetl(fRead); | |
header = strsplit(header); | |
header(1:2) = []; % remove Length from header, not necessary for output file | |
% read counts | |
txt = textscan(fRead,'%s %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n %n','delimiter','\t'); | |
fclose(fRead); | |
symbols = txt{1}; % gene names | |
length = txt{2}; % gene lengths | |
counts = [txt{3:end}] + 1; % counts for each sample | |
% remove entries with rpkm values lower than 2 | |
idx_threshold = calculate_rpkm(counts, length); | |
counts(idx_threshold,:) = []; | |
symbols(idx_threshold) = []; | |
% create genes background list | |
fileID = fopen('data/gene_ontology/background_list.txt','w'); | |
formatSpec = '%s\n'; | |
[nrows,ncols] = size(symbols); | |
for row = 1:nrows | |
fprintf(fileID,formatSpec,symbols{row,:}); | |
end | |
fclose(fileID); | |
% create figures for each experiment time range (5min, 30min, 60min) | |
stats05 = BinFoldChangeTest(counts(:, [2,5]), counts(:, [1,4]), 100, false); | |
stats30 = BinFoldChangeTest(counts(:, [7,10]), counts(:, [6,9]), 100, false); | |
stats60 = BinFoldChangeTest(counts(:, [12,15]), counts(:, [11,14,16]), 100, false); | |
M = [stats05.M, stats30.M, stats60.M]; | |
% get all gene expression combinations over time | |
% 'd' means significantly downregulated | |
% 'n' means no significant change in expression (normal) | |
% 'u' means significantly upregulated | |
% i.e.: u,n,n means gene upregulated in sample01, normal in sample02, normal in sample03 | |
% http://stackoverflow.com/questions/4165859/generate-all-possible-combinations-of-the-elements-of-some-vectors-cartesian-pr#4169488 | |
x = ['d', 'n', 'u']; % Set of possible gene expression states | |
K = 3; % Length of each permutation | |
s = struct('d', 'idx_x', 'n', 'idx_none', 'u', 'idx_y'); % reference of each state to corresponding index from fold change stats | |
% Create all possible permutations (with repetition) of states stored in x | |
C = cell(K, 1); % Preallocate a cell array | |
[C{:}] = ndgrid(x); % Create K grids of values | |
y = cellfun(@(x){x(:)}, C); % Convert grids to column vectors | |
y = [y{:}]; % Obtain all permutations | |
[nrows,ncols] = size(y); | |
exp = []; | |
exp_header = {}; | |
for row = 1:nrows | |
exp_idx = (stats05.(s.(y(row,1))) & stats30.(s.(y(row,2))) & stats60.(s.(y(row,3)))); | |
exp_geneIds = symbols(exp_idx, :); | |
combination_label = y(row,:); | |
% write gene IDs to file | |
fileName = sprintf('data/gene_ontology/%s.txt', combination_label); | |
fileHandler = fopen(fileName,'w'); | |
formatSpec = '%s\n'; | |
fprintf(fileHandler,formatSpec,exp_geneIds{:}); | |
fclose(fileHandler); | |
% append expression combination label to header | |
exp_header{end+1} = combination_label; | |
% write index for all genes | |
exp = [exp exp_idx]; | |
end | |
% create table with row and column labels | |
exp_table = array2table(exp, 'RowNames', symbols, 'VariableNames', exp_header); | |
% remove helpers folder from path | |
rmpath('helpers'); |