Moved analysis folder to a different location on the storage server and reflected it in the documentation.
MPIBR-Bioinformatics · Mar 7, 2017 · 3d54a5c · 3d54a5c
1 parent 89ea1af
commit 3d54a5c
Show file tree

Hide file tree

Showing 7 changed files with 75 additions and 22 deletions.
diff --git a/README.md b/README.md
@@ -23,7 +23,7 @@
 
 ## Organisation
 
-Files are located under:
+**Raw and preprocessed files are located under:**
 
 `/storage/schu/ProjectGroups/RNA/Data/RNASequencing/Novogene/NHHW162910`
 
@@ -34,10 +34,21 @@ Directory structure:
   - bams: contains the aligned reads as BAM Files
   - logs: contains the log files of each alignment run
 - gene_counts: contains the read counts for each gene in the reference.
-- analysis: contains this repository (all the scripts used for analysis) as well as generated data, external programs, etc.
+
+**Analysis files (i.e., everything produced after gene counting) are located under:**
+
+`/storage/schu/ProjectGroups/RNA/Projects/LTP_transcriptomics/analysis`
+
+The folder `analysis` contains this repository (all the scripts used for analysis) as well as generated data, external programs, etc.
+
+The parent folder `LTP_transcriptomics` also contains a symlink `data` pointing to the raw and preprocessed files mentioned above.
+
 
 ## Preprocessing
 
+Preprocessing steps are run under the folder:
+`/storage/schu/ProjectGroups/RNA/Data/RNASequencing/Novogene/NHHW162910`
+
 ### 1. MD5 checksum
 
 ```bash
@@ -156,6 +167,9 @@ paste -d '\t' <(echo "GeneId") <(echo "Length") <(head -1 gene_counts_180117.txt
 
 ## Analysis
 
+Analysis steps are run under the folder:
+`/storage/schu/ProjectGroups/RNA/Projects/LTP_transcriptomics/analysis`
+
 ### 1. Normalization
 
 Removed entries with RPKM values lower than 2 and applied deSeq normalisation:
@@ -190,6 +204,12 @@ analysis_expression_development.m
 
 Run gene ontology annotation for each condition/combination file generated in the previous step. We use the perl script [GOAnalysis](https://software.scic.corp.brain.mpg.de/projects/MPIBR-Bioinformatics/GOAnalysis) by Georgi Tushev.
 
+First change to the corresponding analysis data folder:
+```bash
+cd data/gene_ontology
+```
+
+Then run the annotation:
 ```bash
 for file in target_genes_lists/*.txt; do file_name=$(basename "$file"); file_name="${file_name%.*}"; perl ../../bin/GOAnalysis/GOAnalysis.pl -obo annotations/go-basic.obo -ann annotations/gene_association.rgd.gz -background background_list.txt -target $file > "go_"$file_name".txt"; done
 ```

diff --git a/analysis_binomial_test.m b/analysis_binomial_test.m
@@ -2,8 +2,11 @@
 clear variables;
 close all;
 
+% add helpers folder to path
+addpath(genpath('helpers'));
+
 % get records
-fRead = fopen('../gene_counts/gene_counts_180117_only_mapped_with_header.txt','r');
+fRead = fopen('../data/gene_counts/gene_counts_180117_only_mapped_with_header.txt','r');
 % read first line (header)
 header = fgetl(fRead);
 header = strsplit(header);
@@ -25,3 +28,6 @@
 stats05 = NBinFitTest(counts(:, [2,5]), counts(:, [1,4]), 'Constant', true);
 stats30 = NBinFitTest(counts(:, [7,10]), counts(:, [6,9]), 'Constant', true);
 stats60 = NBinFitTest(counts(:, [12,15]), counts(:, [11,14,16]), 'Constant', true);
+
+% remove helpers folder from path; genpath mirrors the addpath(genpath(...))
+% call at the top of the script so that subfolders are removed as well
+rmpath(genpath('helpers'));
diff --git a/analysis_clustergram_foldchange.m b/analysis_clustergram_foldchange.m
@@ -2,8 +2,11 @@
 clear variables;
 close all;
 
+% add helpers folder to path
+addpath(genpath('helpers'));
+
 % get records
-fRead = fopen('../gene_counts/gene_counts_180117_only_mapped_with_header.txt','r');
+fRead = fopen('../data/gene_counts/gene_counts_180117_only_mapped_with_header.txt','r');
 % read first line (header)
 header = fgetl(fRead);
 header = strsplit(header);
@@ -36,14 +39,5 @@
 
 clustergram(R,'Standardize', 2,'cluster',1, 'ColumnLabels', {'05 min', '30 min', '60 min'});
 
-%{
-mtx = calculate_deSeq(counts);
-mtx = mtx(idx_common,:);
-idx_remove = [3,8,13];
-mtx(:,idx_remove) = [];
-header(idx_remove) = [];
-
-% disp(symbols(stats05.idx_y));
-R = tiedrank(mtx)./size(mtx,1);
-clustergram(R,'Standardize',2,'cluster',1)
-%}
+% remove helpers folder from path; genpath mirrors the addpath(genpath(...))
+% call at the top of the script so that subfolders are removed as well
+rmpath(genpath('helpers'));
diff --git a/analysis_expression_development.m b/analysis_expression_development.m
@@ -2,8 +2,11 @@
 clear variables;
 close all;
 
+% add helpers folder to path
+addpath(genpath('helpers'));
+
 % get records
-fRead = fopen('../gene_counts/gene_counts_180117_only_mapped_with_header.txt','r');
+fRead = fopen('../data/gene_counts/gene_counts_180117_only_mapped_with_header.txt','r');
 % read first line (header)
 header = fgetl(fRead);
 header = strsplit(header);
@@ -75,4 +78,5 @@
 % create table with row and column labels
 exp_table = array2table(exp, 'RowNames', symbols, 'VariableNames', exp_header);
 
-
+% remove helpers folder from path; genpath mirrors the addpath(genpath(...))
+% call at the top of the script so that subfolders are removed as well
+rmpath(genpath('helpers'));
diff --git a/analysis_foldchange.m b/analysis_foldchange.m
@@ -2,8 +2,11 @@
 clear variables;
 close all;
 
+% add helpers folder to path
+addpath(genpath('helpers'));
+
 % get records
-fRead = fopen('../gene_counts/gene_counts_180117_only_mapped_with_header.txt','r');
+fRead = fopen('../data/gene_counts/gene_counts_180117_only_mapped_with_header.txt','r');
 % read first line (header)
 header = fgetl(fRead);
 header = strsplit(header);
@@ -21,7 +24,22 @@
 counts(idx_threshold,:) = [];
 symbols(idx_threshold) = [];
 
+counts = calculate_deSeq(counts);
+
 % create figures for each experiment time range (5min, 30min, 60min)
-stats05 = BinFoldChangeTest(counts(:, [2,5]), counts(:, [1,4]), 100, true);
-stats30 = BinFoldChangeTest(counts(:, [7,10]), counts(:, [6,9]), 100, true);
-stats60 = BinFoldChangeTest(counts(:, [12,15]), counts(:, [11,14,16]), 100, true);
+%stats05 = BinFoldChangeTest(counts(:, [2,5]), counts(:, [1,4]), 100, true);
+%stats30 = BinFoldChangeTest(counts(:, [7,10]), counts(:, [6,9]), 100, true);
+%stats60 = BinFoldChangeTest(counts(:, [12,15]), counts(:, [11,14,16]), 100, true);
+
+stats05C = BinFoldChangeTest(counts(:,2),counts(:,5),100,true);
+stats05T = BinFoldChangeTest(counts(:,5),counts(:,4),100,true);
+%stats60A = BinFoldChangeTest(counts(:,12),counts(:,11), 100, true);
+%stats60B = BinFoldChangeTest(counts(:,15),counts(:,14),100,true);
+idx = (stats05C.idx_x | stats05C.idx_y | stats05T.idx_x | stats05T.idx_y);
+
+stats05 = BinFoldChangeTest(counts(~idx, [2,5]), counts(~idx, [1,4]), 100, true);
+stats05A = BinFoldChangeTest(counts(~idx,2),counts(~idx,1), 100, true);
+stats05B = BinFoldChangeTest(counts(~idx,5),counts(~idx,4),100,true);
+
+% remove helpers folder from path; genpath mirrors the addpath(genpath(...))
+% call at the top of the script so that subfolders are removed as well
+rmpath(genpath('helpers'));
diff --git a/analysis_gene_ontology.m b/analysis_gene_ontology.m
@@ -2,6 +2,9 @@
 clear variables;
 close all;
 
+% add helpers folder to path
+addpath(genpath('helpers'));
+
 % get all unique GO terms
 fRead = fopen('data/gene_ontology/all_go_terms.txt','r');
 ref_go_terms = textscan(fRead,'%s','delimiter','\t');
@@ -32,3 +35,6 @@
 go_table = array2table(log_matrix, 'RowNames', ref_go_terms, 'VariableNames', cellfun(@(s) strtok(s, '.'), {annot_files.name}, 'UniformOutput', false));
 
 clustergram(log_matrix,'Standardize', 2,'cluster',1, 'ColumnLabels', cellfun(@(s) strtok(s, '.'), {annot_files.name}, 'UniformOutput', false));
+
+% remove helpers folder from path; genpath mirrors the addpath(genpath(...))
+% call at the top of the script so that subfolders are removed as well
+rmpath(genpath('helpers'));
diff --git a/analysis_rpkm_deSeq_normalization.m b/analysis_rpkm_deSeq_normalization.m
@@ -2,8 +2,11 @@
 clear variables;
 close all;
 
+% add helpers folder to path
+addpath(genpath('helpers'));
+
 % get records
-fRead = fopen('../gene_counts/gene_counts_180117_only_mapped_with_header.txt','r');
+fRead = fopen('../data/gene_counts/gene_counts_180117_only_mapped_with_header.txt','r');
 % read first line (header)
 header = fgetl(fRead);
 header = strsplit(header);
@@ -35,3 +38,5 @@
 header(1) = []; % remove gene ids column from header
 plotCorrelationMatrix(normalized_counts, header, [16,16], 'correlation_matrix_rpkmBigger2_and_deSeq');
 
+% remove helpers folder from path; genpath mirrors the addpath(genpath(...))
+% call at the top of the script so that subfolders are removed as well
+rmpath(genpath('helpers'));