From 2ec3d28d4271b5b756dc18e2ab952e9c1a60ca04 Mon Sep 17 00:00:00 2001
From: sepro
Date: Wed, 9 Aug 2017 11:52:59 +0200
Subject: [PATCH] added script to merge matrices and updated docs

---
 docs/helper.md         | 15 ++++++++++-
 helper/merge_matrix.py | 56 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 helper/merge_matrix.py

diff --git a/docs/helper.md b/docs/helper.md
index b5b0ff1..9142e08 100644
--- a/docs/helper.md
+++ b/docs/helper.md
@@ -105,4 +105,17 @@ required data is included here as well. Note that this script requires sklearn a
 
     python3 pca_powerlaw.py ./data/sbi.expression.matrix.tpm.txt ./data/sbi_annotation.txt ./data/sbi.power_law.R07.txt
 
-
+## Utilities
+
+### merge_matrix.py
+
+In case samples for one (!) species were processed in two or more batches, this script can be used to merge the
+expression matrices.
+
+*Note that to obtain co-expression networks using the merged matrix LSTrAP needs to be run, using the merged expression
+matrix, skipping all steps before the construction of co-expression.*
+
+*Only merge raw matrices with raw, tpm with tpm and rpkm with rpkm!*
+
+
+    python3 merge_matrix.py matrix_one.txt matrix_two.txt matrix_merged.txt
\ No newline at end of file
diff --git a/helper/merge_matrix.py b/helper/merge_matrix.py
new file mode 100644
index 0000000..7d7f9dd
--- /dev/null
+++ b/helper/merge_matrix.py
@@ -0,0 +1,56 @@
+import argparse
+import sys
+
+import pandas as pd
+
+
+def merge_matrix(first, second, output):
+    """
+    Merge two LSTrAP expression matrices column-wise and write the result.
+
+    Both inputs are read as tab-delimited tables: the first row holds the
+    sample names, the first column the gene identifiers. Rows are aligned on
+    the gene identifier, so a gene missing from one matrix gets empty values
+    for the samples of that matrix. Problems are only reported as warnings on
+    stderr; the merged matrix is written regardless.
+
+    :param first: first input matrix (path)
+    :param second: second input matrix (path)
+    :param output: output matrix (path)
+    """
+
+    df_first = pd.read_table(first, header=0, index_col=0)
+    df_second = pd.read_table(second, header=0, index_col=0)
+
+    if df_first.shape[0] != df_second.shape[0]:
+        print("WARNING: attempting to merge two matrices with a different number of rows", file=sys.stderr)
+
+    # A sample name present in both inputs would end up as a duplicated column
+    shared_samples = df_first.columns.intersection(df_second.columns)
+    if not shared_samples.empty:
+        print("WARNING: sample(s) present in both matrices: " + ", ".join(map(str, shared_samples)),
+              file=sys.stderr)
+
+    # concat aligns rows on the index (outer join); mismatching genes add rows
+    df_output = pd.concat([df_first, df_second], axis=1)
+
+    if any([df_first.shape[0] != df_output.shape[0],
+            df_second.shape[0] != df_output.shape[0],
+            df_output.shape[1] != df_first.shape[1] + df_second.shape[1]]):
+        print("WARNING: output matrix has an unexpected shape", file=sys.stderr)
+
+    df_output.index.name = 'gene'
+    df_output.to_csv(output, sep='\t')
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(prog="./merge_matrix.py")
+
+    parser.add_argument('first', help='first LSTrAP matrix to merge')
+    parser.add_argument('second', help='second LSTrAP matrix to merge')
+    parser.add_argument('output', help='path to output')
+
+    args = parser.parse_args()
+
+    merge_matrix(args.first, args.second, args.output)