From b348eb782319ee846ee4339b937646d7295357b5 Mon Sep 17 00:00:00 2001 From: msbentsen Date: Tue, 28 Jan 2020 12:42:50 +0100 Subject: [PATCH] Split conda environments to prevent solving issues --- environments/tobias.yaml | 49 ++++++++++++++++++++-------------- environments/tools.yaml | 4 +++ environments/uropa.yaml | 7 +++++ environments/wilson.yaml | 29 ++++++++++++++++++++ snakefiles/preprocessing.snake | 29 +++++++++++++------- snakefiles/visualization.snake | 5 +++- snakefiles/wilson.snake | 6 +++++ 7 files changed, 99 insertions(+), 30 deletions(-) create mode 100644 environments/tools.yaml create mode 100644 environments/uropa.yaml create mode 100644 environments/wilson.yaml diff --git a/environments/tobias.yaml b/environments/tobias.yaml index 15c7494..2a3912b 100644 --- a/environments/tobias.yaml +++ b/environments/tobias.yaml @@ -3,25 +3,34 @@ name: TOBIAS_ENV channels: - bioconda - conda-forge + - defaults dependencies: - - python=3 - - snakemake>=5.4 - - uropa=3 - - samtools - - moods - - unzip - - r-devtools - - r-knitr - - r-lme4 - - r-htmltable - - tobias>=0.9 - - r-hmisc - - r-pbkrtest - - r-openxlsx - - r-rio - - r-car - - bioconductor-deseq2 - - intervene - - igvtools - - icu + - python=3.6.7 + - bioconda::snakemake>=5.4 + - unzip=6.0 + - igvtools=2.5.3 + - openssl=1.0.2 + - pysam=0.15.3 + - gimmemotifs=0.13.1 + - samtools=1.5 + - sklearn-contrib-lightning=0.5.0 + - scikit-learn=0.22.1 + - scipy=1.2.1 + - icu=64.2 + - ncurses=6.1 + - numpy=1.17.5 + - boto3=1.11.9 + - setuptools=45.1.0 + - botocore=1.14.9 + - pandas=0.25.3 + - pip=20.0.2 + - libpng=1.6.37 + - pip: + - adjusttext==0.7.3 + - moods-python==1.9.4.1 + - pybigwig==0.3.17 + - tobias>=0.9 + - pypdf2==1.26.0 + - wand==0.5.8 + - xgboost==0.90 diff --git a/environments/tools.yaml b/environments/tools.yaml new file mode 100644 index 0000000..e4b447c --- /dev/null +++ b/environments/tools.yaml @@ -0,0 +1,4 @@ +name: tools_env + +dependencies: + - intervene \ No newline at end of file diff --git a/environments/uropa.yaml b/environments/uropa.yaml new file mode 100644 index 0000000..96c87f3 --- /dev/null +++ b/environments/uropa.yaml @@ -0,0 +1,7 @@ +name: uropa_env + +channels: + - bioconda + +dependencies: + - uropa diff --git a/environments/wilson.yaml b/environments/wilson.yaml new file mode 100644 index 0000000..a1e8787 --- /dev/null +++ b/environments/wilson.yaml @@ -0,0 +1,29 @@ +name: wilson_env + +dependencies: + - r-devtools + - r-data.table==1.12.8 + - r-plotly + - r-shiny + - r-shinydashboard + - r-shinythemes + - r-htmltools + - bioconductor-deseq2 + - r-ggrepel + - r-biocmanager + - r-colourpicker + - bioconductor-iranges + - bioconductor-genefilter + - r-markdown + - r-viridis + - r-tidyr + - r-reshape + - r-log4r + - r-shape + - r-ggpubr + - r-catools + - r-factominer + - r-heatmaply==1.0.0 + - r-car + - r-rjson + - r-circlize==0.4.8 diff --git a/snakefiles/preprocessing.snake b/snakefiles/preprocessing.snake index 8284c02..ab9dda8 100644 --- a/snakefiles/preprocessing.snake +++ b/snakefiles/preprocessing.snake @@ -25,7 +25,8 @@ rule copy_flatfiles: rule write_config: output: os.path.join(OUTPUTDIR, "config.yaml") - priority: 100 + priority: + 100 run: import yaml with open(output[0], 'w') as yaml_file: @@ -43,8 +44,10 @@ rule conditionbam: output: bam = os.path.join(OUTPUTDIR, "mapping", "{condition}.bam"), bai = os.path.join(OUTPUTDIR, "mapping", "{condition}.bam.bai") - threads: 99 - message: "Joining individual bamfiles from condition {wildcards.condition}" + threads: + 99 + message: + "Joining individual bamfiles from condition {wildcards.condition}" run: if len(input) > 1: shell("samtools merge -@ {threads} {output.bam} {input}") @@ -69,9 +72,12 @@ rule macs: output: macs = os.path.join(OUTPUTDIR, "peak_calling", "{condition}", "{sample_id}_peaks.broadPeak"), raw = os.path.join(OUTPUTDIR, "peak_calling", "{condition}", "{sample_id}_raw.bed") - log: os.path.join(OUTPUTDIR, "logs", "{condition}_{sample_id}_peak_calling.log") - message: "Running macs2 with .bam-file: {input}" - conda: os.path.join(environments_dir, "macs.yaml") + log: + os.path.join(OUTPUTDIR, "logs", "{condition}_{sample_id}_peak_calling.log") + message: + "Running macs2 with .bam-file: {input}" + conda: + os.path.join(environments_dir, "macs.yaml") params: "--name {sample_id}", "--outdir " + os.path.join(OUTPUTDIR, "peak_calling", "{condition}"), @@ -107,7 +113,8 @@ rule merge_condition_peaks: [os.path.join(OUTPUTDIR, "peak_calling", condition + "_union.bed") for condition in CONDITION_IDS] output: temp(os.path.join(OUTPUTDIR, "peak_calling", "all_merged.tmp")) - message: "Merging peaks across conditions" + message: + "Merging peaks across conditions" shell: "cat {input} | sort -k1,1 -k2,2n | bedtools merge -d 5 -c 4 -o distinct > {output}" @@ -170,10 +177,14 @@ rule uropa: finalhits_sub = os.path.join(OUTPUTDIR, "peak_annotation", "all_merged_annotated_finalhits_sub.txt"), peaks = os.path.join(OUTPUTDIR, "peak_annotation", "all_merged_annotated.bed"), header = os.path.join(OUTPUTDIR, "peak_annotation", "all_merged_annotated_header.txt"), - threads: 99 - log: os.path.join(OUTPUTDIR, "logs", "uropa.log") + threads: + 99 + log: + os.path.join(OUTPUTDIR, "logs", "uropa.log") params: prefix = os.path.join(OUTPUTDIR, "peak_annotation", "all_merged_annotated") + conda: + os.path.join(environments_dir, "uropa.yaml") shell: "uropa --input {input.config} --prefix {params.prefix} --threads {threads} --log {log}; " "cut -f 1-4,7-13,16-19 {output.finalhits} > {output.finalhits_sub}; " #Get a subset of columns diff --git a/snakefiles/visualization.snake b/snakefiles/visualization.snake index 8744efb..10ba50a 100644 --- a/snakefiles/visualization.snake +++ b/snakefiles/visualization.snake @@ -48,6 +48,8 @@ rule plot_venn: "--output " + os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots"), "--names " + ",".join(["'{0} bound sites'".format(condition) for condition in CONDITION_IDS]), "--title " + "'Overlap of sites for TF: {TF}'" + conda: + "../environments/tools.yaml" shell: "intervene venn --input {input} {params} > /dev/null ; " "cp {output.raw} {output.final}" @@ -105,7 +107,8 @@ rule join_pdfs: lambda wildcards: expand(os.path.join(OUTPUTDIR, "TFBS", "{TF}", "plots", "{TF}_{{plotname}}.pdf"), TF=get_TF_ids(wildcards)) output: os.path.join(OUTPUTDIR, "overview", "all_{plotname}.pdf") - message: "Joining {wildcards.plotname} plots from all TFs" + message: + "Joining {wildcards.plotname} plots from all TFs" shell: "TOBIAS MergePDF --input {input} --output {output} >/dev/null" diff --git a/snakefiles/wilson.snake b/snakefiles/wilson.snake index e6cf16a..f0fbb6e 100644 --- a/snakefiles/wilson.snake +++ b/snakefiles/wilson.snake @@ -14,6 +14,8 @@ rule wilson_app: params: wilsondir = os.path.join(OUTPUTDIR, "wilson"), scripts = scripts_dir + conda: + "../environments/wilson.yaml" shell: "mkdir -p {params.wilsondir}; " "unzip -o {input} -d {params.wilsondir}; " @@ -39,6 +41,8 @@ rule convert_individual_overview: app = os.path.join(OUTPUTDIR, "wilson", "app.R") params: scripts = scripts_dir + conda: + "../environments/wilson.yaml" output: os.path.join(OUTPUTDIR, "wilson", "data", "{TF}_overview.clarion") shell: @@ -55,6 +59,8 @@ rule convert_global_overview: params: overview = os.path.join(OUTPUTDIR, "overview", "*_results.txt"), scripts = scripts_dir + conda: + "../environments/wilson.yaml" shell: "Rscript {params.scripts}/convert_data.R --input {params.overview} --output {output} --condition_names {input.condition_names} &> /dev/null "