Skip to content

sorting scripts depending on their function #28

Merged
merged 3 commits into from
Jan 3, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
fd500a8b924f0f3ddc5391baf259b429 homo_sapiens.GRCh38.motiffeatures.20161111.gff.gz
b42e919ff359bd873c7e5eea14b49776 homo_sapiens.GRCh38.Regulatory_Build.regulatory_features.20161111.gff.gz
dbd442cdd993ca44cbbf39be620dfa23 README
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
10483 8961 homo_sapiens.GRCh38.A549.Regulatory_Build.regulatory_activity.20161111.gff.gz
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
37136 8883 homo_sapiens.GRCh38.Aorta.Regulatory_Build.regulatory_activity.20161111.gff.gz
Binary file not shown.
Binary file not shown.
5,756 changes: 5,756 additions & 0 deletions bin/3.1_create_gtf/data/UCSCData/hg38.bed

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions masterenv.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

name: masterenv
dependencies:
- python >=3
- r-seqinr
- numpy
- pybigWig
Expand Down
48 changes: 24 additions & 24 deletions pipeline.nf
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,8 @@ process footprint_extraction {
conda "${path_env}"

tag{name}
publishDir "${params.out}/footprint_extraction/", mode: 'copy', pattern: '*.bed'
publishDir "${params.out}/footprint_extraction/log", mode: 'copy', pattern: '*.log'
publishDir "${params.out}/1.1_footprint_extraction/", mode: 'copy', pattern: '*.bed'
publishDir "${params.out}/1.1_footprint_extraction/log", mode: 'copy', pattern: '*.log'

input:
set name, file (bigWig), file (bed) from footprint_in
Expand All @@ -203,7 +203,7 @@ process footprint_extraction {

script:
"""
python ${path_bin}/footprints_extraction.py --bigwig ${bigWig} --bed ${bed} --output_file ${name}_called_peaks.bed --window_length ${params.window_length} --step ${params.step} --percentage ${params.percentage}
python ${path_bin}/1.1_footprint_extraction/footprints_extraction.py --bigwig ${bigWig} --bed ${bed} --output_file ${name}_called_peaks.bed --window_length ${params.window_length} --step ${params.step} --percentage ${params.percentage}
"""
}

Expand All @@ -216,7 +216,7 @@ process extract_known_TFBS {

conda "${path_env}"

publishDir "${params.out}/known_TFBS/", mode: 'copy', pattern: '*.bed'
publishDir "${params.out}/1.2_filter_motifs/TFBSscan/", mode: 'copy', pattern: '*.bed'

input:
set file (fasta), file (db), file (bed) from for_tfbs
Expand All @@ -229,7 +229,7 @@ process extract_known_TFBS {

script:
"""
python ${path_bin}/tfbsscan.py --use moods --core ${params.threads} -m ${db} -g ${fasta} -o ${params.create_known_tfbs_path} -b ${bed}
python ${path_bin}/1.2_filter_motifs/tfbsscan.py --use moods --core ${params.threads} -m ${db} -g ${fasta} -o ${params.create_known_tfbs_path} -b ${bed}
"""
}

Expand All @@ -252,7 +252,7 @@ if(params.tfbs_path == "") {
process overlap_with_known_TFBS {
conda "${path_env}"

publishDir "${params.out}/unknown_overlap/", mode :'copy'
publishDir "${params.out}/1.2_filter_motifs/compareBed/", mode :'copy'

input:
set name, file (bed_footprints), val (bed_motifs), file (fasta) from for_overlap
Expand All @@ -263,7 +263,7 @@ process overlap_with_known_TFBS {
script:
motif_list = bed_motifs.toString().replaceAll(/\s|\[|\]/,"")
"""
${path_bin}/compareBed.sh --data ${bed_footprints} --motifs ${motif_path} --fasta ${fasta} -o ${name}_unknown.bed -min ${params.min_size_fp} -max ${params.max_size_fp} -p ${path_bin}
${path_bin}/1.2_filter_motifs/compareBed.sh --data ${bed_footprints} --motifs ${motif_path} --fasta ${fasta} -o ${name}_unknown.bed -min ${params.min_size_fp} -max ${params.max_size_fp} -p ${path_bin}/1.2_filter_motifs
"""
}

Expand All @@ -274,7 +274,7 @@ Reduce each sequence to its most conserved region.
process reduce_sequence {
conda "${path_env}"
echo true
publishDir "${params.out}/cluster/reduced_bed/", mode: 'copy'
publishDir "${params.out}/2.1_clustering/reduced_bed/", mode: 'copy'

input:
set name, file (bed) from bed_for_reducing
Expand All @@ -284,7 +284,7 @@ process reduce_sequence {

script:
"""
Rscript ${path_bin}/reduce_sequence.R -i ${bed} -k ${params.kmer} -m ${params.aprox_motif_len} -o ${name}_reduced.bed -t ${params.threads} -f ${params.motif_occurence} -s ${params.min_seq_length}
Rscript ${path_bin}/2.1_clustering/reduce_sequence.R -i ${bed} -k ${params.kmer} -m ${params.aprox_motif_len} -o ${name}_reduced.bed -t ${params.threads} -f ${params.motif_occurence} -s ${params.min_seq_length}
"""
}

Expand All @@ -296,7 +296,7 @@ process clustering {
conda "${path_env}"
echo true

publishDir "${params.out}/cluster/", mode: 'copy', pattern: '*.bed'
publishDir "${params.out}/2.1_clustering/", mode: 'copy', pattern: '*.bed'

input:
set name, file (bed) from bed_for_clustering
Expand All @@ -306,7 +306,7 @@ process clustering {

script:
"""
Rscript ${path_bin}/cdhit_wrapper.R -i ${bed} -A ${params.sequence_coverage} -o ${name}_clusterd.bed -c ${params.identity} -G ${params.global} -M ${params.memory} -l ${params.throw_away_seq} -r ${params.strand} -T ${params.threads}
Rscript ${path_bin}/2.1_clustering/cdhit_wrapper.R -i ${bed} -A ${params.sequence_coverage} -o ${name}_clusterd.bed -c ${params.identity} -G ${params.global} -M ${params.memory} -l ${params.throw_away_seq} -r ${params.strand} -T ${params.threads}
"""
}

Expand All @@ -316,7 +316,7 @@ Converting BED-File to one FASTA-File per cluster
*/
process bed_to_clustered_fasta {
conda "${path_env}"
publishDir "${params.out}/esimated_motifs/clustered_motifs/clustered_fasta/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/fasta/", mode: 'copy'
tag{name}

input:
Expand All @@ -328,7 +328,7 @@ process bed_to_clustered_fasta {

script:
"""
Rscript ${path_bin}/bed_to_fasta.R -i ${bed} -p ${name} -m ${params.min_seq}
Rscript ${path_bin}/2.2_motif_estimation/bed_to_fasta.R -i ${bed} -p ${name} -m ${params.min_seq}
"""
}

Expand All @@ -345,7 +345,7 @@ Generating Motifs through alignment and scoring best local matches.
process glam2 {

tag{name}
publishDir "${params.out}/esimated_motifs/clustered_motifs/${name}/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/glam2/${name}/", mode: 'copy'

input:
set name, file (fasta) from fasta_for_glam2
Expand All @@ -368,7 +368,7 @@ The paths are sorted numerically depending on the cluster number.
*/
process merge_meme {

publishDir "${params.out}/esimated_motifs/merged_meme/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/cluster_motifs/merged_meme/", mode: 'copy'

input:
val (memelist) from meme_to_merge.toList()
Expand All @@ -395,7 +395,7 @@ Output table has the information which clusters are similar to each other.
*/
process find_similar_motifs {

publishDir "${params.out}/esimated_motifs/cluster_similarity/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/cluster_motifs/cluster_similarity/", mode: 'copy'
input:
file (merged_meme) from merged_meme

Expand All @@ -420,7 +420,7 @@ Merging FASTA-files of similar clusters
*/
process merge_fasta {
conda "${path_env}"
publishDir "${params.out}/esimated_motifs/merged_fasta/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/cluster_motifs/merged_fasta/", mode: 'copy'
echo true
input:
set file (motiv_sim), val (fasta_list) from files_for_merge_fasta
Expand All @@ -436,7 +436,7 @@ process merge_fasta {
fa_sorted = fasta_list.sort(false) { it.getBaseName().tokenize('_')[-1] as Integer }
fastalist = fa_sorted.toString().replaceAll(/\s|\[|\]/,"")
"""
Rscript ${path_bin}/merge_similar_clusters.R ${motiv_sim} ${fastalist} ${params.edge_weight}
Rscript ${path_bin}/2.2_motif_estimation/merge_similar_clusters.R ${motiv_sim} ${fastalist} ${params.edge_weight}
"""
}

Expand All @@ -446,7 +446,7 @@ motif_clustered_fasta_flat = motif_clustered_fasta_list.flatten()

process clustered_glam2 {

publishDir "${params.out}/final_esimated_motifs/${name}/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/cluster_motifs/glam2/${name}/", mode: 'copy'

input:
file (fasta) from motif_clustered_fasta_flat
Expand Down Expand Up @@ -482,7 +482,7 @@ Tomtom searches motifs in databases.
process tomtom {

tag{name}
publishDir "${params.out}/esimated_motifs/tomtom/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/tomtom/", mode: 'copy'

input:
set name, file (meme) from for_tomtom
Expand Down Expand Up @@ -529,7 +529,7 @@ process check_for_unknown_motifs {
process get_best_motif {
conda "${path_env}"

publishDir "${params.out}/esimated_motifs/unknown_motifs/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/best_unknown_motifs/", mode: 'copy'

input:
set name, file(meme), file(tsv) from meme_for_scan
Expand All @@ -539,7 +539,7 @@ process get_best_motif {

script:
"""
python ${path_bin}/get_best_motif.py ${meme} ${name}_best.meme ${params.best_motif}
python ${path_bin}/2.2_motif_estimation/get_best_motif.py ${meme} ${name}_best.meme ${params.best_motif}
"""
}

Expand Down Expand Up @@ -579,7 +579,7 @@ process cluster_quality {
process create_GTF {
conda "${path_env}"

publishDir "${params.out}/gtf/", mode: 'copy'
publishDir "${params.out}/3.1_create_gtf/", mode: 'copy'

output:
file ('*.gtf') into gtf
Expand All @@ -589,7 +589,7 @@ process create_GTF {

script:
"""
python ${path_bin}/RegGTFExtractor.py ${params.organism} --tissue ${params.tissues} --wd ${path_bin}
python ${path_bin}/3.1_create_gtf/RegGTFExtractor.py ${params.organism} --tissue ${params.tissues} --wd ${path_bin}/3.1_create_gtf/
"""
}

Expand Down