Skip to content

Commit

Permalink
Merge pull request #28 from loosolab/estimation_motifs
Browse files Browse the repository at this point in the history
 sorting scripts depending on their function
renewiegandt committed Jan 3, 2019
2 parents 1c392da + ab6f883 commit 8993670
Showing 59 changed files with 5,786 additions and 24 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File renamed without changes.
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File renamed without changes.
Binary file not shown.
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
fd500a8b924f0f3ddc5391baf259b429 homo_sapiens.GRCh38.motiffeatures.20161111.gff.gz
b42e919ff359bd873c7e5eea14b49776 homo_sapiens.GRCh38.Regulatory_Build.regulatory_features.20161111.gff.gz
dbd442cdd993ca44cbbf39be620dfa23 README
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
10483 8961 homo_sapiens.GRCh38.A549.Regulatory_Build.regulatory_activity.20161111.gff.gz
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
37136 8883 homo_sapiens.GRCh38.Aorta.Regulatory_Build.regulatory_activity.20161111.gff.gz
Binary file not shown.
Binary file not shown.
5,756 changes: 5,756 additions & 0 deletions bin/3.1_create_gtf/data/UCSCData/hg38.bed

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions masterenv.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

name: masterenv
dependencies:
- python >=3
- r-seqinr
- numpy
- pybigWig
48 changes: 24 additions & 24 deletions pipeline.nf
Original file line number Diff line number Diff line change
@@ -192,8 +192,8 @@ process footprint_extraction {
conda "${path_env}"

tag{name}
publishDir "${params.out}/footprint_extraction/", mode: 'copy', pattern: '*.bed'
publishDir "${params.out}/footprint_extraction/log", mode: 'copy', pattern: '*.log'
publishDir "${params.out}/1.1_footprint_extraction/", mode: 'copy', pattern: '*.bed'
publishDir "${params.out}/1.1_footprint_extraction/log", mode: 'copy', pattern: '*.log'

input:
set name, file (bigWig), file (bed) from footprint_in
@@ -203,7 +203,7 @@ process footprint_extraction {

script:
"""
python ${path_bin}/footprints_extraction.py --bigwig ${bigWig} --bed ${bed} --output_file ${name}_called_peaks.bed --window_length ${params.window_length} --step ${params.step} --percentage ${params.percentage}
python ${path_bin}/1.1_footprint_extraction/footprints_extraction.py --bigwig ${bigWig} --bed ${bed} --output_file ${name}_called_peaks.bed --window_length ${params.window_length} --step ${params.step} --percentage ${params.percentage}
"""
}

@@ -216,7 +216,7 @@ process extract_known_TFBS {

conda "${path_env}"

publishDir "${params.out}/known_TFBS/", mode: 'copy', pattern: '*.bed'
publishDir "${params.out}/1.2_filter_motifs/TFBSscan/", mode: 'copy', pattern: '*.bed'

input:
set file (fasta), file (db), file (bed) from for_tfbs
@@ -229,7 +229,7 @@ process extract_known_TFBS {

script:
"""
python ${path_bin}/tfbsscan.py --use moods --core ${params.threads} -m ${db} -g ${fasta} -o ${params.create_known_tfbs_path} -b ${bed}
python ${path_bin}/1.2_filter_motifs/tfbsscan.py --use moods --core ${params.threads} -m ${db} -g ${fasta} -o ${params.create_known_tfbs_path} -b ${bed}
"""
}

@@ -252,7 +252,7 @@ if(params.tfbs_path == "") {
process overlap_with_known_TFBS {
conda "${path_env}"

publishDir "${params.out}/unknown_overlap/", mode :'copy'
publishDir "${params.out}/1.2_filter_motifs/compareBed/", mode :'copy'

input:
set name, file (bed_footprints), val (bed_motifs), file (fasta) from for_overlap
@@ -263,7 +263,7 @@ process overlap_with_known_TFBS {
script:
motif_list = bed_motifs.toString().replaceAll(/\s|\[|\]/,"")
"""
${path_bin}/compareBed.sh --data ${bed_footprints} --motifs ${motif_path} --fasta ${fasta} -o ${name}_unknown.bed -min ${params.min_size_fp} -max ${params.max_size_fp} -p ${path_bin}
${path_bin}/1.2_filter_motifs/compareBed.sh --data ${bed_footprints} --motifs ${motif_path} --fasta ${fasta} -o ${name}_unknown.bed -min ${params.min_size_fp} -max ${params.max_size_fp} -p ${path_bin}/1.2_filter_motifs
"""
}

@@ -274,7 +274,7 @@ Reduce each sequence to its most conserved region.
process reduce_sequence {
conda "${path_env}"
echo true
publishDir "${params.out}/cluster/reduced_bed/", mode: 'copy'
publishDir "${params.out}/2.1_clustering/reduced_bed/", mode: 'copy'

input:
set name, file (bed) from bed_for_reducing
@@ -284,7 +284,7 @@ process reduce_sequence {

script:
"""
Rscript ${path_bin}/reduce_sequence.R -i ${bed} -k ${params.kmer} -m ${params.aprox_motif_len} -o ${name}_reduced.bed -t ${params.threads} -f ${params.motif_occurence} -s ${params.min_seq_length}
Rscript ${path_bin}/2.1_clustering/reduce_sequence.R -i ${bed} -k ${params.kmer} -m ${params.aprox_motif_len} -o ${name}_reduced.bed -t ${params.threads} -f ${params.motif_occurence} -s ${params.min_seq_length}
"""
}

@@ -296,7 +296,7 @@ process clustering {
conda "${path_env}"
echo true

publishDir "${params.out}/cluster/", mode: 'copy', pattern: '*.bed'
publishDir "${params.out}/2.1_clustering/", mode: 'copy', pattern: '*.bed'

input:
set name, file (bed) from bed_for_clustering
@@ -306,7 +306,7 @@ process clustering {

script:
"""
Rscript ${path_bin}/cdhit_wrapper.R -i ${bed} -A ${params.sequence_coverage} -o ${name}_clusterd.bed -c ${params.identity} -G ${params.global} -M ${params.memory} -l ${params.throw_away_seq} -r ${params.strand} -T ${params.threads}
Rscript ${path_bin}/2.1_clustering/cdhit_wrapper.R -i ${bed} -A ${params.sequence_coverage} -o ${name}_clusterd.bed -c ${params.identity} -G ${params.global} -M ${params.memory} -l ${params.throw_away_seq} -r ${params.strand} -T ${params.threads}
"""
}

@@ -316,7 +316,7 @@ Converting BED-File to one FASTA-File per cluster
*/
process bed_to_clustered_fasta {
conda "${path_env}"
publishDir "${params.out}/esimated_motifs/clustered_motifs/clustered_fasta/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/fasta/", mode: 'copy'
tag{name}

input:
@@ -328,7 +328,7 @@ process bed_to_clustered_fasta {

script:
"""
Rscript ${path_bin}/bed_to_fasta.R -i ${bed} -p ${name} -m ${params.min_seq}
Rscript ${path_bin}/2.2_motif_estimation/bed_to_fasta.R -i ${bed} -p ${name} -m ${params.min_seq}
"""
}

@@ -345,7 +345,7 @@ Generating Motifs through alignment and scoring best local matches.
process glam2 {

tag{name}
publishDir "${params.out}/esimated_motifs/clustered_motifs/${name}/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/glam2/${name}/", mode: 'copy'

input:
set name, file (fasta) from fasta_for_glam2
@@ -368,7 +368,7 @@ The paths are sorted numerically depending on the cluster number.
*/
process merge_meme {

publishDir "${params.out}/esimated_motifs/merged_meme/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/cluster_motifs/merged_meme/", mode: 'copy'

input:
val (memelist) from meme_to_merge.toList()
@@ -395,7 +395,7 @@ Output table has the information which clusters are similar to each other.
*/
process find_similar_motifs {

publishDir "${params.out}/esimated_motifs/cluster_similarity/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/cluster_motifs/cluster_similarity/", mode: 'copy'
input:
file (merged_meme) from merged_meme

@@ -420,7 +420,7 @@ Merging FASTA-files of similar clusters
*/
process merge_fasta {
conda "${path_env}"
publishDir "${params.out}/esimated_motifs/merged_fasta/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/cluster_motifs/merged_fasta/", mode: 'copy'
echo true
input:
set file (motiv_sim), val (fasta_list) from files_for_merge_fasta
@@ -436,7 +436,7 @@ process merge_fasta {
fa_sorted = fasta_list.sort(false) { it.getBaseName().tokenize('_')[-1] as Integer }
fastalist = fa_sorted.toString().replaceAll(/\s|\[|\]/,"")
"""
Rscript ${path_bin}/merge_similar_clusters.R ${motiv_sim} ${fastalist} ${params.edge_weight}
Rscript ${path_bin}/2.2_motif_estimation/merge_similar_clusters.R ${motiv_sim} ${fastalist} ${params.edge_weight}
"""
}

@@ -446,7 +446,7 @@ motif_clustered_fasta_flat = motif_clustered_fasta_list.flatten()

process clustered_glam2 {

publishDir "${params.out}/final_esimated_motifs/${name}/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/cluster_motifs/glam2/${name}/", mode: 'copy'

input:
file (fasta) from motif_clustered_fasta_flat
@@ -482,7 +482,7 @@ Tomtom searches motifs in databases.
process tomtom {

tag{name}
publishDir "${params.out}/esimated_motifs/tomtom/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/tomtom/", mode: 'copy'

input:
set name, file (meme) from for_tomtom
@@ -529,7 +529,7 @@ process check_for_unknown_motifs {
process get_best_motif {
conda "${path_env}"

publishDir "${params.out}/esimated_motifs/unknown_motifs/", mode: 'copy'
publishDir "${params.out}/2.2_motif_estimation/best_unknown_motifs/", mode: 'copy'

input:
set name, file(meme), file(tsv) from meme_for_scan
@@ -539,7 +539,7 @@ process get_best_motif {

script:
"""
python ${path_bin}/get_best_motif.py ${meme} ${name}_best.meme ${params.best_motif}
python ${path_bin}/2.2_motif_estimation/get_best_motif.py ${meme} ${name}_best.meme ${params.best_motif}
"""
}

@@ -579,7 +579,7 @@ process cluster_quality {
process create_GTF {
conda "${path_env}"

publishDir "${params.out}/gtf/", mode: 'copy'
publishDir "${params.out}/3.1_create_gtf/", mode: 'copy'

output:
file ('*.gtf') into gtf
@@ -589,7 +589,7 @@ process create_GTF {

script:
"""
python ${path_bin}/RegGTFExtractor.py ${params.organism} --tissue ${params.tissues} --wd ${path_bin}
python ${path_bin}/3.1_create_gtf/RegGTFExtractor.py ${params.organism} --tissue ${params.tissues} --wd ${path_bin}/3.1_create_gtf/
"""
}

0 comments on commit 8993670

Please sign in to comment.