Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/dev' into dev
Browse files Browse the repository at this point in the history
# Conflicts:
#	masterenv.yml
  • Loading branch information
basti committed Dec 10, 2018
2 parents 30c17cc + 15aa68a commit f0ad6b3
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 47 deletions.
15 changes: 8 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ export PATH=[meme-suite instalation path]/bin:$PATH

Download all files from the [GitHub repository](https://github.molgen.mpg.de/loosolab/masterJLU2018).
The Nextflow-script needs a conda environment to run. Nextflow can create the needed environment from the given yaml-file.
On some systems Nrxtflow exits the run with following error:
On some systems Nextflow exits the run with following error:
```
Caused by:
Failed to create Conda environment
Expand All @@ -42,20 +42,21 @@ When the enviroment is created, set the variable 'path_env' in the configuration
nextflow run pipeline.nf --input [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --jaspar_db [MEME-file]
```
## Parameters
For a detailed overview of all parameters, follow this [link](https://github.molgen.mpg.de/loosolab/masterJLU2018/wiki/Configuration).
```
Required arguments:
--input Path to BigWig-file
--bed Path to BED-file
--input Path to BigWig-file with scores on the peaks of interest
--bed Path to BED-file with peaks of interest corresponding to the BigWig file
--genome_fasta Path to genome in FASTA-format
--jaspar_db Path to motif-database in MEME-format
--organism STRING Source organism: [ hg19 | hg38 or mm9 | mm10 ]
Optional arguments:
Footprint extraction:
--window_length INT (Default: 200)
--step INT (Default: 100)
--percentage INT(Default: 0)
--window_length INT (Default: 200) a length of a window
--step INT (Default: 100) an interval to slide the window
--percentage INT (Default: 0) a percentage to be added to background while searching for footprints
Filter unknown motifs:
--min_size_fp INT (Default: 10)
Expand All @@ -82,7 +83,7 @@ Optional arguments:
--interation INT Number of iterations done by glam2. More iterations: better results, higher runtime. (Default: 10000)
--tomtom_treshold float Threshold for similarity score. (Default: 0.01)
Moitf clustering:
Motif clustering:
--edge_weight INT Minimum weight of edges in motif-cluster-graph (Default: 5)
--motif_similarity_thresh FLOAT threshold for motif similarity score (Default: 0.00001)
Expand Down
3 changes: 1 addition & 2 deletions masterenv.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ channels:
- bioconda
- conda-forge
dependencies:
- bedtools
- r-seqinr
- numpy
- pybigWig
Expand All @@ -25,4 +24,4 @@ dependencies:
- pybedtools
- matplotlib
- seaborn
- crossmap
- crossmap
75 changes: 37 additions & 38 deletions pipeline.nf
Original file line number Diff line number Diff line change
@@ -1,10 +1,4 @@
//!/usr/bin/env nextflow

Channel.fromPath(params.input).map {it -> [it.simpleName, it]}.set {bigwig_input}
Channel.fromPath(params.bed).set {bed_input}
Channel.fromPath(params.genome_fasta).into {fa_overlap; fa_scan; fa_overlap_2}
Channel.fromPath(params.jaspar_db).into {db_for_motivscan; db_for_tomtom}
Channel.fromPath(params.config).set {config}
#!/usr/bin/env nextflow

//setting default values
params.input=""
Expand Down Expand Up @@ -65,36 +59,36 @@ log.info """
Usage: nextflow run pipeline.nf --input [BigWig-file] --bed [BED-file] --genome_fasta [FASTA-file] --jaspar_db [MEME-file]
Required arguments:
--input Path to BigWig-file
--bed Path to BED-file
--genome_fasta Path to genome in FASTA-format
--jaspar_db Path to motif-database in MEME-format
--input Path to BigWig-file
--bed Path to BED-file
--genome_fasta Path to genome in FASTA-format
--jaspar_db Path to motif-database in MEME-format
Optional arguments:
Footprint extraction:
--window_length INT (Default: 200)
--step INT (Default: 100)
--percentage INT(Default: 0)
--window_length INT (Default: 200)
--step INT (Default: 100)
--percentage INT (Default: 0)
Filter unknown motifs:
--min_size_fp INT (Default: 10)
--max_size_fp INT (Default: 100)
--min_size_fp INT (Default: 10)
--max_size_fp INT (Default: 100)
Clustering:
Sequence preparation/ reduction:
--kmer INT Kmer length (Default: 10)
--aprox_motif_len INT Motif length (Default: 10)
--motif_occurence FLOAT Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif.
--min_seq_length Interations Remove all sequences below this value. (Default: 10)
Clustering:
Sequence preparation/ reduction:
--kmer INT Kmer length (Default: 10)
--aprox_motif_len INT Motif length (Default: 10)
--motif_occurence FLOAT Percentage of motifs over all sequences. Use 1 (Default) to assume every sequence contains a motif.
--min_seq_length INT Remove all sequences below this value. (Default: 10)
Clustering:
--global INT Global (=1) or local (=0) alignment. (Default: 0)
--identity FLOAT Identity threshold. (Default: 0.8)
--sequence_coverage INT Minimum aligned nucleotides on both sequences. (Default: 8)
--memory INT Memory limit in MB. 0 for unlimited. (Default: 800)
--throw_away_seq INT Remove all sequences equal or below this length before clustering. (Default: 9)
--strand INT Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0)
--global INT Global (=1) or local (=0) alignment. (Default: 0)
--identity FLOAT Identity threshold. (Default: 0.8)
--sequence_coverage INT Minimum aligned nucleotides on both sequences. (Default: 8)
--memory INT Memory limit in MB. 0 for unlimited. (Default: 800)
--throw_away_seq INT Remove all sequences equal or below this length before clustering. (Default: 9)
--strand INT Align +/+ & +/- (= 1). Or align only +/+ (= 0). (Default: 0)
Motif estimation:
--motif_min_len INT Minimum length of Motif (Default: 8)
Expand All @@ -103,21 +97,27 @@ Optional arguments:
--tomtom_treshold float Threshold for similarity score. (Default: 0.01)
Moitf clustering:
--edge_weight INT Minimum weight of edges in motif-cluster-graph (Default: 5)
--motif_similarity_thresh FLOAT threshold for motif similarity score (Default: 0.00001)
--edge_weight INT Minimum weight of edges in motif-cluster-graph (Default: 5)
--motif_similarity_thresh FLOAT Threshold for motif similarity score (Default: 0.00001)
Creating GTF:
--organism [homo_sapiens | mus_musculus]
--tissues
--organism [homo_sapiens | mus_musculus]
--tissues
All arguments can be set in the configuration files.
"""
} else {
Channel.fromPath(params.input).map {it -> [it.simpleName, it]}.set {bigwig_input}
Channel.fromPath(params.bed).set {bed_input}
Channel.fromPath(params.genome_fasta).into {fa_overlap; fa_scan; fa_overlap_2}
Channel.fromPath(params.jaspar_db).into {db_for_motivscan; db_for_tomtom}
Channel.fromPath(params.config).set {config}
}


bigwig_input.combine(bed_input).into {footprint_in}
bigwig_input.combine(bed_input).set{footprint_in}
/*
This process uses the discontinuous score from a bigWig file to estimate footprints within peaks of interest
*/
process footprint_extraction {
conda "${path_env}"
Expand Down Expand Up @@ -154,7 +154,7 @@ process extract_known_TFBS {

script:
"""
python ${path_bin}/tfbsscan.py --use moods --core ${params.threads} -m ${db} -g ${fasta} -o ./
python ${path_bin}/tfbsscan.py --use moods --core ${params.threads} -m ${db} -g ${fasta} -o .
"""
}

Expand Down Expand Up @@ -225,7 +225,7 @@ process bed_to_clustered_fasta {
tag{name}

input:
set name, file (bed) from clustered_bed
set name, file (bed) from bed_for_motif_esitmation

when:
params.fasta == false
Expand Down Expand Up @@ -349,7 +349,6 @@ process clustered_glam2 {
"""
}

*/

/*
Running Tomtom on meme-files generated by GLAM2.
Expand Down

0 comments on commit f0ad6b3

Please sign in to comment.