-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added example data for the tutorial to visualize isoforms
- Loading branch information
Siddharth Annaldasula
committed
Mar 15, 2021
1 parent
7fe0172
commit 74acc71
Showing
10 changed files
with
214 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
##### IsoTV Config File | ||
|
||
pipeline: "IsoTV" | ||
repo: "https://github.molgen.mpg.de/MayerGroup/IsoTV" | ||
|
||
### Output directory | ||
|
||
outdir: "example" | ||
|
||
### General pipeline parameters: | ||
|
||
basecalling: FALSE | ||
preprocess: FALSE | ||
annotation: TRUE | ||
quantification: TRUE | ||
|
||
###### ONT long read processing config | ||
### Basecalling pipeline parameters | ||
|
||
guppy: "/path/to/Guppy324/bin/guppy_basecaller" | ||
flowcell: FLO-MIN106 | ||
kit: SQK-DCS109 | ||
|
||
### Reference Files | ||
genome_fasta: "/path/to/GRCh38.p12.primary_assembly.genome.fa" | ||
genome_annot: "/path/to/gencode.v32.primary_assembly.annotation.gtf" | ||
|
||
### Samples | ||
# samples with .bottom extension must be placed in the RawData folder | ||
# condition_replicate | ||
samples: | ||
A549_1: "A549_r1_r3" | ||
A549_2: "A549_r2_r1" | ||
A549_5: "A549_r5_r3" | ||
HCT116_1: "HCT116_r1_r4" | ||
HCT116_3: "HCT116_r3_r2" | ||
HCT116_4: "HCT116_r4_r1" | ||
HCT116_5: "HCT116_r5_r1" | ||
HEPG2_1: "HEPG2_r1_r1" | ||
HEPG2_4: "HEPG2_r4_r2" | ||
HEPG2_5: "HEPG2_r5_r3" | ||
K562_1: "K562_r1_r2" | ||
K562_2: "K562_r2_r1" | ||
K562_3: "K562_r3_r1" | ||
K562_4: "K562_r4_r2" | ||
MCF7_1: "MCF7_r1_r2" | ||
MCF7_3: "MCF7_r3_r3" | ||
MCF7_4: "MCF7_r4_r2" | ||
|
||
threads: 16 | ||
|
||
# Use pychopper results | ||
pychopper: TRUE | ||
|
||
# Use annotation to improve splice junction mapping (minimap2 --junc_bed parameter) | ||
minimap2_opts_junction: TRUE | ||
|
||
# Minimum read quality to keep: | ||
min_mean_q: 5 | ||
|
||
# Stringency of porechop heuristic: | ||
porechop_heu_stringency: 0.25 | ||
|
||
# Options passed to minimap2 during indexing: | ||
minimap2_index_opts: "-k14" | ||
|
||
# Extra options passed to minimap2: | ||
minimap2_opts: "-uf" # required for stranded data e.g. when pychopper filtered | ||
|
||
# Minimum mapping quality: | ||
minimum_mapping_quality: 5 | ||
|
||
# Options passed to spliced_bam2gff: | ||
spliced_bam2gff_opts: "-s" # required for stranded data e.g. when pychopper filtered | ||
|
||
# -c parameter: | ||
minimum_cluster_size: 3 | ||
|
||
# -p parameter: | ||
minimum_isoform_percent: 1 | ||
|
||
# -d parameter: | ||
exon_boundary_tolerance: 10 | ||
|
||
# -e parameter: | ||
terminal_exon_boundary_tolerance: 50 | ||
|
||
# Extra options passed to minimap2 when mapping polished reads: | ||
minimap2_opts_polished: "-uf" # required for stranded data e.g. when pychopper filtered | ||
|
||
# Options passed to spliced_bam2gff when converting alignments of polished reads: | ||
spliced_bam2gff_opts_pol: "-s" # required for stranded data e.g. when pychopper filtered | ||
|
||
# Options passed to collapse_partials when collapsing fragmentation artifacts | ||
# Internal exon boundary tolerance: | ||
collapse_internal_tol: 5 | ||
|
||
# Five prime boundary tolerance: | ||
collapse_five_tol: 500 | ||
|
||
# Three prime boundary tolerance: | ||
collapse_three_tol: 50 | ||
|
||
maximum_secondary: 200 | ||
secondary_score_ratio: 1 | ||
|
||
##### Feature Analysis Config | ||
### Input genes | ||
|
||
gene_file: "data/genes.tab" | ||
|
||
### Output file and folder | ||
|
||
output_plots: "test.pdf" | ||
|
||
### Processed file paths - required if not using ONT long read processing workflow | ||
|
||
nanopore_gtf: "data/PDK2.nanopore.gtf" | ||
polished_reads: "data/PDK2.transcriptome.fas" | ||
counts_data: "data/PDK2.counts.txt" | ||
|
||
# Is data continuous | ||
continuous: FALSE | ||
|
||
### External tool paths and functional analysis | ||
|
||
aa: TRUE | ||
|
||
iupred2a_path: "/path/to/iupred2a/iupred2a.py" | ||
iupred2a: TRUE | ||
|
||
brewery_path: "/path/to/Brewery/Brewery.py" | ||
porter: TRUE | ||
|
||
interproScan_path: "/path/to/my_interproscan/interproscan-5.38-76.0/interproscan.sh" | ||
pfam: TRUE | ||
|
||
prositeScan_path: "/path/to/ps_scan/ps_scan.pl" | ||
pfScan_path: "/path/to/ps_scan/pfscan" | ||
prositeDat_path: "/path/to/prosite.dat" | ||
pfScan: TRUE | ||
|
||
#plaac_path: "/path/to/plaac.jar" | ||
#prion: FALSE | ||
|
||
minIsoTPM: 1 | ||
maxIsoNum: 8 | ||
minIsoPct: 10 | ||
|
||
### Misc paths | ||
|
||
java : "/pkg/openjdk-11.0.3.2-0/profile" | ||
|
||
lock1: "/.../lock.txt" | ||
lock2: "/.../lock2.txt" |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
class_code,A549_1,A549_2,A549_5,HCT116_1,HCT116_3,HCT116_4,HCT116_5,HEPG2_1,HEPG2_4,HEPG2_5,K562_1,K562_2,K562_3,K562_4,MCF7_1,MCF7_3,MCF7_4,transcript_id,gene_name,ref_transcript,gene_id,gene_type | ||
=,10.7123842660785,15.0113335568354,16.2651214801261,0,11.5434688558883,10.49952673775,10.1420512602935,0,7.86124895592787,10.4796284084991,2.44646283064557,0,2.19493190223773,1.57696537194305,3.82454516596614,10.8899378184551,11.490643684754,TCONS_00023419,PDK2,ENST00000505897.5,ENSG00000005882.12,protein_coding | ||
=,25.7097222385884,20.0151114091139,17.2816915726339,17.5114992178197,24.75990421263,27.2674276472911,23.6647862740183,8.05561597267535,19.6531223898197,16.1225052438448,1.46787769838734,0,2.19493190223773,0,15.2981806638645,27.8298410916074,33.2994163925523,TCONS_00023420,PDK2,ENST00000503176.6,ENSG00000005882.12,protein_coding |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
chr17 pinfish transcript 50095380 50106363 . + . transcript_id "TCONS_00023419"; gene_id "XLOC_007312"; gene_name "PDK2"; oId "f3fe429f-0296-4cc4-a2cf-aa295cd23fbe|60"; cmp_ref "ENST00000505897.5"; class_code "="; tss_id "TSS13403"; | ||
chr17 pinfish exon 50095380 50095553 . + . transcript_id "TCONS_00023419"; gene_id "XLOC_007312"; exon_number "1"; | ||
chr17 pinfish exon 50097423 50097564 . + . transcript_id "TCONS_00023419"; gene_id "XLOC_007312"; exon_number "2"; | ||
chr17 pinfish exon 50105371 50105442 . + . transcript_id "TCONS_00023419"; gene_id "XLOC_007312"; exon_number "3"; | ||
chr17 pinfish exon 50105885 50106363 . + . transcript_id "TCONS_00023419"; gene_id "XLOC_007312"; exon_number "4"; | ||
chr17 pinfish transcript 50095387 50111369 . + . transcript_id "TCONS_00023420"; gene_id "XLOC_007312"; gene_name "PDK2"; oId "bf7faaeb-3870-4b4a-8d23-854e564f9462|14"; cmp_ref "ENST00000503176.6"; class_code "="; tss_id "TSS13403"; | ||
chr17 pinfish exon 50095387 50095553 . + . transcript_id "TCONS_00023420"; gene_id "XLOC_007312"; exon_number "1"; | ||
chr17 pinfish exon 50097423 50097564 . + . transcript_id "TCONS_00023420"; gene_id "XLOC_007312"; exon_number "2"; | ||
chr17 pinfish exon 50105371 50105442 . + . transcript_id "TCONS_00023420"; gene_id "XLOC_007312"; exon_number "3"; | ||
chr17 pinfish exon 50105885 50106069 . + . transcript_id "TCONS_00023420"; gene_id "XLOC_007312"; exon_number "4"; | ||
chr17 pinfish exon 50106794 50106883 . + . transcript_id "TCONS_00023420"; gene_id "XLOC_007312"; exon_number "5"; | ||
chr17 pinfish exon 50107076 50107153 . + . transcript_id "TCONS_00023420"; gene_id "XLOC_007312"; exon_number "6"; | ||
chr17 pinfish exon 50108156 50108232 . + . transcript_id "TCONS_00023420"; gene_id "XLOC_007312"; exon_number "7"; | ||
chr17 pinfish exon 50108319 50108417 . + . transcript_id "TCONS_00023420"; gene_id "XLOC_007312"; exon_number "8"; | ||
chr17 pinfish exon 50108612 50108719 . + . transcript_id "TCONS_00023420"; gene_id "XLOC_007312"; exon_number "9"; | ||
chr17 pinfish exon 50109287 50109400 . + . transcript_id "TCONS_00023420"; gene_id "XLOC_007312"; exon_number "10"; | ||
chr17 pinfish exon 50109957 50111369 . + . transcript_id "TCONS_00023420"; gene_id "XLOC_007312"; exon_number "11"; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
>TCONS_00023419 | ||
GGTGCGCGAGCGCTGCCCGCGCGGGGACCACAACCAAAGTCGCGGCCGCCGCAGCCATGCGCTGGGTGTGGGCGCTGCTGAAGAATGCGTCCCTGGCAGGGGCGCCCAAGTACATAGAGCACTTCAGCAAGTTCTCCCCGTCCCCGCTGTCCATGAAGCAGTTTCTGGACTTCGGATCCAGCAATGCCTGTGAGAAAACCTCCTTCACCTTCCTCAGGCAGGAGCTGCCTGTGCGCCTGGCCAACATCATGAAAGAGATCAACCTGCTTCCCGACCGAGTGCTGAGCACACCCTCCGTGCAGCTGGTGCAGAGCTGGTATGTCCAGAGCCTCCTGGACATCATGGAGTTCCTGGACAAGGATCCCGAGGACCATCGCACCCTGAGCCAGTTCACTGACGCCCTGGTCACCATCCGGAACCGGCACAACGACGTGGTGCCCACCATGGCACAAGGCGTGCTTGAGTACAAGGACACCTACGGCGATGACCCCGTCTCCAACCAGAACATCCAGTACTTCCTGGACCGCTTCTACCTCAGCCGCATCTCCATCCGCATGCTCATCAACCAGCACAGTGGGTGCCGGCCACAGCGGCGGGGAGCGGGCGGTGGGGGGGGCGGTGCTGGGGCCCAGGGCCGGGCTGCTGAGGGGACCTAGACCACTCTTCAGAACCCCACAAAGGGAGTCTTTGAATAGTTACTCCAGTAACTATGGAGTTAATGGCTCCAACATGGAAAAATAAAATTTTTCTTTCTCATTGATTTTCCATTTCAAAACGTTTTGTTTTCAGTGTTTGCAAAATGTAAAATTATGTCACATCTTTAAAAGAATGTTTAATTTAGTATTTATAAAAACTCTCATTATGTTC | ||
>TCONS_00023420 | ||
GAGCGCTGCCCGCGCGGGGACCACAACCAAAGTCGCGGCCGCCGCAGCCATGCGCTGGGTGTGGGCGCTGCTGAAGAATGCGTCCCTGGCAGGGGCGCCCAAGTACATAGAGCACTTCAGCAAGTTCTCCCCGTCCCCGCTGTCCATGAAGCAGTTTCTGGACTTCGGATCCAGCAATGCCTGTGAGAAAACCTCCTTCACCTTCCTCAGGCAGGAGCTGCCTGTGCGCCTGGCCAACATCATGAAAGAGATCAACCTGCTTCCCGACCGAGTGCTGAGCACACCCTCCGTGCAGCTGGTGCAGAGCTGGTATGTCCAGAGCCTCCTGGACATCATGGAGTTCCTGGACAAGGATCCCGAGGACCATCGCACCCTGAGCCAGTTCACTGACGCCCTGGTCACCATCCGGAACCGGCACAACGACGTGGTGCCCACCATGGCACAAGGCGTGCTTGAGTACAAGGACACCTACGGCGATGACCCCGTCTCCAACCAGAACATCCAGTACTTCCTGGACCGCTTCTACCTCAGCCGCATCTCCATCCGCATGCTCATCAACCAGCACACCCTCATCTTTGATGGCAGCACCAACCCAGCCCATCCCAAACACATCGGCAGCATCGACCCCAACTGCAACGTCTCTGAGGTGGTCAAAGATGCCTACGACATGGCTAAGCTCCTGTGTGACAAGTATTACATGGCCTCACCTGACCTGGAGATCCAGGAGATCAATGCAGCCAACTCCAAACAGCCGATTCACATGGTCTACGTCCCCTCCCACCTCTACCACATGCTCTTTGAGCTCTTCAAGAATGCCATGAGGGCGACTGTGGAAAGCCATGAGTCCAGCCTCATTCTCCCACCCATCAAGGTCATGGTGGCCTTGGGTGAGGAAGATCTGTCCATCAAGATGAGTGACCGAGGTGGGGGTGTTCCCTTGAGGAAGATTGAGCGACTCTTCAGCTACATGTACTCCACAGCACCCACCCCCCAGCCTGGCACCGGGGGAACGCCGCTGGCTGGCTTTGGTTATGGGCTCCCCATTTCCCGCCTCTACGCCAAGTACTTCCAGGGAGACCTGCAGCTCTTCTCCATGGAAGGCTTTGGGACCGATGCTGTCATCTATCTCAAGGCCCTGTCCACGGACTCGGTGGAGCGCCTGCCTGTCTACAACAAGTCAGCCTGGCGCCACTACCAGACCATCCAGGAGGCCGGCGACTGGTGTGTGCCCAGCACGGAGCCCAAGAACACGTCCACGTACCGCGTCAGCTAAGGGCCGCCGTGCATCTGCACCTGAGAGGACGGACTGCCGCCTCTGGGTCCCCCCACCGTGGTGCCCCTCACCATCCTCCTGGGGGAGCAGGGGGTGGGTTCTCCCTGATGACCAGGTTCTGTCTCTATGGAAGTCACTGCGGTGATAGGTCTGTGATGGTCCCTAAGTGCCAGTCCATCTCTGTGGAGACCCCTCGGTGGCCTCCCTATCTCTGTGGGCGATGCCTGAGGGTTAGGGATGTCTCCACCCTGATGGGGTGTCCCAGAGACATTTTCCCATGGCAGTCCTCCTCTCTGAGACCAGGGCTGTCACTTTTCTGCCAGGGGTACTGGGTCCCCCTCAGCACCCTCCACAGCACAGGCCTTCCAAGTGGATGTCCCGTTGCCTTATTCCCCCAGCCCACAAAGGCACCCTGGCCTTGGCCTGCTGAAGTGTTAGGAAGAGGGTGGGTGCCCTCCAGACCTGGGGACTGAGTGGGGAAAGGAGTTACACCCGTGAGTGGGGAATGAGGCTGGTCCTGCAGCCTCTCCCTCCGCTCAGGGCTTGAAGGTCGGTGGCGGAGGGGGTGGCTCTCACAGGGCCCAACTCTAAAGTGGAAGAACCTTGTTAGACCGAGAGCTTGCCATCCAGCCAAGCTGCTCGAGGCCCTGCAGTGGCCTTGGCAATGTCTGTGCCACCTCCTGAGCCCTCCCAGCATGTCCTCACATGCTCATGCCCACCCGCTCCT
CCACAAGCCTAGTCCATCCTGCCTGAGCTCCAGCCCCCAGCCCCCACTGTGCCCAGACATGTGTGCTCAGGGTGGCTTTCTCCCTAGGACCTTCTGTGTATATAGTTAGTTTTATAACCCTGAATGCCCCCACCCTTCCCCTAAGCACACAGGGGTTAAAGCTGTGTGTCCCTCCCAGTGGCTGTGGCAGTGACAGTGACACCCACACCCACAGTAAAGAGGAGACTGAATGAGACTGGCCTGGCTGCATCCCTGGGGGAGGGGACCCACACGTGCGCACGTACACACGCACACTGCGGTGCCCTGTGACCACCACATGACGCACCGGGGACGTCCGGCCCAGCTCAGGACTGTGAGTAAAACACTGGGCAAAGCCATCTGTTCAGAACGCCGGATGGCCCAGCACCTGGTGACAGACCTGTTGTCCCACCATACTCAGGGCCCAGCAGAGTCCGGCCAACCCAACAACTGGAGCAGACAGAAATTCTGGTCACTGGCACATCATAAGGATTTATTGAAATAAATTGAGAACTGCCTCCCCTTCA |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
PDK2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters