Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
comp-metadata/docs/quantification/transcriptome/LXPv1.xml
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
135 lines (133 sloc)
5.38 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?>
<process>
<!-- Process identity: name and version combine into the "LXPv1" tag that appears in the output file identifiers of this description. -->
<name>LXP</name>
<version>1</version>
<!-- Author of this process description -->
<author>
  <name>Anupam Sinha</name>
  <email>a.sinha@ikmb.uni-kiel.de</email>
</author>
<!-- Precise description of what this process does, what output is generated and what statistics are computed -->
<description>
* htseq-count: Generates read counts on the gene level.
* cufflinks: Generates FPKM values for genes and transcript isoforms.
* StringTie: Generates FPKM values for genes and transcript isoforms. Also generates .ctab files for analysis using Ballgown.
</description>
<!-- Following section: list input files [samples to be analysed and similar] -->
<inputs>
  <!-- One alignment file per sample; it is the ${_sample}.bam argument of the command lines in this description -->
  <filetype>
    <identifier>.bam</identifier>
    <format>BAM</format>
    <quantity>single</quantity>
    <comment>Unfiltered aligned reads</comment>
  </filetype>
</inputs>
<!-- Following section: list reference files [e.g. reference genomes] used in this process -->
<references>
  <!-- NOTE(review): the cufflinks and StringTie command lines in this file pass gencode.v19.annotation_transcripts_only.gtf to -G, but that file is not listed among the references; confirm whether it should be added. -->
  <filetype>
    <identifier>gencode.v19.annotation.gtf</identifier>
    <format>GTF</format>
    <quantity>single</quantity>
    <comment>Gencode gene annotation file in gene transfer format.</comment>
  </filetype>
  <filetype>
    <identifier>reference.fa</identifier>
    <format>multi fasta</format>
    <quantity>single</quantity>
    <comment>The reference genome file; see aspera.dkfz.de > download > results > references > genomes > human > WholeGenome</comment>
  </filetype>
</references>
<!-- Following section: list output files of process [e.g. fpkm files, read counts files from htseq etc.] -->
<outputs>
  <!-- Every identifier follows the pattern [sampleID].LXPv1.[DATE].<suffix> -->
  <filetype>
    <identifier>[sampleID].LXPv1.[DATE].readcounts.txt</identifier>
    <format>text file</format>
    <quantity>single</quantity>
    <comment>This file contains the read counts on the gene level.</comment>
  </filetype>
  <filetype>
    <identifier>[sampleID].LXPv1.[DATE].genes.fpkm.tracking</identifier>
    <format>text file</format>
    <quantity>single</quantity>
    <comment>Output file containing the FPKM counts on the gene level.</comment>
  </filetype>
  <filetype>
    <identifier>[sampleID].LXPv1.[DATE].isoforms.fpkm.tracking</identifier>
    <format>text file</format>
    <quantity>single</quantity>
    <comment>Output file containing the FPKM counts on the isoform level.</comment>
  </filetype>
  <filetype>
    <identifier>[sampleID].LXPv1.[DATE].transcripts.gtf</identifier>
    <format>gene transfer format</format>
    <quantity>single</quantity>
    <comment>This file contains assembled transcripts (cufflinks output).</comment>
  </filetype>
  <filetype>
    <identifier>[sampleID].LXPv1.[DATE].stringtie.gtf</identifier>
    <format>gene transfer format</format>
    <quantity>single</quantity>
    <comment>This file contains assembled transcripts (StringTie output).</comment>
  </filetype>
  <filetype>
    <identifier>[sampleID].LXPv1.[DATE].ballgown</identifier>
    <format>tab separated fields (.ctab) format</format>
    <quantity>five</quantity>
    <comment>This is a folder containing 5 .ctab files. These .ctab files contain the expression values of exons, introns and transcripts. Two files list the internal (generated by ballgown) association ids between exons, introns, and transcripts.</comment>
  </filetype>
</outputs>
<!-- Following section: list software tools [name, version, command line] used in this process -->
<software>
<tool>
  <name>Python</name>
  <version>2.7</version>
  <!-- NOTE(review): command_line holds the literal text "CMDLINE", which looks like an unfilled template placeholder; confirm how Python is invoked in this process. -->
  <command_line><![CDATA[ CMDLINE ]]></command_line>
  <loop>no looping</loop>
  <comment></comment>
</tool>
<tool>
  <name>Samtools</name>
  <version>0.1.19-44428cd</version>
  <!-- NOTE(review): command_line holds the literal text "CMDLINE", which looks like an unfilled template placeholder. The samtools invocations of this process are recorded in the htseq-count tool entry. -->
  <command_line><![CDATA[ CMDLINE ]]></command_line>
  <loop>no looping</loop>
  <comment></comment>
</tool>
<tool>
  <name>htseq-count</name>
  <version>0.6.1p1</version>
  <!-- Three commands, one per line: (1) sort the BAM by read name, (2) convert BAM to SAM while dropping alignments that carry flag 256, (3) count reads per gene. -->
  <command_line>
<![CDATA[
samtools sort -n -@ 8 -m 4G ${_sample}.bam ${_sample}_sorted
samtools view -F 256 ${_sample}_sorted.bam > ${_sample}.sam
htseq-count -s reverse -m union -a 20 ${_sample}.sam gencode.v19.annotation.gtf > ${_sample}_htseq.txt
]]>
  </command_line>
  <loop>no looping</loop>
  <comment>htseq-count expects the alignments to be sorted by read name (step 1). After sorting, all non-primary alignments are removed during the bam to sam conversion. \
Invoking htseq-count counts the number of reads per gene. \
Please see http://www-huber.embl.de/users/anders/HTSeq/doc/count.html#count for further information.
  </comment>
</tool>
<tool>
  <name>cufflinks</name>
  <version>v2.0.2</version>
  <!-- A single cufflinks invocation, wrapped over two lines with a shell line continuation. -->
  <command_line>
<![CDATA[
cufflinks -p 16 --frag-bias-correct reference.fa --multi-read-correct --library-type fr-firststrand \
--compatible-hits-norm -G gencode.v19.annotation_transcripts_only.gtf ${_sample}.bam
]]>
  </command_line>
  <loop>no looping</loop>
  <comment>Please see http://cufflinks.cbcb.umd.edu/manual.html for further information.</comment>
</tool>
<tool>
  <name>StringTie</name>
  <version>v1.0.3</version>
  <!-- NOTE(review): no alignment (.bam) input appears on the command line below, although StringTie needs one; confirm the actual invocation. -->
  <command_line>
<![CDATA[
stringtie -p 16 -e -b ${_sample}.ballgown -o ${_sample}_stringtie.gtf -G gencode.v19.annotation_transcripts_only.gtf
]]>
  </command_line>
  <loop>no looping</loop>
  <comment>Please see http://ccb.jhu.edu/software/stringtie/ for further information. \
"-b" option creates a folder which contains the .ctab files for analysis using Ballgown. \
Please see https://github.com/alyssafrazee/ballgown for further information.
  </comment>
</tool>
</software>
</process>