#### all default parameters are found in the /groups/churchman/jd187/Program/STAR/STAR-STAR_2.4.0d/parametersDefault file ##############
### versions
#versionSTAR 020201
# int>0: STAR release numeric ID. Please do not change this value!
#versionGenome 020101 020200
# int>0: oldest value of the Genome version compatible with this STAR release. Please do not change this value!
#parametersFiles -
# string: name of a user-defined parameters file, "-": none. Can only be defined on the command line.
# provided at the command line level
#runMode alignReads
# string: type of the run: alignReads ... map reads
# genomeGenerate ... generate genome files
# inputAlignmentsFromBAM ... input alignments from BAM. Presently only works with --outWigType to generate wiggle files.
#runThreadN 1
# int: number of threads to run STAR
# provided at the command line level in case we need to tweak the memory settings etc
#genomeDir ./GenomeDir/
# string: path to the directory where genome files are stored (if runMode!=genomeGenerate) or will be generated (if runMode==genomeGenerate)
# provided at the command line level, in case we want to provide a different genome (e.g. built without Jct) without changing the param file
#genomeLoad NoSharedMemory
# mode of shared memory usage for the genome files
# string: LoadAndKeep ... load genome into shared and keep it in memory after run
# LoadAndRemove ... load genome into shared but remove it after run
# LoadAndExit ... load genome into shared memory and exit, keeping the genome in memory for future runs
# Remove ... do not map anything, just remove loaded genome from memory
# NoSharedMemory ... do not use shared memory, each job will have its own private copy of the genome
#genomeFastaFiles -
# fasta files with genomic sequence for genome files generation, only used if runMode==genomeGenerate
# string(s): path(s) to the files from the working directory (separated by spaces)
#genomeChrBinNbits 18
# int: =log2(chrBin), where chrBin is the size of the bins for genome storage: each chromosome will occupy an integer number of bins
#genomeSAindexNbases 14
# int: length (bases) of the SA pre-indexing string. Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches.
#genomeSAsparseD 1
# int>0: suffix array sparsity, i.e. distance between indices: use bigger numbers to decrease needed RAM at the cost of mapping speed reduction
#inputBAMfile -
# string: path to BAM input file, to be used with --runMode inputAlignmentsFromBAM
#readFilesIn Read1 Read2
# string(s): paths to files that contain input read1 (and, if needed, read2)
# given at the command line level, as the read file names depend on the sample name
readFilesCommand cat
# string(s): cmd line to execute for each of the input file. This command should generate FASTA or FASTQ text and send it to stdout
# For example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc.
#readMapNumber -1
# int: number of reads to map from the beginning of the file
# -1: map all reads
#readMatesLengthsIn NotEqual
# string: Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same. NotEqual is safe in all situations.
#clip3pNbases 0
# int(s): number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.
#clip5pNbases 0
# int(s): number(s) of bases to clip from 5p of each mate. If one value is given, it will be assumed the same for both mates.
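#clip3pAdapterSeq -
# string(s): adapter sequences to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates.
# NOTE(review): no adapter sequence is set in this file; presumably it is supplied on the command line - confirm,
# since clip3pAdapterMMp (set below) only describes mismatch tolerance for adapter clipping.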
clip3pAdapterMMp 0.21
# double(s): max proportion of mismatches for 3p adapter clipping for each mate. If one value is given, it will be assumed the same for both mates.
clip3pAfterAdapterNbases 1
# int(s): number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates.
#limitGenomeGenerateRAM 31000000000
# int>0: maximum available RAM (bytes) for genome generation
limitIObufferSize 200000000
# int>0: max available buffers size (bytes) for input/output, per thread
#limitOutSAMoneReadBytes 100000
#int>0: max size of the SAM record for one read. Recommended value: >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax)
#limitOutSJoneRead 1000
# int>0: max number of junctions for one read (including all multi-mappers)
#limitOutSJcollapsed 1000000
# int>0: max number of collapsed junctions
limitBAMsortRAM 64000000000
# int>=0: maximum available RAM (bytes) for sorting BAM. If =0, it will be set to the genome index size
#outFileNamePrefix ./
# string: output files name prefix (including full or relative path). Can only be defined on the command line.
# we define it on the command line as it is depending on the sample name
#outTmpDir -
# string: path to a directory that will be used as temporary by STAR. All contents of this directory will be removed!
# - the temp directory will default to outFileNamePrefix_tmp
#outStd Log
# string: which output will be directed to stdout (standard out)
# Log : log messages
# SAM : alignments in .sam format (which normally are output to Aligned.out.sam file), normal std output will go into Log.std.out
# BAM_Unsorted : alignments in BAM format, unsorted. Requires --outSAMtype BAM Unsorted
# BAM_SortedByCoordinate : alignments in BAM format, sorted by coordinate. Requires --outSAMtype BAM SortedByCoordinate
# BAM_Quant : alignments to transcriptome in BAM format, unsorted. Requires --quantMode TranscriptomeSAM
outReadsUnmapped Fastx
# string: output of unmapped reads (besides SAM)
# None : no output
# Fastx : output in separate fasta/fastq files, Unmapped.out.mate1/2
#outQSconversionAdd 0
# int: add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31)
outSAMtype BAM SortedByCoordinate
# strings: type of SAM/BAM output
# 1st word:
# BAM : output BAM without sorting
# SAM : output SAM without sorting
# None : no SAM/BAM output
# 2nd, 3rd ...
# Unsorted : standard unsorted
# SortedByCoordinate : sorted by coordinate
#outSAMmode Full
# string: mode of SAM output None : no SAM output
# Full : full SAM output
# NoQS : full SAM but without quality scores
#outSAMstrandField None
# string: Cufflinks-like strand field flag None : not used
# intronMotif : strand derived from the intron motif. Reads with inconsistent and/or non-canonical introns are filtered out.
#If you have stranded RNA-seq data, you do not need to use any specific STAR options. Instead, you need
#to run Cufflinks with the --library-type option. For example, cufflinks <...> --library-type
#fr-firststrand should be used for the "standard" dUTP protocol. This option has
#to be used only for Cufflinks runs and not for STAR runs.
outSAMattributes All
# string: a string of desired SAM attributes, in the order desired for the output SAM
# Standard : NH HI AS nM
# All : NH HI AS nM NM MD jM jI
# None
#outSAMunmapped None
# string: output of unmapped reads in the SAM format
# None : no output
# Within : output unmapped reads within the main SAM file (i.e. Aligned.out.sam)
#outSAMorder Paired
# string: type of sorting for the SAM output
# Paired: one mate after the other for all paired alignments
# PairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files
#outSAMprimaryFlag OneBestScore
# string: which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG
# OneBestScore : only one alignment with the best score is primary
# AllBestScore : all alignments with the best score are primary
#outSAMreadID Standard
# string: read ID record type
# Standard : first word (until space) from the FASTx read ID line, removing /1,/2 from the end
# Number : read number (index) in the FASTx file
#outSAMmapqUnique 255
# int: 0 to 255: the MAPQ value for unique mappers
#outSAMattrRGline -
# string(s): SAM/BAM read group line. The first word contains the read group identifier and must start with "ID:",
# e.g. --outSAMattrRGline ID:xxx CN:yy "DS:z z z".
# xxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted.
# Comma separated RG lines correspond to different (comma separated) input files in --readFilesIn.
#outSAMheaderHD -
# strings: @HD (header) line of the SAM header
#outSAMheaderPG -
# strings: extra @PG (software) line of the SAM header (in addition to STAR)
#outSAMheaderCommentFile -
# string: path to the file with @CO (comment) lines of the SAM header
#outBAMcompression -1
# int: -1 ... 10 BAM compression level, -1=default compression, 0=no compression, 10=maximum compression
#bamRemoveDuplicatesType -
# string: remove duplicates from BAM file, for now only works with sorted BAM fed with inputBAMfile
# UniqueIdentical : removes all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical
#bamRemoveDuplicatesMate2basesN 0
# int>0: number of bases from the 5' of mate 2 to use in collapsing (e.g. for RAMPAGE)
#outWigType None
# string(s): type of signal output, e.g. "bedGraph" OR "bedGraph read1_5p"
# 1st word:
# None
# bedGraph
# wiggle
# 2nd word:
# read1_5p : signal from only 5' of the 1st read, useful for CAGE/RAMPAGE etc
# read2 : signal from only 2nd read
#outWigStrand Stranded
# string: strandedness of wiggle (bedGraph) output
# Stranded: separate strands, str1 and str2
# Unstranded: collapsed strands
#outWigReferencesPrefix -
# string: prefix matching reference names to include in the output wiggle file, e.g. "chr.*"
# default: "-" - include all references
#outWigNorm RPM
# string: type of normalization for the signal
# RPM : reads per million of mapped reads
# None : no normalization, "raw" counts
#outFilterType Normal
# string: type of filtering
# Normal: standard filtering using only current alignment
# BySJout: keep only those reads that contain junctions that passed filtering into SJ.out.tab
#outFilterMultimapScoreRange 1
# int: the score range below the maximum score for multimapping alignments.
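# NOTE(review): unverified personal note, kept for reference. It reasons in terms of MAPQ, whereas the
# description above refers to the alignment score, so it may not apply - confirm against the STAR manual:
# as -10*log10(1-1/2) = MapQual for read mapping twice = 3.013, 3.013 -range 3
# keep all multi aligned reads (at least up to 100 times as -10*log10(1-1/100) =
# 0.04364805)... I think... Not sure I got it right though...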
outFilterMultimapNmax 2
# int: read alignments will be output only if the read maps to no more loci than this value, otherwise no alignments will be output
#outFilterMismatchNmax 10
# int: alignment will be output only if it has fewer mismatches
#outFilterMismatchNoverLmax 0.3
# float: alignment will be output only if its ratio of mismatches to *mapped* length is less than this value; to make it consistent with the parameter used with TH
#outFilterMismatchNoverReadLmax 1
# float: alignment will be output only if its ratio of mismatches to *read* length is less than this value
#outFilterScoreMin 0
# int: alignment will be output only if its score is higher than this value
#outFilterScoreMinOverLread 0.66
# float: outFilterScoreMin normalized to read length (sum of mates' lengths for paired-end reads)
#outFilterMatchNmin 0
# int: alignment will be output only if the number of matched bases is higher than this value; to make it consistent with the parameter used with TH
#outFilterMatchNminOverLread 0.66
# float: outFilterMatchNmin normalized to read length (sum of mates' lengths for paired-end reads)
# I guess it has something to do with soft clipping? I don't know, so I left the default value...
#outFilterIntronMotifs None
# string: filter alignment using their motifs
# None : no filtering
# RemoveNoncanonical : filter out alignments that contain non-canonical junctions
# RemoveNoncanonicalUnannotated : filter out alignments that contain non-canonical unannotated junctions when using annotated
# splice junctions database. The annotated non-canonical junctions will be kept.
#outSJfilterReads All
# string: which reads to consider for collapsed splice junctions output
# All: all reads, unique- and multi-mappers
# Unique: uniquely mapping reads only
outSJfilterOverhangMin 3 1 1 1
# 4*int: minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG motif,
# (3) GC/AG motif, (4) AT/AC motif. -1 means no output for that motif.
# does not apply to annotated junctions
#outSJfilterCountUniqueMin 3 1 1 1
# 4*int: minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG motif, (3) GC/AG motif,
# (4) AT/AC motif. -1 means no output for that motif. Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied.
# does not apply to annotated junctions
#outSJfilterCountTotalMin 3 1 1 1
# 4*int: minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG motif, (3) GC/AG motif, (4) AT/AC motif. -1 means no output for that motif. Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied.
# does not apply to annotated junctions
outSJfilterDistToOtherSJmin 0 0 0 0
# 4*int>=0: minimum allowed distance to other junctions' donor/acceptor
# does not apply to annotated junctions
#outSJfilterIntronMaxVsReadN 50000 100000 200000
# N*int>=0: maximum gap allowed for junctions supported by 1,2,3...N reads
# i.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000. by >=4
# reads any gap <=alignIntronMax
# does not apply to annotated junctions
#scoreGap 0
# gap open penalty
#scoreGapNoncan -8
# non-canonical gap open penalty (in addition to scoreGap)
#scoreGapGCAG -4
# GCAG gap open penalty (in addition to scoreGap)
#scoreGapATAC -8
# ATAC gap open penalty (in addition to scoreGap)
#scoreGenomicLengthLog2scale -0.25
# extra score logarithmically scaled with genomic length of the alignment: -scoreGenomicLengthLog2scale*log2(genomicLength)
#scoreDelOpen -2
# deletion open penalty
#scoreDelBase -2
# deletion extension penalty per base (in addition to scoreDelOpen)
#scoreInsOpen -2
# insertion open penalty
#scoreInsBase -2
# insertion extension penalty per base (in addition to scoreInsOpen)
#scoreStitchSJshift 1
# maximum score reduction while searching for SJ boundaries in the stitching step
#seedSearchStartLmax 50
# int>0: defines the search start point through the read - the read is split into pieces no longer than this value
#seedSearchStartLmaxOverLread 1.0
# float: seedSearchStartLmax normalized to read length (sum of mates' lengths for paired-end reads)
#seedSearchLmax 0
# int>=0: defines the maximum length of the seeds, if =0 max seed length is infinite
#seedMultimapNmax 10000
# int>0: only pieces that map fewer than this value are utilized in the stitching procedure
#seedPerReadNmax 1000
# int>0: max number of seeds per read
#seedPerWindowNmax 50
# int>0: max number of seeds per window
#seedNoneLociPerWindow 10
# int>0: max number of one seed loci per window
alignIntronMin 11
# minimum intron size: genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion
# modified specifically for tRNA introns
#alignIntronMax 0
# maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins
#alignMatesGapMax 0
# maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins
#alignSJoverhangMin 5
# int>0: minimum overhang (i.e. block size) for spliced alignments
#alignSJDBoverhangMin 3
# int>0: minimum overhang (i.e. block size) for annotated (sjdb) spliced alignments
#alignSplicedMateMapLmin 0
# int>0: minimum mapped length for a read mate that is spliced
#alignSplicedMateMapLminOverLmate 0.66
# float>0: alignSplicedMateMapLmin normalized to mate length
#alignWindowsPerReadNmax 10000
# int>0: max number of windows per read
#alignTranscriptsPerWindowNmax 100
# int>0: max number of transcripts per window
#alignTranscriptsPerReadNmax 10000
# max number of different alignments per read to consider
alignEndsType EndToEnd
# string: type of read ends alignment
# Local : standard local alignment with soft-clipping allowed
# EndToEnd: force end-to-end read alignment, do not soft-clip
#sjdbFileChrStartEnd -
# string: path to the file with genomic coordinates (chr <tab> start <tab> end <tab> strand) for the introns
# we will provide it in the command line directly, as the path is gonna depend on the sample
# NO: The annotations can be supplied in the form of splice junctions' loci or a GTF (or GFF3) file AND, importantly, the annotations have to be supplied at the genome/suffix array generation step
#sjdbGTFfile -
# string: path to the GTF file with annotations
# we will provide it in the command line directly
#sjdbGTFchrPrefix -
# string: prefix for chromosome names in a GTF file (e.g. 'chr' for using ENSEMBL annotations with UCSC genomes)
#sjdbGTFfeatureExon exon
# string: feature type in GTF file to be used as exons for building transcripts
#sjdbGTFtagExonParentTranscript transcript_id
# string: tag name to be used as exons' parents for building transcripts
#sjdbOverhang 0
# int>=0: length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1)
# if =0, splice junction database is not used
#sjdbScore 2
# int: extra alignment score for alignments that cross database junctions
#winAnchorMultimapNmax 50
# int>0: max number of loci anchors are allowed to map to
#winBinNbits 16
# int>0: =log2(winBin), where winBin is the size of the bin for the windows/clustering; each window will occupy an integer number of bins.
#winAnchorDistNbins 9
# int>0: max number of bins between two anchors that allows aggregation of anchors into one window
#winFlankNbins 4
# int>0: log2(winFlank), where winFlank is the size of the left and right flanking regions for each window
#chimSegmentMin 0
## int>0: minimum length of chimeric segment length, if ==0, no chimeric output
#chimScoreMin 0
## int>0: minimum total (summed) score of the chimeric segments
#chimScoreDropMax 20
## int>0: max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length
#chimScoreSeparation 10
## int>0: minimum difference (separation) between the best chimeric score and the next one
#chimScoreJunctionNonGTAG -1
## int: penalty for a non-GT/AG chimeric junction
#chimJunctionOverhangMin 20
## int>0: minimum overhang for a chimeric junction
#quantMode -
# string(s): types of quantification requested
# - : None
# TranscriptomeSAM : output SAM/BAM alignments to transcriptome into a separate file
#### 2-PASS
#twopass1readsN 0
# int>0: number of reads to process for the 1st step. 0 : 1-step only, no 2nd pass; use very large number to map all reads in the first step
#twopassSJlimit 1000000
# int>=0: maximum number of junction detected in the 1st step