diff --git a/Snakefile b/Snakefile
new file mode 100644
index 0000000..65cff2e
--- /dev/null
+++ b/Snakefile
@@ -0,0 +1,294 @@
+import os
+
+sampleID = config["sampleID"]
+WORKDIR = os.getcwd()
+spikeIn = config["spikeIn"]
+reference = config["reference"]
+mask_bed = config["mask_bed"]
+
+def resultFiles(spikeIn, reference, sampleID):
+    """Return the list of final targets, depending on whether a spike-in
+    genome is used and whether a masking BED file is configured."""
+    if spikeIn == "no":
+        bigwigs = expand("BigWig/{sampleID}/{type}{sampleID}.{strand}.bw", sampleID=sampleID, type=["", "PCRtrack_", "RTtrack_"], strand=["pos", "neg"])
+        if mask_bed != '':
+            masked = expand("Results/Masked_{sampleID}.{strand}.bedGraph", sampleID=sampleID, strand=["pos", "neg"])
+            return bigwigs + masked
+        return bigwigs
+    else:
+        bedgraphs = expand("Bedgraphs/{sampleID}/{ref}/{type}{sampleID}.{strand}.bedGraph", sampleID=sampleID, ref=[reference, spikeIn], type=["", "PCRtrack_", "RTtrack_"], strand=["pos", "neg"])
+        normalized = expand("Results/Normalized_{sampleID}.{strand}.bedGraph", sampleID=sampleID, strand=["pos", "neg"])
+        if mask_bed != '':
+            masked = expand("Results/Masked_{sampleID}.{strand}.bedGraph", sampleID=sampleID, strand=["pos", "neg"])
+            return bedgraphs + normalized + masked
+        return bedgraphs + normalized
+
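+# Illustration (values taken from config_example.yaml, shown here only as an
+# example): with sampleID="test", reference="GRCh38.p12", spikeIn="GRCm38.p6"
+# and mask_bed set, resultFiles() requests, among others:
+#   Bedgraphs/test/GRCh38.p12/test.pos.bedGraph
+#   Bedgraphs/test/GRCm38.p6/RTtrack_test.neg.bedGraph
+#   Results/Normalized_test.pos.bedGraph
+#   Results/Masked_test.neg.bedGraph
+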
temp("TemporaryData/Mapping/"+sampleID+"_Aligned.out.bam") + params: + input="TemporaryData/"+sampleID+"_noBC.fasta", + star=config["star"], + params=config["star_params"], + star_index=config["star_index"], + output_prefix="TemporaryData/Mapping/"+sampleID+"_" + resources: + memory = 200, + time = 4 + threads: + 50 + shell: + "{params.star} --outSAMtype BAM Unsorted --genomeDir {params.star_index} --readFilesIn {params.input} --parametersFiles {params.params} --runThreadN {threads} --outFileNamePrefix {params.output_prefix}" + +rule sort_bam: + input: + "TemporaryData/Mapping/"+sampleID+"_Aligned.out.bam" + output: + "TemporaryData/Mapping/"+sampleID+"_Aligned.sorted.bam" + params: + samtools=config["samtools"], + resources: + memory = 200, + time = 4 + threads: + 10 + shell: + "{params.samtools} sort -@ {threads} -o {output} {input}" + +rule parse_positions: + input: + "TemporaryData/"+sampleID+"_barcodes.pickle", + "TemporaryData/Mapping/"+sampleID+"_Aligned.sorted.bam" + output: + "TemporaryData/"+sampleID+"_posDict.pickle" + params: + barcode_len=config["barcode_len"], + resources: + memory = 300, + time = 4 + threads: + 1 + script: + "source/parsePositions.py" + +rule create_bedgraphs: + input: + "TemporaryData/"+sampleID+"_posDict.pickle", + output: + "Bedgraphs/"+sampleID+"/"+sampleID+"_allReads.pos.bedGraph", + "Bedgraphs/"+sampleID+"/"+sampleID+"_allReads.neg.bedGraph", + "Bedgraphs/"+sampleID+"/"+sampleID+"_noPCR.pos.bedGraph", + "Bedgraphs/"+sampleID+"/"+sampleID+"_noPCR.neg.bedGraph", + "Bedgraphs/"+sampleID+"/"+sampleID+"_noPCR_noSI.pos.bedGraph", + "Bedgraphs/"+sampleID+"/"+sampleID+"_noPCR_noSI.neg.bedGraph", + "Bedgraphs/"+sampleID+"/"+"PCRtrack_"+sampleID+".pos.bedGraph", + "Bedgraphs/"+sampleID+"/"+"PCRtrack_"+sampleID+".neg.bedGraph", + "Bedgraphs/"+sampleID+"/"+"RTtrack_"+sampleID+".pos.bedGraph", + "Bedgraphs/"+sampleID+"/"+"RTtrack_"+sampleID+".neg.bedGraph", + "Bedgraphs/"+sampleID+"/"+sampleID+".pos.bedGraph", + "Bedgraphs/"+sampleID+"/"+sampleID+".neg.bedGraph" + params: + annotation=config["annotation"], + genome=config["genome_fa"], + barcode_len=config["barcode_len"], + barcode_linker=config["barcode_linker"], + threshold=0.05 + resources: + memory = 300, + time = 4 + threads: + 1 + script: + "source/createBedgraphs.py" + +rule create_stats: + input: + "TemporaryData/"+sampleID+".fastq", + "Bedgraphs/"+sampleID+"/"+sampleID+"_allReads.pos.bedGraph", + "Bedgraphs/"+sampleID+"/"+sampleID+"_allReads.neg.bedGraph", + "Bedgraphs/"+sampleID+"/"+sampleID+"_noPCR.pos.bedGraph", + "Bedgraphs/"+sampleID+"/"+sampleID+"_noPCR.neg.bedGraph", + "Bedgraphs/"+sampleID+"/"+sampleID+"_noPCR_noSI.pos.bedGraph", + "Bedgraphs/"+sampleID+"/"+sampleID+"_noPCR_noSI.neg.bedGraph" + output: + "Stats/"+sampleID+".csv", + "Stats/Number_"+sampleID+".pdf", + "Stats/Percent_"+sampleID+".pdf" + params: + config["stats_bed"], + config["prefix"], + config["mask_downstream"] + resources: + memory = 100, + time = 10, + temp_memory = 100 + threads: + 1 + script: + "source/createStats.py" + +rule create_bw: + input: + bg = "Bedgraphs/"+sampleID+"/{X}.bedGraph" + output: + bw = "BigWig/"+sampleID+"/{X}.bw" + resources: + memory = 100, + time = 1 + params: + bedGraphToBigWig = config["bedGraphToBigWig"], + chrLen = config["star_index"] + "chrNameLength.txt" + threads: + 1 + shell: + """ + sort -k1,1 -k2,2n {input.bg} > {input.bg}.temp + {params.bedGraphToBigWig} {input.bg}.temp {params.chrLen} {output.bw} + rm -f {input.bg}.temp + """ + +rule splitBedgraph: + params: + prefix = config["prefix"], + 
+rule splitBedgraph:
+    params:
+        prefix = config["prefix"]
+    input:
+        bg = "Bedgraphs/"+sampleID+"/{X}.bedGraph"
+    output:
+        reference_bg = "Bedgraphs/"+sampleID+"/"+reference+"/{X}.bedGraph",
+        spikeIn_bg = "Bedgraphs/"+sampleID+"/"+spikeIn+"/{X}.bedGraph"
+    resources:
+        memory = 10,
+        time = 1
+    shell:
+        """
+        grep -v "^{params.prefix}" {input.bg} > {output.reference_bg}
+        grep "^{params.prefix}" {input.bg} | sed "s/^{params.prefix}//" > {output.spikeIn_bg}
+        """
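+
+# Normalization sketch: the scaling factor is the total spike-in 3'-end count
+# divided by 1e6, i.e. reference signal is reported per million spike-in reads;
+# source/normalizeSpikeIn.py also writes the spike-in total to
+# Results/factor_{X}.txt.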
+rule normalize_with_spikeIn:
+    input:
+        local_spike=expand("Bedgraphs/"+sampleID+"/"+spikeIn+"/"+sampleID+".{strand}.bedGraph", strand=["pos","neg"]),
+        local_sample="Bedgraphs/"+sampleID+"/"+reference+"/{X}.bedGraph"
+    output:
+        "Results/Normalized_{X}.bedGraph",
+        "Results/factor_{X}.txt"
+    resources:
+        memory = 100,
+        time = 1
+    threads:
+        1
+    script:
+        "source/normalizeSpikeIn.py"
+
+rule maskRegions:
+    input:
+        "Bedgraphs/"+sampleID+"/"+reference+"/{X}.bedGraph"
+    output:
+        "Results/Masked_{X}.bedGraph"
+    resources:
+        memory = 100,
+        time = 1
+    threads:
+        1
+    params:
+        config["mask_bed"]
+    script:
+        "source/maskRegion.py"
diff --git a/config_example.yaml b/config_example.yaml
new file mode 100644
index 0000000..e479ad0
--- /dev/null
+++ b/config_example.yaml
@@ -0,0 +1,52 @@
+
+# sample ids
+sampleID : test
+
+# InputFile:
+file: /project/ReadThrough/HiS-NET-Seq_pipeline/data/test/test.fastq.gz
+
+reference: GRCh38.p12
+spikeIn: GRCm38.p6
+prefix: Mus_musculus_
+
+# compression type: gz, bz2 or no
+compression: gz
+
+barcode_len: 10
+
+barcode_linker: CTGTAGGCACCATCAAT
+
+# star path
+star : /project/owlmayer/Applications/STAR-2.7.3a/bin/Linux_x86_64/STAR
+
+# star index path
+star_index : /project/ReadThrough/HiS-NET-Seq_pipeline/data/GRCh38_p12_GRCm38_p6
+
+star_params: /project/ReadThrough/HiS-NET-Seq_pipeline/data/Parameters.in
+
+annotation: /project/ReadThrough/HiS-NET-Seq_pipeline/data/gencode.v28.vM18.primary_assembly.annotation.gff3
+
+genome_fa: /project/ReadThrough/HiS-NET-Seq_pipeline/data/GRCh38.p12.GRCm38.p6.primary_assembly.genome.fa
+
+#FastQC
+fastqc : /usr/local/package/bin/fastqc
+
+# samtools
+samtools : /usr/local/package/bin/samtools
+
+starcode: /project/owlmayer/Applications/starcode/starcode
+
+starcode_params: "-d 0"
+
+cutadapt: /usr/local/package/bin/cutadapt
+
+cutadapt_params: "-a ATCTCGTATGCCGTCTTCTGCTTG -a AAAAAAAAAAGGGGGGGGGGGGGG -a GGGGGGGGGGGGGGGGGGGGGGG -e 0.2 -q 5 --max-n 0.9"
+
+bedGraphToBigWig : /usr/local/package/bin/bedGraphToBigWig
+
+mask_bed: /project/ReadThrough/HiS-NET-Seq_pipeline/data/masked.bed
+
+star_params_unmapped: /project/ReadThrough/HiS-NET-Seq_pipeline/data/Parameters.unmapped.in
+stats_bed: /project/ReadThrough/HiS-NET-Seq_pipeline/data/maskingClasses.bed
+
+mask_downstream: 300
diff --git a/createConfig.sh b/createConfig.sh
new file mode 100644
index 0000000..54f816a
--- /dev/null
+++ b/createConfig.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+
+SOURCE=${BASH_SOURCE[0]}
+DIR=$( dirname "$SOURCE" )
+DIR_REALPATH=$(realpath "$DIR")
+
+sampleID=$1
+fastq=$(realpath "$2")
+config=$3
+
+STAR=$(which STAR)
+STAR_realpath=$(realpath "$STAR")
+
+fastqc=$(which fastqc)
+fastqc_realpath=$(realpath "$fastqc")
+
+samtools=$(which samtools)
+samtools_realpath=$(realpath "$samtools")
+
+starcode=$(which starcode)
+starcode_realpath=$(realpath "$starcode")
+
+cutadapt=$(which cutadapt)
+cutadapt_realpath=$(realpath "$cutadapt")
+
+bedGraphToBigWig=$(which bedGraphToBigWig)
+bedGraphToBigWig_realpath=$(realpath "$bedGraphToBigWig")
+
+
+echo "
+# sample ids
+sampleID : $sampleID
+
+# InputFile:
+file: $fastq
+
+reference: GRCh38.p12
+spikeIn: GRCm38.p6
+prefix: Mus_musculus_
+
+# compression type: gz, bz2 or no
+compression: gz
+
+barcode_len: 10
+
+barcode_linker: CTGTAGGCACCATCAAT
+
+# star path
+star : $STAR_realpath
+
+# star index path
+star_index : $DIR_REALPATH/data/GRCh38_p12_GRCm38_p6
+
+star_params: $DIR_REALPATH/data/Parameters.in
+
+annotation: $DIR_REALPATH/data/gencode.v28.vM18.primary_assembly.annotation.gff3
+
+genome_fa: $DIR_REALPATH/data/GRCh38.p12.GRCm38.p6.primary_assembly.genome.fa
+
+#FastQC
+fastqc : $fastqc_realpath
+
+# samtools
+samtools : $samtools_realpath
+
+starcode: $starcode_realpath
+
+starcode_params: \"-d 0\"
+
+cutadapt: $cutadapt_realpath
+
+cutadapt_params: \"-a ATCTCGTATGCCGTCTTCTGCTTG -a AAAAAAAAAAGGGGGGGGGGGGGG -a GGGGGGGGGGGGGGGGGGGGGGG -e 0.2 -q 5 --max-n 0.9\"
+
+bedGraphToBigWig : $bedGraphToBigWig_realpath
+
+mask_bed: $DIR_REALPATH/data/masked.bed
+
+star_params_unmapped: $DIR_REALPATH/data/Parameters.unmapped.in
+stats_bed: $DIR_REALPATH/data/maskingClasses.bed
+
+mask_downstream: 300" > "$config"
diff --git a/data/GRCh38.p12.GRCm38.p6.primary_assembly.genome.fa b/data/GRCh38.p12.GRCm38.p6.primary_assembly.genome.fa
new file mode 120000
index 0000000..c36a190
--- /dev/null
+++ b/data/GRCh38.p12.GRCm38.p6.primary_assembly.genome.fa
@@ -0,0 +1 @@
+/project/owlmayerTemporary/owlmayer_Applications/01_NetSeqPipeline.v2.spikeIns/data/GRCh38.p12.GRCm38.p6.primary_assembly.genome.fa
\ No newline at end of file
diff --git a/data/GRCh38_p12_GRCm38_p6 b/data/GRCh38_p12_GRCm38_p6
new file mode 120000
index 0000000..1b9a218
--- /dev/null
+++ b/data/GRCh38_p12_GRCm38_p6
@@ -0,0 +1 @@
+/project/owlmayerTemporary/STAR/RNAseq_STAR-2.7.3a/GRCh38_p12_GRCm38_p6/
\ No newline at end of file
diff --git a/data/Parameters.in b/data/Parameters.in
new file mode 100755
index 0000000..71ae3f1
--- /dev/null
+++ b/data/Parameters.in
@@ -0,0 +1,480 @@
+#### all parameters defaults are found in the /groups/churchman/jd187/Program/STAR/STAR-STAR_2.4.0d/parametersDefault file ##############
+
+### versions
+#versionSTAR 020201
+# int>0: STAR release numeric ID. Please do not change this value!
+#versionGenome 020101 020200
+# int>0: oldest value of the Genome version compatible with this STAR release. Please do not change this value!
+
+### PARAMETERS
+#parametersFiles -
+# string: name of a user-defined parameters file, "-": none. Can only be defined on the command line.
+# provided at the command line level
+
+### RUN PARAMETERS
+#runMode alignReads
+# string: type of the run: alignReads ... map reads
+# genomeGenerate ... generate genome files
+# inputAlignmentsFromBAM ... input alignments from BAM. Presently only works with --outWigType to generate wiggle files.
+
+#runThreadN 1
+# int: number of threads to run STAR
+# provided at the command line level in case we need to tweak the memory settings etc.
+
+### GENOME PARAMETERS
+#genomeDir ./GenomeDir/
+# string: path to the directory where genome files are stored (if runMode!=generateGenome) or will be generated (if runMode==generateGenome)
+# provided at the command line level, in case we want to provide a different genome (e.g. built without junctions) without changing the parameters file
+
+#genomeLoad NoSharedMemory
+# mode of shared memory usage for the genome files
+# string: LoadAndKeep ... load genome into shared and keep it in memory after run
+# LoadAndRemove ... load genome into shared but remove it after run
+# LoadAndExit ... load genome into shared memory and exit, keeping the genome in memory for future runs
+# Remove ... 
do not map anything, just remove loaded genome from memory +# NoSharedMemory ... do not use shared memory, each job will have its own private copy of the genome + +### GENOME GENERATION PARAMETERS +#genomeFastaFiles - +# fasta files with genomic sequence for genome files generation, only used if runMode==genomeGenerate +# string(s): path(s) to the files from the working directory (separated by spaces) + +#genomeChrBinNbits 18 +# int: =log2(chrBin), where chrBin is the size of the bins for genome storage: each chromosome will occupy an integer number of bins + +#genomeSAindexNbases 14 +# int: length (bases) of the SA pre-indexing string. Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. + +#genomeSAsparseD 1 +# int>0: suffux array sparsity, i.e. distance between indices: use bigger numbers to decrease needed RAM at the cost of mapping speed reduction + + +### INPUT FROM BAM +#inputBAMfile - +# string: path to BAM input file, to be used with --runMode inputAlignmentsFromBAM + + +### READ PARAMETERS +#readFilesIn Read1 Read2 +# string(s): paths to files that contain input read1 (and, if needed, read2) +# given at the command line level as the read names is gonna depend on the sample name + +readFilesCommand cat +# string(s): cmd line to execute for each of the input file. This command should generate FASTA or FASTQ text and send it to stdout +# For example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc. + +#readMapNumber -1 +# int: number of reads to map from the beginning of the file +# -1: map all reads + +#readMatesLengthsIn NotEqual +# string: Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same. NotEqual is safe in all situations. + +#clip3pNbases 0 +# int(s): number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates. +# +#clip5pNbases 0 +# int(s): number(s) of bases to clip from 5p of each mate. If one value is given, it will be assumed the same for both mates. + +clip3pAdapterSeq ATCTCGTATGCCGTCTTCTGCTTG +# string(s): adapter sequences to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates. + +clip3pAdapterMMp 0.21 +# double(s): max proportion of mismatches for 3p adpater clipping for each mate. If one value is given, it will be assumed the same for both mates. +# + +clip3pAfterAdapterNbases 1 +# int(s): number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates. + + + +### LIMITS +#limitGenomeGenerateRAM 31000000000 +# int>0: maximum available RAM (bytes) for genome generation + +limitIObufferSize 200000000 +# int>0: max available buffers size (bytes) for input/output, per thread + +#limitOutSAMoneReadBytes 100000 +#int>0: max size of the SAM record for one read. Recommended value: >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax) + +#limitOutSJoneRead 1000 +# int>0: max number of junctions for one read (including all multi-mappers) + +#limitOutSJcollapsed 1000000 +# int>0: max number of collapsed junctions + +limitBAMsortRAM 64000000000 +# int>=0: maximum available RAM for sorting BAM. If =0, it will be set to the genome index size + + +### OUTPUT: GENERAL +#outFileNamePrefix ./ +# string: output files name prefix (including full or relative path). Can only be defined on the command line. 
+# we define it on the command line as it is depending on the sample name + +#outTmpDir - +# string: path to a directory that will be used as temporary by STAR. All contents of this directory will be removed! +# - the temp directory will default to outFileNamePrefix_tmp + +#outStd Log +# string: which output will be directed to stdout (standard out) +# Log : log messages +# SAM : alignments in .sam format (which normally are output to Aligned.out.sam file), normal std output will go into Log.std.out +# BAM_Unsorted : alignments in BAM format, unsorted. Requires --outSAMtype BAM Unsorted +# BAM_SortedByCoordinate : alignments in BAM format, unsorted. Requires --outSAMtype BAM SortedByCoordinate +# BAM_Quant : alignments to transcriptome in BAM format, unsorted. Requires --quantMode TranscriptomeSAM + +outReadsUnmapped Fastx +# string: output of unmapped reads (besides SAM) +# None : no output +# Fastx : output in separate fasta/fastq files, Unmapped.out.mate1/2 + +#outQSconversionAdd 0 +# int: add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31) + + +### OUTPUT: SAM/BAM +outSAMtype BAM SortedByCoordinate +# strings: type of SAM/BAM output +# 1st word: +# BAM : output BAM without sorting +# SAM : output SAM without sorting +# None : no SAM/BAM output +# 2nd, 3rd ... +# Unsorted : standard unsorted +# SortedByCoordinate : sorted by coordinate + +#outSAMmode Full +# string: mode of SAM output None : no SAM output +# Full : full SAM output +# NoQS : full SAM but without quality scores + +#outSAMstrandField None +# string: Cufflinks-like strand field flag None : not used +# intronMotif : strand derived from the intron motif. Reads with inconsisent and/or non-canonical introns are filtered out. + +#If you have stranded RNA-seq data, you do not need to use any specific STAR options. Instead, you need +#to run Cufflinks with the library option --library-type options. For example, cufflinks <…> - +#library-type fr-firststrand should be used for the “standard” dUTP protocol. This option has +#to be used only for Cufflinks runs and not for STAR runs. + +outSAMattributes All +# string: a string of desired SAM attributes, in the order desired for the output SAM +# NH HI AS nM NM MD jM jI XS +# Standard : NH HI AS nM +# All : NH HI AS nM NM MD jM jI +# None + + +#outSAMunmapped None +# string: output of unmapped reads in the SAM format +# None : no output +# Within : output unmapped reads within the main SAM file (i.e. Aligned.out.sam) + +#outSAMorder Paired +# string: type of sorting for the SAM output +# Paired: one mate after the other for all paired alignments +# PairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files + +#outSAMprimaryFlag OneBestScore +# string: which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG +# OneBestScore : only one alignment with the best score is primary +# AllBestScore : all alignments with the best score are primary + +#outSAMreadID Standard +# string: read ID record type +# Standard : first word (until space) from the FASTx read ID line, removing /1,/2 from the end +# Number : read number (index) in the FASTx file + +#outSAMmapqUnique 255 +# int: 0 to 255: the MAPQ value for unique mappers + +#outSAMattrRGline - +# string(s): SAM/BAM read group line. The first word contains the read group identifier and must start with "ID:", +# e.g. --outSAMattrRGline ID:xxx CN:yy "DS:z z z". 
+# xxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted. +# Comma separated RG lines correspons to different (comma separated) input files in --readFilesIn. + +#outSAMheaderHD - +# strings: @HD (header) line of the SAM header + +#outSAMheaderPG - +# strings: extra @PG (software) line of the SAM header (in addition to STAR) +# +#outSAMheaderCommentFile - +# string: path to the file with @CO (comment) lines of the SAM header +# +#outBAMcompression -1 +# int: -1 ... 10 BAM compression level, -1=default compression, 0=no compression, 10=maximum compression +# +#bamRemoveDuplicatesType - +# string: remove duplicates from BAM file, for now only works with sorted BAM feeded with inputBAMfile +# UniqueIdentical : removes all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical +# +#bamRemoveDuplicatesMate2basesN 0 +# int>0: number of bases from the 5' of mate 2 to use in collapsing (e.g. for RAMPAGE) +# + +### OUTPUT WIGGLE +#outWigType None +# string(s): type of signal output, e.g. "bedGraph" OR "bedGraph read1_5p" +# 1st word: +# None +# bedGraph +# wiggle +# 2nd word: +# read1_5p : signal from only 5' of the 1st read, useful for CAGE/RAMPAGE etc +# read2 : signal from only 2nd read +# +#outWigStrand Stranded +# string: strandedness of wigglle (bedGraph) output +# Stranded: separate strands, str1 and str2 +# Unstranded: collapsed strands +# +#outWigReferencesPrefix - +# string: prefix matching reference names to include in the output wiggle file, e.g. "chr.*" +# default: "-" - include all references +# +#outWigNorm RPM +# string: type of normalization for the signal +# RPM : reads per million of mapped reads +# None : no normalization, "raw" counts +# + + +### OUTPUT FILTERING +#outFilterType Normal +# string: type of filtering +# Normal: standard filtering using only current alignment +# BySJout: keep only those reads that contain junctions that passed filtering into SJ.out.tab + +#outFilterMultimapScoreRange 1 +# int: the score range below the maximum score for multimapping alignments. +# as -10*log10(1-1/2) = MapQual for read mapping twice = 3.013, 3.013 -range 3 +# keep all multi aligned reads (at least up to 100 times as -10*log10(1-1/100) = +# 0.04364805)... I think... Not sure I got it right though... + +outFilterMultimapNmax 1 +# int: read alignments will be output only if the read maps fewer than this value, otherwise no alignments will be output + +#outFilterMismatchNmax 10 +# int: alignment will be output only if it has fewer mismatches + +#outFilterMismatchNoverLmax 0.3 +# int: alignment will be output only if its ratio of mismatches to *mapped* length is less than this value; to make it consistent with parmater used with TH + +#outFilterMismatchNoverReadLmax 1 +# int: alignment will be output only if its ratio of mismatches to *read* length is less than this value + +#outFilterScoreMin 0 +# int: alignment will be output only if its score is higher than this value + +#outFilterScoreMinOverLread 0.66 +# float: outFilterScoreMin normalized to read length (sum of mates' lengths for paired-end reads) + +#outFilterMatchNmin 0 +# int: alignment will be output only if the number of matched bases is higher than this value; to make it consistent with parmater used with TH + +#outFilterMatchNminOverLread 0.66 +# float: outFilterMatchNmin normalized to read length (sum of mates' lengths for paired-end reads) +# I guess it has something to do with soft clipping ? 
I don't know, so I let the default value... + +#outFilterIntronMotifs None +# string: filter alignment using their motifs +# None : no filtering +# RemoveNoncanonical : filter out alignments that contain non-canonical junctions +# RemoveNoncanonicalUnannotated : filter out alignments that contain non-canonical unannotated junctions when using annotated +# splice junctions database. The annotated non-canonical junctions will be kept. + +### OUTPUT FILTERING: SPLICE JUNCTIONS +#outSJfilterReads All +# string: which reads to consider for collapsed splice junctions output +# All: all reads, unique- and multi-mappers +# Unique: uniquely mapping reads only + +outSJfilterOverhangMin 3 1 1 1 +# 4*int: minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG motif, +# (3) GC/AG motif, (4) AT/AC motif. -1 means no output for that motif does not apply to annotated junctions + +#outSJfilterCountUniqueMin 3 1 1 1 +# 4*int: minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG motif, (3) GC/AG motif, +# (4) AT/AC motif. -1 means no output for that motif. Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied does not apply to annotated junctions + +#outSJfilterCountTotalMin 3 1 1 1 +# 4*int: minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG motif, (3) GC/AG motif, (4) AT/AC motif. -1 means no output for that motif Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied does not apply to annotated junctions + +outSJfilterDistToOtherSJmin 0 0 0 0 +# 4*int>=0: minimum allowed distance to other junctions' donor/acceptor +# does not apply to annotated junctions + +#outSJfilterIntronMaxVsReadN 50000 100000 200000 +# N*int>=0: maximum gap allowed for junctions supported by 1,2,3...N reads +# i.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 +# reads any gap <=alignIntronMax does not apply to annotated junctions + +### SCORING +#scoreGap 0 +# gap open penalty + +#scoreGapNoncan -8 +# non-canonical gap open penalty (in addition to scoreGap) + +#scoreGapGCAG -4 +# GCAG gap open penalty (in addition to scoreGap) + +#scoreGapATAC -8 +# ATAC gap open penalty (in addition to scoreGap) + +#scoreGenomicLengthLog2scale -0.25 +# extra score logarithmically scaled with genomic length of the alignment: -scoreGenomicLengthLog2scale*log2(genomicLength) + +#scoreDelOpen -2 +# deletion open penalty + +#scoreDelBase -2 +# deletion extension penalty per base (in addition to scoreDelOpen) + +#scoreInsOpen -2 +# insertion open penalty + +#scoreInsBase -2 +# insertion extension penalty per base (in addition to scoreInsOpen) + +#scoreStitchSJshift 1 +# maximum score reduction while searching for SJ boundaries inthe stitching step + + +### ALIGNMENT and SEED PARAMETERS +#seedSearchStartLmax 50 +# int>0: defines the search start point through the read - the read is split into pieces no longer than this value + +#seedSearchStartLmaxOverLread 1.0 +# float: seedSearchStartLmax normalized to read length (sum of mates' lengths for paired-end reads) + +#seedSearchLmax 0 +# int>=0: defines the maximum length of the seeds, if =0 max seed lengthis infinite + +#seedMultimapNmax 10000 +# int>0: only pieces that map fewer than this value are utilized in the stitching procedure + +#seedPerReadNmax 1000 +# int>0: max number of seeds per read + +#seedPerWindowNmax 50 +# int>0: max number of seeds per window + +#seedNoneLociPerWindow 10 +# int>0: max number of one seed loci per window + +alignIntronMin 11 +# minimum intron size: genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion +# modified specifically for tRNA introns + +#alignIntronMax 0 +# maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins + +#alignMatesGapMax 0 +# maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins + +#alignSJoverhangMin 5 +# int>0: minimum overhang (i.e. block size) for spliced alignments + +#alignSJDBoverhangMin 3 +# int>0: minimum overhang (i.e. 
block size) for annotated (sjdb) spliced alignments + +#alignSplicedMateMapLmin 0 +# int>0: minimum mapped length for a read mate that is spliced + +#alignSplicedMateMapLminOverLmate 0.66 +# float>0: alignSplicedMateMapLmin normalized to mate length + +#alignWindowsPerReadNmax 10000 +# int>0: max number of windows per read + +#alignTranscriptsPerWindowNmax 100 +# int>0: max number of transcripts per window + +#alignTranscriptsPerReadNmax 10000 +# max number of different alignments per read to consider + +alignEndsType EndToEnd +# string: type of read ends alignment +# Local : standard local alignment with soft-clipping allowed +# EndToEnd: force end-to-end read alignment, do not soft-clip + +### SPLICE JUNCTIONS DATABASE PARAMETERS +#sjdbFileChrStartEnd - +# string: path to the file with genomic coordinates (chr start end strand) for the introns +# we will provide it in the command line directly, as the path is gonna depend on the sample +# NO: The annotations can be supplied in the form of splice junctions’ loci or GTF (or GFF3) file AND ipmortantly The annotations have to be supplied at the genome/suffix array generation step + +#sjdbGTFfile - +# string: path to the GTF file with annotations +# we will provide it in the command line directly + +#sjdbGTFchrPrefix - +# string: prefix for chromosome names in a GTF file (e.g. 'chr' for using ENSMEBL annotations with UCSC geneomes) + +#sjdbGTFfeatureExon exon +# string: feature type in GTF file to be used as exons for building transcripts + +#sjdbGTFtagExonParentTranscript transcript_id +# string: tag name to be used as exons' parents for building transcripts + +#sjdbOverhang 0 +# int>=0: length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1) +# if =0, splice junction database is not used + +#sjdbScore 2 +# int: extra alignment score for alignmets that cross database junctions + +### WINDOWS, ANCHORS, BINNING +#winAnchorMultimapNmax 50 +# int>0: max number of loci anchors are allowed to map to + +#winBinNbits 16 +# int>0: =log2(winBin), where winBin is the size of the bin for the windows/clustering,each window will occupy an integer nb of bins. + +#winAnchorDistNbins 9 +# int>0: max number of bins between two anchors that allows aggregation of anchors into one window + +#winFlankNbins 4 +# int>0: log2(winFlank), where win Flank is the size of the left and right flanking regions for each window + + +### CHIMERIC ALIGNMENTS +#chimSegmentMin 0 +## int>0: minimum length of chimeric segment length, if ==0, no chimeric output + +#chimScoreMin 0 +## int>0: minimum total (summed) score of the chimeric segments + +#chimScoreDropMax 20 +## int>0: max drop (difference) of chimeric score (the sum of scores of all chimeric segements) from the read length + +#chimScoreSeparation 10 +## int>0: minimum difference (separation) between the best chimeric score and the next one + +#chimScoreJunctionNonGTAG -1 +## int: penalty for a non-GT/AG chimeric junction + +#chimJunctionOverhangMin 20 +## int>0: minimum overhang for a chimeric junction + + +### QUANTIFICATION OF ANNOTATIONS +#quantMode - +# string(s): types of qunatification requested +# - : None +# TranscriptomeSAM : output SAM/BAM alignments to transcriptome into a separate file + + +#### 2-PASS +#twopass1readsN 0 +# int>0: number of reads to process for the 1st step. 
0 : 1-step only, no 2nd pass; use very large number to map all reads in the first step
+#
+#twopassSJlimit 1000000
+# int>=0: maximum number of junctions detected in the 1st step
+#
+
diff --git a/data/Parameters.unmapped.in b/data/Parameters.unmapped.in
new file mode 120000
index 0000000..a696866
--- /dev/null
+++ b/data/Parameters.unmapped.in
@@ -0,0 +1 @@
+/project/owlmayer/Applications/21_NETseqStats/config/Parameters.in
\ No newline at end of file
diff --git a/data/gencode.v28.vM18.primary_assembly.annotation.gff3 b/data/gencode.v28.vM18.primary_assembly.annotation.gff3
new file mode 120000
index 0000000..94363ee
--- /dev/null
+++ b/data/gencode.v28.vM18.primary_assembly.annotation.gff3
@@ -0,0 +1 @@
+/project/owlmayerTemporary/owlmayer_Applications/01_NetSeqPipeline.v2.spikeIns/data/gencode.v28.vM18.primary_assembly.annotation.gff3
\ No newline at end of file
diff --git a/data/masked.bed b/data/masked.bed
new file mode 120000
index 0000000..1a6c41d
--- /dev/null
+++ b/data/masked.bed
@@ -0,0 +1 @@
+/project/owlmayer/Applications/04_Pausing_from_NETseq/Snakemake/Data/masked.bed
\ No newline at end of file
diff --git a/data/maskingClasses.bed b/data/maskingClasses.bed
new file mode 120000
index 0000000..866e816
--- /dev/null
+++ b/data/maskingClasses.bed
@@ -0,0 +1 @@
+/project/owlmayerTemporary/Assembly/Human/GRCh38_p12/Non-nascent/maskingFiles/maskingClasses.bed
\ No newline at end of file
diff --git a/data/test/test.fastq.gz b/data/test/test.fastq.gz
new file mode 100644
index 0000000..86f385e
Binary files /dev/null and b/data/test/test.fastq.gz differ
diff --git a/runs.sh b/runs.sh
new file mode 100644
index 0000000..5af6fc1
--- /dev/null
+++ b/runs.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+SOURCE=${BASH_SOURCE[0]}
+DIR=$( dirname "$SOURCE" )
+
+############# create a test config file for your local environment; please adjust for custom data
+
+bash createConfig.sh "test" "$DIR/data/test/test.fastq.gz" "$DIR/config.yaml"
+
+############
+config=$DIR/config.yaml
+dr=output
+cores=5
+mkdir -p "$dr"
+
+snakemake -c"$cores" --directory "$dr" --configfile "$config"
+
diff --git a/source/createBarcodeDictionary.py b/source/createBarcodeDictionary.py
new file mode 100755
index 0000000..1663f42
--- /dev/null
+++ b/source/createBarcodeDictionary.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+import pickle
+
+# barcode (UMI) length; each collapsed read starts with the UMI
+bLen=snakemake.params[0]
+d={}
+with open(snakemake.input[0],'r') as txt:
+    for line in txt:
+        counts=int(line.strip().split()[1])
+        ln=line.strip().split()[0]
+        r=ln[bLen:].strip()
+        if r in d:
+            if ln[:bLen] in d[r]:
+                d[r][ln[:bLen]]+=counts
+            else:
+                d[r][ln[:bLen]]=counts
+        else:
+            d[r]={ln[:bLen]:counts}
+
+# write barcode-stripped sequences as FASTA, keyed by a running index that
+# links each record back to its UMI dictionary
+i=1
+sd={}
+with open(snakemake.output[1],'w') as f:
+    for key in d:
+        sd[i]=d[key]
+        f.write('>'+str(i)+'\n')
+        f.write(key+'\n')
+        i+=1
+
+with open(snakemake.output[0],'wb') as f:
+    pickle.dump(sd,f,pickle.HIGHEST_PROTOCOL)
diff --git a/source/createBedgraphs.py b/source/createBedgraphs.py
new file mode 100755
index 0000000..9c7bd65
--- /dev/null
+++ b/source/createBedgraphs.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+import pickle
+import numpy as np
+from Bio import SeqIO
+
+def createSI(fn):
+    '''
+    Parameters
+    ----------
+    fn : string
+        Path to the file with the genome annotation.
+
+    Returns
+    -------
+    d : set of tuples (string, int)
+        Set of splicing-intermediate coordinates in the form (chromosome, position).
+        The sign of the position indicates the strand.
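+
+        Example (illustrative): a '+'-strand exon line "chr1 . exon 100 200 . + ."
+        yields ("chr1", 200) for the exon 3' end and ("chr1", 99) for the 3' end
+        of the upstream intron; on the '-' strand the positions are stored negated.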
+    '''
+    d=[]
+    with open(fn,'r') as a:
+        for ln in a:
+            if ln[0]!='#':
+                l=ln.strip().split()
+                if l[2]=='exon':
+                    if l[6]=='+':
+                        d.append((l[0],int(l[4]))) # 3' end of exon
+                        d.append((l[0],int(l[3])-1)) # 3' end of intron
+                    else:
+                        d.append((l[0],-1*(int(l[3])-1))) # 3' end of exon
+                        d.append((l[0],-1*int(l[4]))) # 3' end of intron
+    d=set(d)
+    return d
+
+annotation=snakemake.params[0]
+SI=createSI(annotation)
+genomefn=snakemake.params[1]
+genome=SeqIO.index(genomefn,"fasta")
+# barcode (UMI) length
+bLen=snakemake.params[2]
+# barcode linker sequence
+barcode_linker=snakemake.params[3]
+lLen=len(barcode_linker)
+d=pickle.load(open(snakemake.input[0],'rb'))
+# RT (mispriming) threshold
+threshold=snakemake.params[4]
+
+
+chrm=''
+reads={}
+reads_noPCR={}
+info=[]
+for line in d:
+    if line[0]!=chrm:
+        chrm=line[0]
+        chrom_sequence=genome[chrm].seq[:]
+    if line[1]>0:
+        bseq=str(chrom_sequence[line[1]:line[1]+bLen].reverse_complement())
+        lseq=str(chrom_sequence[line[1]+bLen:line[1]+bLen+lLen])
+    else:
+        bseq=str(chrom_sequence[-line[1]-bLen:-line[1]])
+        lseq=str(chrom_sequence[-line[1]-bLen-lLen:-line[1]-bLen].reverse_complement())
+    stats=np.zeros(bLen+1,dtype=np.int64)
+    stats_noPCR=np.zeros(bLen+1,dtype=np.int64)
+    for barcode in d[line]:
+        s=sum([a0==b0 for a0,b0 in zip(bseq,barcode)])+barcode.count('N')
+        stats[s]+=d[line][barcode]
+        stats_noPCR[s]+=1
+    reads[line]=stats
+    reads_noPCR[line]=stats_noPCR
+    info.append([line[0],line[1],bseq,lseq])
+
+all_pos=snakemake.output[0]
+all_neg=snakemake.output[1]
+noPCR_pos=snakemake.output[2]
+noPCR_neg=snakemake.output[3]
+noPCR_noSI_pos=snakemake.output[4]
+noPCR_noSI_neg=snakemake.output[5]
+PCRtrack_pos=snakemake.output[6]
+PCRtrack_neg=snakemake.output[7]
+RTtrack_pos=snakemake.output[8]
+RTtrack_neg=snakemake.output[9]
+noPCR_noSI_noRT_pos=snakemake.output[10]
+noPCR_noSI_noRT_neg=snakemake.output[11]
+with open(all_pos,'w') as p, open(all_neg,'w') as n, \
+     open(noPCR_pos,'w') as ppcrfree, open(noPCR_neg,'w') as npcrfree, \
+     open(noPCR_noSI_pos,'w') as pold, open(noPCR_noSI_neg,'w') as nold, \
+     open(PCRtrack_pos,'w') as ppcr, open(PCRtrack_neg,'w') as npcr, \
+     open(RTtrack_pos,'w') as prt, open(RTtrack_neg,'w') as nrt, \
+     open(noPCR_noSI_noRT_pos,'w') as pfinal, open(noPCR_noSI_noRT_neg,'w') as nfinal:
+    for line in reads:
+        if line[1]>0:
+            position=line[0]+'\t'+str(line[1]-1)+'\t'+str(line[1])+'\t'
+            p.write(position+str(reads[line].sum())+'\n') # all reads mapping to the position
+            ppcrfree.write(position+str(reads_noPCR[line][:-1].sum())+'\n') # reads with UMI mapping to the position (counted once per UMI)
+            prt.write(position+str(100*reads[line][-1]/reads[line].sum())+'\n') # pct of RT reads at the position
+            ppcr.write(position+str(100-100*reads_noPCR[line].sum()/reads[line].sum())+'\n') # pct of PCR duplication at the position
+            if line not in SI:
+                pold.write(position+str(reads_noPCR[line][:-1].sum())+'\n')
+                if reads[line][-1]/reads[line].sum() < threshold:
+                    pfinal.write(position+str(reads_noPCR[line][:-1].sum())+'\n')
+        else:
+            position=line[0]+'\t'+str(-line[1])+'\t'+str(-line[1]+1)+'\t'
+            n.write(position+str(reads[line].sum())+'\n')
+            npcrfree.write(position+str(reads_noPCR[line][:-1].sum())+'\n')
+            nrt.write(position+str(100*reads[line][-1]/reads[line].sum())+'\n')
+            npcr.write(position+str(100-100*reads_noPCR[line].sum()/reads[line].sum())+'\n')
+            if line not in SI:
+                nold.write(position+str(reads_noPCR[line][:-1].sum())+'\n')
+                if reads[line][-1]/reads[line].sum() < threshold:
+                    nfinal.write(position+str(reads_noPCR[line][:-1].sum())+'\n')
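+
+# Note on the UMI-vs-genome comparison above (illustrative reading of the code):
+# each UMI is compared base-by-base to the genomic sequence adjacent to the read
+# 3' end; a UMI matching at all bLen positions (stats[-1]) most likely stems
+# from RT mispriming on genome-encoded sequence, so positions where that
+# fraction reaches `threshold` are excluded from the final bedGraphs.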
diff --git a/source/createSequences.py b/source/createSequences.py
new file mode 100755
index 0000000..84f98ba
--- /dev/null
+++ b/source/createSequences.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python
+bLen=snakemake.params[0]
+d=[]
+with open(snakemake.input[0],'r') as txt:
+    for line in txt:
+        ln=line.strip().split()[0]
+        d.append(ln[bLen:].strip())
+
+d=list(set(d))
+with open(snakemake.output[0],'w') as f:
+    for i in range(len(d)):
+        f.write('>'+str(i)+'\n')
+        f.write(d[i]+'\n')
+
diff --git a/source/createStats.py b/source/createStats.py
new file mode 100755
index 0000000..a64c81a
--- /dev/null
+++ b/source/createStats.py
@@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import pandas as pd
+import pybedtools
+import matplotlib.pyplot as plt
+
+fq=snakemake.input[0]
+strands={"pos":"+","neg":"-"}
+d={"pos":{},"neg":{}}
+d["pos"]["all"]=snakemake.input[1]
+d["neg"]["all"]=snakemake.input[2]
+d["pos"]["noPCR"]=snakemake.input[3]
+d["neg"]["noPCR"]=snakemake.input[4]
+d["pos"]["noPCR_noSI"]=snakemake.input[5]
+d["neg"]["noPCR_noSI"]=snakemake.input[6]
+
+mfile=snakemake.params[0]
+prefix=snakemake.params[1]
+downstream=snakemake.params[2]
+
+b={"pos":{},"neg":{}}
+for s in d.keys():
+    for c in d[s].keys():
+        b[s][c]=pybedtools.BedTool(d[s][c])
+
+masking=pd.read_csv(mfile,sep='\t',header=None)
+masking[1]=masking.apply(lambda x: x[1]-downstream if (x[5]=='-' and x[0]!='chrM') else x[1],1)
+masking[2]=masking.apply(lambda x: x[2]+downstream if x[5]=='+' else x[2],1)
+
+classes=['PolM','PolI','PolIII','snRNA','snoRNA','miRNA']
+stats=pd.DataFrame(0, index=["unmapped","allMapped"]+classes+["spikeIns","informative\nReads"], columns=["all","noPCR","noPCR_noSI"])
+
+# total reads in the FASTQ (4 lines per record)
+l=0
+with open(fq,'r') as f:
+    for ln in f:
+        l+=1
+allReads=l/4
+
+# all mapped reads
+for s in b.keys():
+    for t in b[s].keys():
+        temp_df=b[s][t].to_dataframe()
+        stats.loc["allMapped",t]+=temp_df.apply(lambda x: x['name']*(x['end']-x['start']),1).sum()
+
+# masking classes
+for c in classes:
+    for s in b.keys():
+        for t in b[s].keys():
+            temp_masking=masking[masking[3]==c]
+            temp_masking=temp_masking[temp_masking[5]==strands[s]]
+            masking_bed=pybedtools.BedTool.from_dataframe(temp_masking)
+            masking_bed=masking_bed.merge(s=True,c=[4,5,6],o='distinct')
+            temp_bed=b[s][t].intersect(masking_bed)
+            temp_df=temp_bed.to_dataframe()
+            stats.loc[c,t]+=temp_df.apply(lambda x: x['name']*(x['end']-x['start']),1).sum()
+            b[s][t]=b[s][t].subtract(masking_bed)
+
+# spike-ins and the rest
+for s in b.keys():
+    for t in b[s].keys():
+        temp_df=b[s][t].to_dataframe()
+        spikein_df=temp_df[temp_df['chrom'].str.startswith(prefix)]
+        stats.loc["spikeIns",t]+=spikein_df.apply(lambda x: x['name']*(x['end']-x['start']),1).sum()
+        temp_df=temp_df[~temp_df['chrom'].str.startswith(prefix)]
+        stats.loc["informative\nReads",t]+=temp_df.apply(lambda x: x['name']*(x['end']-x['start']),1).sum()
+
+
+stats['PCR duplicates']=stats['all']-stats["noPCR"]
+stats['Splicing Intermediates']=stats['noPCR']-stats["noPCR_noSI"]
+stats['Unique']=stats["noPCR_noSI"]
+stats.loc["unmapped","Unique"]=allReads-stats.loc["allMapped","all"]
+
+totals=stats[["Unique","Splicing Intermediates","PCR duplicates"]]
+totals=totals[totals.index!="allMapped"]
+percents=totals*100/allReads
+
+totals.to_csv(snakemake.output[0],sep='\t')
+
+totals.plot.bar(stacked=True)
+plt.ylabel("Number of reads")
+plt.tight_layout()
+plt.savefig(snakemake.output[1])
+
+percents.plot.bar(stacked=True)
+plt.ylabel("% of reads")
+plt.tight_layout()
+plt.savefig(snakemake.output[2])
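+
+# The input bedGraphs are single-base records, so summing name*(end-start)
+# (the count column times the interval width) totals the 3'-end read counts
+# per category; intersect/subtract on single-base intervals keep or drop whole
+# records and therefore preserve that sum.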
diff --git a/source/maskRegion.py b/source/maskRegion.py
new file mode 100644
index 0000000..881b21d
--- /dev/null
+++ b/source/maskRegion.py
@@ -0,0 +1,19 @@
+#!/usr/bin/env python
+import pybedtools
+import pandas as pd
+
+ps = pybedtools.BedTool(snakemake.input[0])
+# infer the strand from the file name (tracks are written as *.pos.* / *.neg.*)
+if '.pos.' in snakemake.input[0]:
+    strand='+'
+elif '.neg.' in snakemake.input[0]:
+    strand='-'
+
+if snakemake.params[0]=='':
+    df=ps
+else:
+    frame_mf=pd.read_csv(snakemake.params[0],delimiter='\t',header=None)
+    frame_mf=frame_mf[frame_mf[5]==strand]
+    mf = pybedtools.BedTool.from_dataframe(frame_mf)
+    df = ps.subtract(mf)
+df.moveto(snakemake.output[0])
+
diff --git a/source/normalizeSpikeIn.py b/source/normalizeSpikeIn.py
new file mode 100755
index 0000000..6d190b3
--- /dev/null
+++ b/source/normalizeSpikeIn.py
@@ -0,0 +1,15 @@
+#!/usr/bin/env python
+import pandas as pd
+
+# inputs 0 and 1 are the spike-in bedGraphs (pos and neg strand),
+# input 2 is the reference track to normalize
+aligned=0
+aligned+=pd.read_csv(snakemake.input[0],delimiter='\t',header=None)[3].sum()
+aligned+=pd.read_csv(snakemake.input[1],delimiter='\t',header=None)[3].sum()
+
+data=pd.read_csv(snakemake.input[2],delimiter='\t',header=None)
+data[3]=data[3]/(aligned/1000000)
+
+data.to_csv(snakemake.output[0],sep='\t',header=False,index=False)
+with open(snakemake.output[1],'w') as f:
+    f.write(str(aligned))
diff --git a/source/parsePositions.py b/source/parsePositions.py
new file mode 100755
index 0000000..0d96d36
--- /dev/null
+++ b/source/parsePositions.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python
+import pysam
+import pickle
+
+rcol=snakemake.input[0]
+iBAM = pysam.AlignmentFile(snakemake.input[1], 'rb')
+bLen=snakemake.params[0]
+
+d=pickle.load(open(rcol,'rb'))
+
+
+NETseq={}
+for line in iBAM:
+    if line.flag==0:
+        if (line.reference_name,int(-line.reference_start)) in NETseq:
+            for barcode in d[int(line.qname)]:
+                if barcode in NETseq[(line.reference_name,int(-line.reference_start))]:
+                    NETseq[(line.reference_name,int(-line.reference_start))][barcode]+=d[int(line.qname)][barcode]
+                else:
+                    NETseq[(line.reference_name,int(-line.reference_start))][barcode]=d[int(line.qname)][barcode]
+        else:
+            NETseq[(line.reference_name,int(-line.reference_start))]=d[int(line.qname)]
+    elif line.flag==16:
+        if (line.reference_name,int(line.reference_end)) in NETseq:
+            for barcode in d[int(line.qname)]:
+                if barcode in NETseq[(line.reference_name,int(line.reference_end))]:
+                    NETseq[(line.reference_name,int(line.reference_end))][barcode]+=d[int(line.qname)][barcode]
+                else:
+                    NETseq[(line.reference_name,int(line.reference_end))][barcode]=d[int(line.qname)][barcode]
+        else:
+            NETseq[(line.reference_name,int(line.reference_end))]=d[int(line.qname)]
+# close file
+iBAM.close()
+with open(snakemake.output[0],'wb') as f:
+    pickle.dump(NETseq,f,pickle.HIGHEST_PROTOCOL)
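+
+# Strand convention (kept consistent with createBedgraphs.py): forward-strand
+# alignments (flag 0) are stored with negated coordinates and feed the ".neg"
+# tracks, while reverse-strand alignments (flag 16) keep positive coordinates
+# and feed the ".pos" tracks, matching the antisense read orientation of
+# NET-seq libraries.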