Skip to content

Commit

Permalink
adding all before you know
Browse files Browse the repository at this point in the history
  • Loading branch information
Siddharth Annaldasula committed Mar 13, 2020
1 parent e3098e6 commit 0eae1fd
Show file tree
Hide file tree
Showing 7 changed files with 705 additions and 474 deletions.
114 changes: 66 additions & 48 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,27 @@ Transcripts = config["polished_reads"]

rule all:
input:
expand("/home/annaldas/projects/result/{gene}/{gene}_transcripts_filtered_coding_potential_analysis.txt", gene = GENES),
#expand("/home/annaldas/projects/result/{gene}/{gene}_blastx_protein_analysis.txt", gene = GENES)
#expand("/home/annaldas/projects/result/{gene}/{gene}_sashimi.pdf", gene = GENES)
#expand("/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_transcripts_filtered_coding_potential_analysis.pdf", gene = GENES),
#expand("//project/owlmayerTemporary/Sid/isoform_analysis//result/{gene}/{gene}_blastx_protein_analysis.txt", gene = GENES)
expand("/project/owlmayerTemporary/Sid/isoform_analysis/result/all/{gene}_protein_sequences.txt", gene = GENES)

rule gene_transcript:
input:
NanoporeGTF,
Transcripts
output:
"/home/annaldas/projects/result/{gene}/{gene}_seq.fa"#,
#"/home/annaldas/projects/result/{gene}/{gene}_sashimi.sh"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_seq.fa"
params:
gene = "{gene}"
script:
"gene_transcripts.py"

rule blastx:
input:
gene_fa = "/home/annaldas/projects/result/{gene}/{gene}_seq.fa",
gene_fa = "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_seq.fa",
db = config["human_protein"]
output:
"/home/annaldas/projects/result/{gene}/{gene}_blastx.out"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_blastx.out"
threads:
4
shell:
Expand All @@ -40,10 +39,10 @@ rule blastx:

rule protein_sequence:
input:
blastx = "/home/annaldas/projects/result/{gene}/{gene}_blastx.out",
blastx = "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_blastx.out",
db = config["human_protein"]
output:
"/home/annaldas/projects/result/{gene}/{gene}_blastx_protein.fa"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_blastx_protein.fa"
shell:
"sh protein_transcript_sequences.sh {input.blastx} {input.db} {output}"

Expand All @@ -53,74 +52,91 @@ rule utr_regions:
params:
gene = "{gene}"
output:
"/home/annaldas/projects/result/{gene}/{gene}_utr_regions.bed"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_utr_regions.bed"
script:
"utr_regions.py"

rule utr_sequences:
input:
hg = config["human_genome"],
utr = "/home/annaldas/projects/result/{gene}/{gene}_utr_regions.bed"
utr = "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_utr_regions.bed"
output:
"/home/annaldas/projects/result/{gene}/{gene}_utr_regions.fa"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_utr_regions.fa"
shell:
"bedtools getfasta -fi {input.hg} -bed {input.utr} -fo {output} -name"

rule transcript_filter_utr:
input:
utr = "/home/annaldas/projects/result/{gene}/{gene}_utr_regions.fa",
seq = "/home/annaldas/projects/result/{gene}/{gene}_seq.fa"
output:
"/home/annaldas/projects/result/{gene}/{gene}_seq_filt.fa"
script:
"filter_utr.py"
#rule transcript_filter_utr:
# input:
# utr = "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_utr_regions.fa",
# seq = "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_seq.fa"
# output:
# "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_seq_filt.fa"
# script:
# "filter_utr.py"

checkpoint mapping:
input:
"/home/annaldas/projects/result/{gene}/{gene}_seq_filt.fa"
utr = "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_utr_regions.fa",
seq = "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_seq.fa"
output:
directory("/home/annaldas/projects/result/{gene}/transcripts") #/{transcript}/{transcript}_map_protein.fa"
directory("/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts") #/{transcript}/{transcript}_map_protein.fa"
params:
gene = "{gene}"
script:
"translation_protein.py"

def aggregate_mapping(wildcards):
checkpoint_output = checkpoints.mapping.get(**wildcards).output[0]
return expand("/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map_protein.fa",
gene=wildcards.gene,
transcript=glob_wildcards(os.path.join(checkpoint_output,"{transcript}_map_protein.fa")).transcript)

rule aggregate_mapping:
input:
aggregate_mapping
output:
"/project/owlmayerTemporary/Sid/isoform_analysis/result/all/{gene}_protein_sequences.txt"
params:
gene = "{gene}"
shell:
"cat {input} > {output}"

rule iupred2a_analysis:
input:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein.fa",
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map_protein.fa",
config["iupred2a"]
output:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_transcript_sequence.txt",
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein_iupred2a.txt"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_transcript_sequence.txt",
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map_protein_iupred2a.txt"
script:
"iupred2a_analysis.py"

rule interpro_scan:
input:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein.fa"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map_protein.fa"
output:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map.gff3"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map.gff3"
params:
db = "Pfam,ProDom,Gene3D,CDD,Coils,MobiDBLite,SMART"
shell:
"sh /home/annaldas/my_interproscan/interproscan-5.38-76.0/interproscan.sh -i {input} -o {output} -f GFF3 -appl {params.db} -dra"

rule brewery_analysis:
input:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein.fa"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map_protein.fa"
output:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein.fa.ss3"
#"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein.fa.ss8"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map_protein.fa.ss3"
#"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map_protein.fa.ss8"
params:
brewery = config["brewery"]
shell:
"python3 {params.brewery} -i {input} --cpu 32 --noTA --noSA --noCD"

rule functional_site_analysis:
input:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein.fa"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map_protein.fa"
output:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein_sites.txt"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map_protein_sites.txt"
params:
ps_scan = config["prosite_scan"],
prosite_dat = config["prosite_dat"]
Expand All @@ -129,65 +145,67 @@ rule functional_site_analysis:

rule individual_transcript_analysis:
input:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein_iupred2a.txt",
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map.gff3",
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein.fa.ss3",
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein_sites.txt"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map_protein_iupred2a.txt",
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map.gff3",
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map_protein.fa.ss3",
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map_protein_sites.txt"
output:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein_analysis.txt"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map_protein_analysis.txt"
script:
"transcript_analysis.py"

def aggregate_input(wildcards):
checkpoint_output = checkpoints.mapping.get(**wildcards).output[0]
return expand("/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein_analysis.txt",
return expand("/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}_map_protein_analysis.txt",
gene=wildcards.gene,
transcript=glob_wildcards(os.path.join(checkpoint_output,"{transcript}_map_protein.fa")).transcript)

rule aggregate:
input:
aggregate_input
output:
"/home/annaldas/projects/result/{gene}/{gene}_transcripts_filtered_analysis.txt"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_transcripts_filtered_analysis.txt"
params:
gene = "{gene}"
shell:
"cat {input} > {output}"

#rule filter_transcripts:
# input:
# "/home/annaldas/projects/result/{gene}/{gene}_transcripts_analysis.txt"
# "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_transcripts_analysis.txt"
# output:
# "/home/annaldas/projects/result/{gene}/{gene}_transcripts_filtered_analysis.txt"
# "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_transcripts_filtered_analysis.txt"
# script:
# "filter_transcripts.py"

rule protein_coding_potential_analysis:
input:
"/home/annaldas/projects/result/{gene}/{gene}_transcripts_filtered_analysis.txt"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_transcripts_filtered_analysis.txt"
output:
"/home/annaldas/projects/result/{gene}/{gene}_transcripts_filtered_coding_potential_analysis.txt"
"/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_transcripts_filtered_coding_potential_analysis.pdf"
params:
gene = "{gene}"
script:
"interproscan_analysis.py"

#rule protein_domain_analysis:
# input:
# "/home/annaldas/projects/result/{gene}/transcripts/{transcript}/{transcript}_map.gff3",
# "/home/annaldas/projects/result/{gene}/transcripts/{transcript}/{transcript}_map_protein_iupred2a.txt"
# "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}/{transcript}_map.gff3",
# "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/transcripts/{transcript}/{transcript}_map_protein_iupred2a.txt"
# output:
# "/home/annaldas/projects/result/{gene}/{gene}_map_protein_analysis.txt"
# "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_map_protein_analysis.txt"
# params:
# gene = "{gene}"
# script:
# "interproscan_analysis.py"

#rule sashimi_plot:
# input:
# sashimi_sh = "/home/annaldas/projects/result/{gene}/{gene}_sashimi.sh",
# sashimi_sh = "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_sashimi.sh",
# sashimi_py = config["sashimi"],
# bams = config["input_bams"],
# gtf = NanoporeGTF
# output:
# "/home/annaldas/projects/result/{gene}/{gene}_sashimi.pdf"
# "/project/owlmayerTemporary/Sid/isoform_analysis/result/{gene}/{gene}_sashimi.pdf"
# shell:
# "sh {input.sashimi_sh} {input.sashimi_py} {input.bams} {input.gtf} {output}"
4 changes: 1 addition & 3 deletions gene_transcripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
# Mapping oID to transcript id
# Mapping transcript id to exons

ABI2_info = []

gene_oID = dict()
oID_tID = dict()
gene_pos = dict()
Expand Down Expand Up @@ -57,7 +55,7 @@
# Extracting isoforms from related genes

output = []
gene = snakemake.params[0].upper()
gene = snakemake.params[0]
for oID in gene_oID[gene]:
tID = oID_tID[transcripts[oID].id]
transID = tID_pos[tID]
Expand Down
68 changes: 67 additions & 1 deletion genes.tab
Original file line number Diff line number Diff line change
@@ -1,2 +1,68 @@
gene_symbol
RPS24
AC112178.1
AL133395.1
AL590617.2
ANO7
APOOL
BAK1
BCKDHB
CADM2
CHRNA1
CUZD1
DACT3
DENND5B
DLEU7
EBF2
EPB41L5
ERBB2
ERGIC3
FAIM
FAM78B
GDF1
GLS
HAGHL
HOMEZ
IKBIP
KCNH3
KCNJ6
KIF1B
KIF7
KLHL35
KTN1-AS1
LAGE3
LARGE2
LINC00467
MIR302CHG
MIR4787
NCAN
NECTIN2
NKAIN3
NKAIN4
NRF1
NRG1
ONECUT2
PDE4DIP
PIWIL2
PPP1R1C
PRKCZ
RAC3
REEP1
RENBP
RGS9
RIPOR2
RPS24
RRP7BP
SGK3
SLC17A7
SLC44A3-AS1
SLC6A15
SNTG2
SWSAP1
SYNGR1
THAP7-AS1
THRA
TLN1
TPM2
TYW1
VSIG10
WFDC2
Loading

0 comments on commit 0eae1fd

Please sign in to comment.