Skip to content

Commit

Permalink
updating
Browse files Browse the repository at this point in the history
  • Loading branch information
Siddharth Annaldasula committed Feb 10, 2020
1 parent 3ac2192 commit bca7730
Show file tree
Hide file tree
Showing 15 changed files with 567 additions and 139 deletions.
120 changes: 99 additions & 21 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@ Transcripts = config["polished_reads"]

rule all:
input:
expand("/home/annaldas/projects/result/{gene}/{gene}_map_protein_analysis.txt", gene = GENES),
expand("/home/annaldas/projects/result/{gene}/{gene}_blastx_protein_analysis.txt", gene = GENES)
expand("/home/annaldas/projects/result/{gene}/{gene}_transcripts_filtered_coding_potential_analysis.txt", gene = GENES),
#expand("/home/annaldas/projects/result/{gene}/{gene}_blastx_protein_analysis.txt", gene = GENES)
#expand("/home/annaldas/projects/result/{gene}/{gene}_sashimi.pdf", gene = GENES)

rule gene_transcript:
input:
NanoporeGTF,
Transcripts
output:
"/home/annaldas/projects/result/{gene}/{gene}_seq.fa",
"/home/annaldas/projects/result/{gene}/{gene}_sashimi.sh"
"/home/annaldas/projects/result/{gene}/{gene}_seq.fa"#,
#"/home/annaldas/projects/result/{gene}/{gene}_sashimi.sh"
params:
gene = "{gene}"
script:
Expand Down Expand Up @@ -75,41 +75,119 @@ rule transcript_filter_utr:
script:
"filter_utr.py"

rule mapping:
checkpoint mapping:
input:
"/home/annaldas/projects/result/{gene}/{gene}_seq_filt.fa"
output:
"/home/annaldas/projects/result/{gene}/{gene}_map_protein.fa"
directory("/home/annaldas/projects/result/{gene}/transcripts") #/{transcript}/{transcript}_map_protein.fa"
params:
gene = "{gene}"
script:
"translation_protein.py"

rule iupred2a_analysis:
input:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein.fa",
config["iupred2a"]
output:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_transcript_sequence.txt",
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein_iupred2a.txt"
script:
"iupred2a_analysis.py"

rule interpro_scan:
input:
"/home/annaldas/projects/result/{gene}/{gene}_{type}_protein.fa"
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein.fa"
output:
"/home/annaldas/projects/result/{gene}/{gene}_{type}.gff3"
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map.gff3"
params:
db = "Pfam,ProDom,Gene3D,CDD,Coils,MobiDBLite"
db = "Pfam,ProDom,Gene3D,CDD,Coils,MobiDBLite,SMART"
shell:
"sh /home/annaldas/my_interproscan/interproscan-5.38-76.0/interproscan.sh -i {input} -o {output} -f GFF3 -appl {params.db} -dra"

rule protein_domain_analysis:
rule brewery_analysis:
input:
"/home/annaldas/projects/result/{gene}/{gene}_{type}.gff3"
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein.fa"
output:
"/home/annaldas/projects/result/{gene}/{gene}_{type}_protein_analysis.txt"
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein.fa.ss3"
#"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein.fa.ss8"
params:
gene = "{gene}"
brewery = config["brewery"]
shell:
"python3 {params.brewery} -i {input} --cpu 32 --noTA --noSA --noCD"

rule functional_site_analysis:
input:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein.fa"
output:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein_sites.txt"
params:
ps_scan = config["prosite_scan"],
prosite_dat = config["prosite_dat"]
shell:
"perl {params.ps_scan} -d {params.prosite_dat} {input} > {output}"

rule individual_transcript_analysis:
input:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein_iupred2a.txt",
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map.gff3",
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein.fa.ss3",
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein_sites.txt"
output:
"/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein_analysis.txt"
script:
"interproscan_analysis.py"
"transcript_analysis.py"

rule sashimi_plot:
def aggregate_input(wildcards):
checkpoint_output = checkpoints.mapping.get(**wildcards).output[0]
return expand("/home/annaldas/projects/result/{gene}/transcripts/{transcript}_map_protein_analysis.txt",
gene=wildcards.gene,
transcript=glob_wildcards(os.path.join(checkpoint_output,"{transcript}_map_protein.fa")).transcript)

rule aggregate:
input:
sashimi_sh = "/home/annaldas/projects/result/{gene}/{gene}_sashimi.sh",
sashimi_py = config["sashimi"],
bams = config["input_bams"],
gtf = NanoporeGTF
aggregate_input
output:
"/home/annaldas/projects/result/{gene}/{gene}_sashimi.pdf"
"/home/annaldas/projects/result/{gene}/{gene}_transcripts_filtered_analysis.txt"
params:
gene = "{gene}"
shell:
"sh {input.sashimi_sh} {input.sashimi_py} {input.bams} {input.gtf} {output}"
"cat {input} > {output}"

#rule filter_transcripts:
# input:
# "/home/annaldas/projects/result/{gene}/{gene}_transcripts_analysis.txt"
# output:
# "/home/annaldas/projects/result/{gene}/{gene}_transcripts_filtered_analysis.txt"
# script:
# "filter_transcripts.py"

rule protein_coding_potential_analysis:
input:
"/home/annaldas/projects/result/{gene}/{gene}_transcripts_filtered_analysis.txt"
output:
"/home/annaldas/projects/result/{gene}/{gene}_transcripts_filtered_coding_potential_analysis.txt"
script:
"interproscan_analysis.py"

#rule protein_domain_analysis:
# input:
# "/home/annaldas/projects/result/{gene}/transcripts/{transcript}/{transcript}_map.gff3",
# "/home/annaldas/projects/result/{gene}/transcripts/{transcript}/{transcript}_map_protein_iupred2a.txt"
# output:
# "/home/annaldas/projects/result/{gene}/{gene}_map_protein_analysis.txt"
# params:
# gene = "{gene}"
# script:
# "interproscan_analysis.py"

#rule sashimi_plot:
# input:
# sashimi_sh = "/home/annaldas/projects/result/{gene}/{gene}_sashimi.sh",
# sashimi_py = config["sashimi"],
# bams = config["input_bams"],
# gtf = NanoporeGTF
# output:
# "/home/annaldas/projects/result/{gene}/{gene}_sashimi.pdf"
# shell:
# "sh {input.sashimi_sh} {input.sashimi_py} {input.bams} {input.gtf} {output}"
16 changes: 8 additions & 8 deletions gene_transcripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@

# Create sashmimi sh

output_filename = snakemake.output[1]
output_file = open(output_filename,"w+")

chrm,start,stop = gene_pos[gene][0],gene_pos[gene][1],gene_pos[gene][2]
binbash = "#!/bin/bash"
sashimi = "python $1 -b $2 -c %s:%d-%d -g $3 -M 10 -C 3 -O 3 --shrink --alpha 1 --base-size=20 --ann-height=5 --height=7 --width=18 -S both -o $4" %(chrm,start,stop)
output_file.write("\n".join([binbash,sashimi]))
output_file.close()
#output_filename = snakemake.output[1]
#output_file = open(output_filename,"w+")

#chrm,start,stop = gene_pos[gene][0],gene_pos[gene][1],gene_pos[gene][2]
#binbash = "#!/bin/bash"
#sashimi = "python $1 -b $2 -c %s:%d-%d -g $3 -M 10 -C 3 -O 3 --shrink --alpha 1 --base-size=20 --ann-height=5 --height=7 --width=18 -S both -o $4" %(chrm,start,stop)
#output_file.write("\n".join([binbash,sashimi]))
#output_file.close()

15 changes: 1 addition & 14 deletions genes.tab
Original file line number Diff line number Diff line change
@@ -1,15 +1,2 @@
gene_symbol
APP
ABI2
DNMBP
SEPTIN8
EPB41L5
SEPTIN6
TFDP2
ERBB2
RPS24
ZNRD2
COX11
ERGIC3
PFN2
NKAIN4
RPS24
59 changes: 59 additions & 0 deletions interproscan_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
filename = snakemake.input[0]
file = open(filename, "r")
lines = file.readlines()
file.close()

class transcript:
def __init__(self, tcons, idr, ips, ss8):
self.tcons = tcons
self.idr = idr
self.ips = ips
self.ss8 = ss8
self.pss = pss

transcripts = dict()

idr_lines = []
ips_lines = []
ss8_lines = []
pss_lines = []

for line in lines:
if (line.startswith(">")):
if (len(ips_lines) > 0):
transcripts[tcons] = transcript(tcons, idr, ips, ss8)

new = True
idr_lines = []
ips_lines = []
ss8_lines = []
pss_lines = []
idr = False
ips = False
ss8 = False
pss = False
tcons = line[1:].strip().split("|")[0]


if (line.startswith("#####IUPred2A Analysis")):
idr = True
elif (line.startswith("#####InterProScan")):
ips = True
idr = False
elif (line.startswith("#####BrewerySS8 Analysis")):
ss8 = True
ips = False

if (idr):
idr_lines.append(line.strip())
elif (ips):
ips_lines.append(line.strip())
elif (ss8):
ss8_lines.append(line.strip())

transcripts[tcons] = transcript(tcons, idr, ips, ss8)




print(transcripts)
Loading

0 comments on commit bca7730

Please sign in to comment.