Skip to content

Commit

Permalink
adding scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
Siddharth Annaldasula committed Feb 10, 2020
1 parent bca7730 commit e3098e6
Show file tree
Hide file tree
Showing 4 changed files with 173 additions and 0 deletions.
34 changes: 34 additions & 0 deletions filter_transcripts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Snakemake script: copy the input report to the output, keeping only the
# transcript records whose InterProScan section has at least one content line.

RECORD_PREFIX = ">"
INTERPRO_HEADER = "#####InterProScan"
BREWERY_HEADER = "#####BrewerySS8 Analysis"


def filter_records(lines):
    """Return the stripped lines of every record that has InterProScan content.

    A record starts at a line beginning with ``>`` and runs up to the next
    such line (or the end of the input). Within a record, the lines between
    the ``#####InterProScan`` header and the ``#####BrewerySS8 Analysis``
    header are counted; if there are none, the whole record is dropped.

    Args:
        lines: Iterable of text lines (with or without trailing newlines).

    Returns:
        A flat list of the kept records' lines, each stripped of surrounding
        whitespace, in input order.
    """
    kept = []
    current = []
    # Anything seen before the first ">" header belongs to no record, so the
    # initial pseudo-record starts out rejected and is discarded.
    rejected = True
    in_interpro = False
    interpro_lines = -1

    for line in lines:
        if line.startswith(RECORD_PREFIX):
            if not rejected:
                kept.extend(current)
            current = []
            rejected = False
            # Reset the per-record counters so a record without an
            # InterProScan section cannot inherit the previous record's
            # count (the original left them stale / unbound).
            in_interpro = False
            interpro_lines = -1

        if line.startswith(INTERPRO_HEADER):
            in_interpro = True
            # Start at -1 because the header line itself is counted below.
            interpro_lines = -1

        if line.startswith(BREWERY_HEADER):
            if interpro_lines < 1:
                rejected = True
            in_interpro = False

        if in_interpro:
            interpro_lines += 1

        current.append(line.strip())

    # Flush the final record: no further ">" header follows it, so without
    # this step the last record of the file would always be lost.
    if not rejected:
        kept.extend(current)
    return kept


def run_filter_transcripts(input_path, output_path):
    """Filter the records in *input_path* and write the result to *output_path*."""
    with open(input_path, "r") as handle:
        kept = filter_records(handle)
    with open(output_path, "w") as handle:
        handle.write("\n".join(kept))


# ``snakemake`` is injected by Snakemake's ``script:`` directive; the guard
# keeps the helpers importable outside of a Snakemake run.
if "snakemake" in globals():
    run_filter_transcripts(snakemake.input[0], snakemake.output[0])
53 changes: 53 additions & 0 deletions filter_utr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from Bio import SeqIO

# Snakemake script: cut known UTR sequences out of each transcript and write
# the trimmed transcripts as a two-line-per-record FASTA file.


def parse_utr_records(lines):
    """Group UTR sequence lines by the id on their preceding ``>`` header.

    Args:
        lines: Iterable of text lines from the UTR FASTA-like file.

    Returns:
        Dict mapping each header id (text after ``>``, stripped) to the list
        of stripped non-header lines that follow it. Repeated ids accumulate
        into the same list.

    Raises:
        ValueError: If a non-header line appears before the first header.
    """
    utrs_by_id = {}
    current_id = None
    for line in lines:
        if line.startswith(">"):
            current_id = line[1:].strip()
            utrs_by_id.setdefault(current_id, [])
        elif current_id is None:
            raise ValueError("UTR file has a sequence line before any '>' header")
        else:
            utrs_by_id[current_id].append(line.strip())
    return utrs_by_id


def remove_utrs(seq, utrs):
    """Return *seq* with the first occurrence of each UTR cut out, in order."""
    for utr in utrs:
        pos = seq.find(utr)
        if pos != -1:
            seq = seq[:pos] + seq[pos + len(utr):]
    return seq


def trim_transcript(transcript_id, seq, utrs_by_id):
    """Return *seq* with UTRs removed.

    If *transcript_id* has an entry in *utrs_by_id*, its UTRs are removed.
    Otherwise the entry holding the most UTR lines is used (the first one on
    ties); if no entry holds any UTR line the result is the empty string.

    NOTE(review): the fallback branch was reconstructed from source whose
    indentation had been lost; confirm that selecting by *number of UTR
    lines* and returning "" when nothing qualifies is the intended behavior.
    """
    if transcript_id in utrs_by_id:
        return remove_utrs(seq, utrs_by_id[transcript_id])

    best = None
    for utrs in utrs_by_id.values():
        # Strict ">" keeps the first entry among equally sized ones.
        if len(utrs) > (len(best) if best is not None else 0):
            best = utrs
    if best is None:
        return ""
    return remove_utrs(seq, best)


def run_filter_utr(utr_path, transcripts_path, output_path):
    """Trim every transcript in *transcripts_path* and write *output_path*."""
    with open(utr_path, "r") as handle:
        utrs_by_id = parse_utr_records(handle)

    fasta_lines = []
    transcripts = SeqIO.index(transcripts_path, "fasta")
    try:
        for key in transcripts:
            # Fetch the record once; every indexed lookup re-parses it.
            record = transcripts[key]
            record_id = str(record.id)
            seq = str(record.seq)
            # The lookup id is the last "|"-separated field of the record id.
            transcript_id = record_id.split("|")[-1].strip()
            try:
                trimmed = trim_transcript(transcript_id, seq, utrs_by_id)
            except Exception:
                # Best-effort fallback kept from the original, narrowed from
                # a bare ``except`` so Ctrl-C / SystemExit still propagate.
                trimmed = seq
            fasta_lines.append(">" + record_id)
            fasta_lines.append(trimmed)
    finally:
        transcripts.close()

    with open(output_path, "w") as handle:
        handle.write("\n".join(fasta_lines))


# ``snakemake`` is injected by Snakemake's ``script:`` directive; the guard
# keeps the helpers importable outside of a Snakemake run.
if "snakemake" in globals():
    run_filter_utr(snakemake.input[0], snakemake.input[1], snakemake.output[0])

33 changes: 33 additions & 0 deletions iupred2a_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import subprocess,os

# Snakemake script: extract the first transcript of a FASTA file, write its
# bare sequence to output[0], and append the transcript header followed by
# the output of the program given as input[1] to output[1].


def read_first_record(lines):
    """Return ``(header, sequence)`` for the first FASTA record in *lines*.

    The header is the stripped first line. The sequence is every following
    line up to the next ``>`` header, stripped and joined, so a sequence
    wrapped over several lines is not truncated to its first line.

    Raises:
        ValueError: If there is no line after the header.
    """
    lines = list(lines)
    if len(lines) < 2:
        raise ValueError("expected a FASTA header line followed by a sequence line")
    header = lines[0].strip()
    parts = []
    for line in lines[1:]:
        if line.startswith(">"):
            break
        parts.append(line.strip())
    return header, "".join(parts)


def run_iupred2a(transcripts_path, iupred_script, sequence_path, report_path):
    """Write the sequence file and append header + program output to the report.

    The program is invoked as ``<iupred_script> -a <sequence_path> long`` with
    its standard output appended to *report_path*. Arguments are passed as a
    list (no shell), so quotes, ``$`` or backticks in the FASTA header or in
    a path can no longer be interpreted by a shell.

    Raises:
        OSError: If *iupred_script* cannot be started.
    """
    with open(transcripts_path, "r") as handle:
        name, sequence = read_first_record(handle)

    with open(sequence_path, "w") as handle:
        handle.write(sequence)

    # Append mode mirrors the original ">>" redirections.
    with open(report_path, "a") as report:
        report.write(name + "\n")
        # Flush before the child writes to the same descriptor so the header
        # is guaranteed to precede the program's output.
        report.flush()
        # NOTE(review): the exit status is not checked, as in the original.
        subprocess.run(
            [str(iupred_script), "-a", str(sequence_path), "long"],
            stdout=report,
        )


# ``snakemake`` is injected by Snakemake's ``script:`` directive; the guard
# keeps the helpers importable outside of a Snakemake run.
if "snakemake" in globals():
    run_iupred2a(
        snakemake.input[0],
        snakemake.input[1],
        snakemake.output[0],
        snakemake.output[1],
    )



#transcripts = SeqIO.index(transcripts_filename, "fasta")

#if (not os.path.exists(snakemake.output[1])):
# open(snakemake.output[1], 'w+').close()



#for transcript in transcripts:
# output = [str(transcripts[transcript].seq)]
# output_file = open(snakemake.output[0],"w+")
# output_file.write("\n".join(output))
# output_file.close()



53 changes: 53 additions & 0 deletions transcript_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@

# Snakemake script: merge one transcript's IUPred2A, InterProScan, BrewerySS8
# and PrositeScan result files into a single sectioned text record.


def read_lines(path):
    """Return all lines of the text file at *path*."""
    with open(path, "r") as handle:
        return handle.readlines()


def build_report(idr_lines, interpro_lines, ss8_lines, prosite_lines,
                 iupred_skip=7, interpro_skip=5):
    """Assemble the combined per-transcript report.

    Args:
        idr_lines: Lines of the IUPred2A file; the first line is used as the
            transcript name.
        interpro_lines: Lines of the InterProScan file.
        ss8_lines: Lines of the BrewerySS8 file.
        prosite_lines: Lines of the PrositeScan file.
        iupred_skip: Number of leading IUPred2A lines to drop before copying
            (presumably the name line plus tool header; default 7).
        interpro_skip: Number of leading InterProScan lines to drop before
            copying (presumably tool header; default 5).

    Returns:
        List of report lines: the transcript name, then the four
        ``#####... Analysis`` sections, then a final empty string so the
        joined output ends with a newline.

    Raises:
        ValueError: If *idr_lines* is empty.
    """
    idr_lines = list(idr_lines)
    if not idr_lines:
        raise ValueError(
            "IUPred2A input is empty; expected the transcript name on its first line"
        )

    report = [idr_lines[0].strip()]

    report.append("#####IUPred2A Analysis")
    report.extend(line.strip() for line in idr_lines[iupred_skip:])

    report.append("#####InterProScan Analysis")
    for line in list(interpro_lines)[interpro_skip:]:
        # Copy lines only up to the first one that starts with "#".
        if line.startswith("#"):
            break
        report.append(line.strip())

    report.append("#####BrewerySS8 Analysis")
    report.extend(line.strip() for line in ss8_lines)

    report.append("#####PrositeScan Analysis")
    # Every ">" is replaced by "#" so no PrositeScan line starts with ">".
    report.extend(line.strip().replace(">", "#") for line in prosite_lines)

    report.append("")
    return report


def run_transcript_analysis(iupred_path, interpro_path, ss8_path,
                            prosite_path, output_path):
    """Read the four result files and write the combined report."""
    report = build_report(
        read_lines(iupred_path),
        read_lines(interpro_path),
        read_lines(ss8_path),
        read_lines(prosite_path),
    )
    with open(output_path, "w") as handle:
        handle.write("\n".join(report))


# ``snakemake`` is injected by Snakemake's ``script:`` directive; the guard
# keeps the helpers importable outside of a Snakemake run.
if "snakemake" in globals():
    run_transcript_analysis(
        snakemake.input[0],
        snakemake.input[1],
        snakemake.input[2],
        snakemake.input[3],
        snakemake.output[0],
    )

0 comments on commit e3098e6

Please sign in to comment.