From e3098e630eb89a255b10e4bff47d9d35d3e5ed3b Mon Sep 17 00:00:00 2001
From: Siddharth Annaldasula <annaldas@elcattivo.molgen.mpg.de>
Date: Mon, 10 Feb 2020 17:51:03 +0100
Subject: [PATCH] adding scripts

---
 filter_transcripts.py  | 34 +++++++++++++++++++++++++++
 filter_utr.py          | 53 ++++++++++++++++++++++++++++++++++++++++++
 iupred2a_analysis.py   | 33 ++++++++++++++++++++++++++
 transcript_analysis.py | 53 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 173 insertions(+)
 create mode 100644 filter_transcripts.py
 create mode 100644 filter_utr.py
 create mode 100644 iupred2a_analysis.py
 create mode 100644 transcript_analysis.py

diff --git a/filter_transcripts.py b/filter_transcripts.py
new file mode 100644
index 0000000..b51c494
--- /dev/null
+++ b/filter_transcripts.py
@@ -0,0 +1,34 @@
+"""Drop the transcript records whose InterProScan section is empty."""
+# A record starts at a ">" line; text before the first ">" is discarded.
+with open(snakemake.input[0], "r") as handle:
+    lines = handle.readlines()
+
+output = []
+current = []
+fake = True  # True while `current` must not be written out
+ips = False  # True while inside the InterProScan section
+count = 0  # lines seen after the InterProScan header of this record
+
+for line in lines:
+    if line.startswith(">"):
+        if not fake:
+            output += current
+        current = []
+        fake = False
+        ips, count = False, 0  # do not inherit state from the last record
+    if line.startswith("#####InterProScan"):
+        ips = True
+        count = -1  # the header line itself is counted below, giving 0
+    if line.startswith("#####BrewerySS8 Analysis"):
+        if count < 1:  # nothing between the two section headers
+            fake = True
+        ips = False
+    if ips:
+        count += 1
+    current.append(line.strip())
+
+if not fake:  # flush the last record: no later ">" line will do it
+    output += current
+
+with open(snakemake.output[0], "w+") as output_file:
+    output_file.write("\n".join(output))
diff --git a/filter_utr.py b/filter_utr.py
new file mode 100644
index 0000000..d5db09a
--- /dev/null
+++ b/filter_utr.py
@@ -0,0 +1,53 @@
+from Bio import SeqIO
+
+def strip_utrs(sequence, utrs):
+    """Return `sequence` with the first occurrence of each UTR cut out."""
+    for utr in utrs:
+        pos = sequence.find(utr)
+        if pos != -1:
+            sequence = sequence[:pos] + sequence[pos + len(utr):]
+    return sequence
+
+
+# Map each header of the UTR FASTA file to the sequence lines listed under it.
+trans_utr = dict()
+trans_id = None
+with open(snakemake.input[0], "r") as utr_sequences:
+    for line in utr_sequences:
+        if line.startswith(">"):
+            trans_id = line[1:].strip()
+            trans_utr.setdefault(trans_id, [])
+        elif trans_id is None:
+            raise ValueError("UTR sequence line found before any '>' header")
+        else:
+            trans_utr[trans_id].append(line.strip())
+
+transcripts_filename = snakemake.input[1]
+transcripts = SeqIO.index(transcripts_filename, "fasta")
+
+output = []
+
+for transcript in transcripts:
+    record = transcripts[transcript]  # one lookup instead of three
+    transcript_id = str(record.id).split("|")[-1].strip()
+    seq = str(record.seq)
+
+    if transcript_id in trans_utr:
+        long_seq = strip_utrs(seq, trans_utr[transcript_id])
+    else:
+        # NOTE(review): this id has no UTR entry, so the entry with the most
+        # lines is used instead ("" if all are empty) -- confirm the intent.
+        long_seq_len = 0
+        long_seq = ""
+        for t in trans_utr:
+            seq_len = len(trans_utr[t])
+            if seq_len > long_seq_len:
+                long_seq_len = seq_len
+                long_seq = strip_utrs(seq, trans_utr[t])
+
+    output.append(">" + str(record.id))
+    output.append(long_seq)
+
+with open(snakemake.output[0], "w+") as output_file:
+    output_file.write("\n".join(output))
+    
\ No newline at end of file
diff --git a/iupred2a_analysis.py b/iupred2a_analysis.py
new file mode 100644
index 0000000..f3a8566
--- /dev/null
+++ b/iupred2a_analysis.py
@@ -0,0 +1,33 @@
+import subprocess,os
+
+# Run the IUPred2A executable on the first record of a FASTA file.
+#   snakemake.input[0]   FASTA file
+#   snakemake.input[1]   executable to run
+#   snakemake.output[0]  bare sequence, written here and passed to the tool
+#   snakemake.output[1]  FASTA header line followed by the tool's stdout
+
+transcripts_filename = snakemake.input[0]
+with open(transcripts_filename, "r") as transcripts_file:
+    transcripts = transcripts_file.readlines()
+
+# Only the first two lines are used: the header and one sequence line.
+# NOTE(review): a sequence wrapped over several lines would be truncated
+# here -- confirm the upstream step always writes it on a single line.
+transcript_name = transcripts[0].strip()
+transcript_sequence = transcripts[1].strip()
+
+with open(snakemake.output[0], "w+") as output_file:
+    output_file.write(transcript_sequence)
+
+# The header is written from Python and the tool is started from an argument
+# list, so neither the header text nor any path is interpreted by a shell.
+# Append mode keeps anything already present in the report file.
+with open(snakemake.output[1], "a") as report_file:
+    report_file.write(transcript_name + "\n")
+    # Push the header to disk before the child process starts writing.
+    report_file.flush()
+    subprocess.run(
+        [snakemake.input[1], "-a", snakemake.output[0], "long"],
+        stdout=report_file,
+    )
+    
\ No newline at end of file
diff --git a/transcript_analysis.py b/transcript_analysis.py
new file mode 100644
index 0000000..d7b33c5
--- /dev/null
+++ b/transcript_analysis.py
@@ -0,0 +1,53 @@
+
+"""Merge the per-transcript tool reports into one sectioned text record.
+
+Inputs (snakemake.input):
+    [0] IUPred2A report whose first line is the transcript name
+    [1] InterProScan report
+    [2] BrewerySS8 report
+    [3] PrositeScan report
+Output (snakemake.output[0]): the transcript name followed by one
+"#####<tool> Analysis" section per report.
+"""
+
+
+def read_lines(path):
+    """Return every line of the file at `path`, line endings included."""
+    with open(path, "r") as handle:
+        return handle.readlines()
+
+
+iupred2a_lines = read_lines(snakemake.input[0])
+interproscan_lines = read_lines(snakemake.input[1])
+breweryss8_lines = read_lines(snakemake.input[2])
+prosite_lines = read_lines(snakemake.input[3])
+
+# Line 1 is the transcript name; lines 2-7 are skipped, the rest is copied.
+header_name = iupred2a_lines[0].strip()
+disorder_rows = iupred2a_lines[7:]
+record = [header_name, "#####IUPred2A Analysis"]
+record.extend(row.strip() for row in disorder_rows)
+
+# The first five InterProScan lines are skipped; copying then stops at the
+# first line that starts with "#".
+domain_rows = interproscan_lines[5:]
+record.append("#####InterProScan Analysis")
+for row in domain_rows:
+    if row.startswith("#"):
+        break
+    record.append(row.strip())
+
+# The BrewerySS8 report is copied in full.
+record.append("#####BrewerySS8 Analysis")
+record.extend(row.strip() for row in breweryss8_lines)
+
+# Every ">" in the PrositeScan report becomes "#" -- presumably so that no
+# copied line looks like the ">" transcript-name line; verify downstream.
+record.append("#####PrositeScan Analysis")
+record.extend(row.strip().replace(">", "#") for row in prosite_lines)
+
+# The empty last item makes the joined text end with a newline.
+record.append("")
+
+with open(snakemake.output[0], "w+") as output_file:
+    output_file.write("\n".join(record))