Skip to content

Commit

Permalink
Changed Paths
Browse files Browse the repository at this point in the history
  • Loading branch information
basti committed Dec 4, 2018
1 parent 8b2824e commit 0a04f30
Show file tree
Hide file tree
Showing 9 changed files with 49 additions and 226 deletions.
168 changes: 0 additions & 168 deletions .gitignore

This file was deleted.

14 changes: 7 additions & 7 deletions bin/Modules/Ensembl/ActivityCategorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,21 @@

class ActivityCategorizer:

def __init__(self, release, organism):
def __init__(self, release, organism, wd):

# List of all Folders with Activity Tables

self.folderlist = []

# Dictionary from celltypes_organism.json mit key = Kategorie und Value = [Ordner]

self.c_dict = self.read_config(organism)
self.c_dict = self.read_config(organism, wd)

# Activity table from all files as dict

self.activity = {}

self.get_activity_data(release, organism)
self.get_activity_data(release, organism, wd)

# Categorized Activity from Json-config
print("Categorization: This may take a while")
Expand All @@ -29,10 +29,10 @@ def __init__(self, release, organism):
def get_categorization(self):
return self.categorization

def read_config(self, organism):
def read_config(self, organism, wd):

c_dict = {}
path_to_config = os.path.join("../config/celltypes_"+organism+".json")
path_to_config = os.path.join(wd, "../config/celltypes_"+organism+".json")
with open(path_to_config) as input:
data = json.loads(input.read())
for x in data:
Expand All @@ -41,11 +41,11 @@ def read_config(self, organism):

return c_dict

def get_activity_data(self, release, organism):
def get_activity_data(self, release, organism, wd):

for folder in self.folderlist:
# Generate path to binary File
file = os.path.join("./EnsemblData", release, organism, "activity", folder, "table.bin")
file = os.path.join(wd, "/EnsemblData", release, organism, "activity", folder, "table.bin")
with open(file, "rb") as tables:
self.activity[folder] = bytearray(tables.read())

Expand Down
4 changes: 2 additions & 2 deletions bin/Modules/Ensembl/ActivityTable.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ class ActivityTable:
4, "activity=NA"
"""

def __init__(self, organism, current_release):
self.link = os.path.join("./EnsemblData/", current_release, organism, "activity")
def __init__(self, organism, current_release, wd):
self.link = os.path.join(wd, "/EnsemblData/", current_release, organism, "activity")
self.folders = next(os.walk(self.link))[1]
self.generator = ATGenerator(["activity=ACTIVE",
"activity=POISED",
Expand Down
18 changes: 9 additions & 9 deletions bin/Modules/Ensembl/Ensembl.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
from Modules.Ensembl.ActivityTable import ActivityTable
from Modules.Ensembl.FTPHandling.VersionChecker import EnsemblRegulationFTPRetriever as FTPRetriever
from Modules.Ensembl.ActivityCategorizer import ActivityCategorizer
from Modules.Ensembl.GTFGen import GTFGen
from bin.Modules.Ensembl.ActivityTable import ActivityTable
from bin.Modules.Ensembl.FTPHandling.VersionChecker import EnsemblRegulationFTPRetriever as FTPRetriever
from bin.Modules.Ensembl.ActivityCategorizer import ActivityCategorizer
from bin.Modules.Ensembl.GTFGen import GTFGen


class Ensembl:

def __init__(self, organism):
def __init__(self, organism, wd):
print("Starting Ensembl")
self.updater = FTPRetriever(organism)
self.updater = FTPRetriever(organism, wd)
self.release = self.updater.get_release()
self.acttable = ActivityTable(organism, self.release)
self.acttable = ActivityTable(organism, self.release, wd)
self.acttable.check_and_generate_activity_table()
self.categorizer = ActivityCategorizer(self.release, organism)
self.categorizer = ActivityCategorizer(self.release, organism, wd)
print("Generating GTF")
self.gtf_generator = GTFGen(organism, self.release)
self.gtf_generator = GTFGen(organism, self.release, wd)

print("Ensembl Finished !")

Expand Down
2 changes: 1 addition & 1 deletion bin/Modules/Ensembl/FTPHandling/URLRetrieve.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import ftplib
from Modules.Ensembl.FTPHandling.FTPEntry import FTPEntry
from bin.Modules.Ensembl.FTPHandling.FTPEntry import FTPEntry


class FTPHandler:
Expand Down
26 changes: 13 additions & 13 deletions bin/Modules/Ensembl/FTPHandling/VersionChecker.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from Modules.Ensembl.FTPHandling.URLRetrieve import FTPHandler
from bin.Modules.Ensembl.FTPHandling.URLRetrieve import FTPHandler
import os.path


Expand All @@ -9,12 +9,12 @@ class EnsemblRegulationFTPRetriever:
And downloading newest version if necessary
"""

def __init__(self, organism):
def __init__(self, organism, wd):
self.site_ftp = FTPHandler("ftp.ensembl.org", "pub")
self.remoteversion = self.get_current_ftp_version()
self.localversion = self.get_current_local_version()
if self.check_version_difference(organism):
self.download_currentversion_version(self.remoteversion, organism)
self.localversion = self.get_current_local_version(wd)
if self.check_version_difference(organism, wd):
self.download_currentversion_version(self.remoteversion, organism, wd)
else:
print("Newest Version installed, no update needed.")

Expand All @@ -31,15 +31,15 @@ def get_current_ftp_version(self):
print("Current release is "+c_release)
return c_release

def check_organism(self, organism, release):
if organism in next(os.walk("./EnsemblData/"+release+"/"))[1]:
def check_organism(self, organism, release, wd):
if organism in next(os.walk(os.path.join(wd, "/EnsemblData/"+release+"/")))[1]:
return False
else:
print("No Local Version for "+organism+" installed. Installing...")
return True

def get_current_local_version(self):
directories = next(os.walk("./EnsemblData/"))[1]
def get_current_local_version(self, wd):
directories = next(os.walk(os.path.join(wd, "/EnsemblData/")))[1]
for dir in directories:
if "release" in dir:
localversion = sorted(directories, reverse=True)[0]
Expand All @@ -51,7 +51,7 @@ def get_current_local_version(self):
print("No Version installed !")
return None

def check_version_difference(self, organism):
def check_version_difference(self, organism, wd):

local_version = self.localversion
remote_version = self.remoteversion
Expand All @@ -64,16 +64,16 @@ def check_version_difference(self, organism):
print("Outdated Version detected ! local: " + local_version + " remote: " + remote_version)
return True
else:
if self.check_organism(organism, local_version):
if self.check_organism(organism, local_version, wd):
return True
else:
return False

def download_currentversion_version(self, version, organism):
def download_currentversion_version(self, version, organism, wd):

# Download Base File

targetfolder = os.path.join("./EnsemblData/", version, organism)
targetfolder = os.path.join(wd, "/EnsemblData/", version, organism)
os.makedirs(targetfolder)
folder_url = "/pub/"+version+"/regulation/"+organism+"/"
self.site_ftp.change_dir(folder_url)
Expand Down
8 changes: 4 additions & 4 deletions bin/Modules/Ensembl/GTFGen.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@

class GTFGen:

def __init__(self, organism, release):
def __init__(self, organism, release, wd):

self.gff_lines = self.get_organism_as_gff(organism, release)
self.gff_lines = self.get_organism_as_gff(organism, release, wd)
self.value_map = {0: "ACTIVE", 1: "POISED", 2: "REPRESSED", 3: "INACTIVE", 4: "NA"}

def get_organism_as_gff(self, organism, release):
def get_organism_as_gff(self, organism, release, wd):

directory = os.path.join("./EnsemblData/", release, organism)
directory = os.path.join(wd, "/EnsemblData/", release, organism)
inputfile = ""
for file in os.listdir(directory):
if file.endswith("gff.gz"):
Expand Down
Loading

0 comments on commit 0a04f30

Please sign in to comment.