Skip to content
Permalink
4f40b11ca2
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
148 lines (115 sloc) 5.76 KB
from Modules.Ensembl.FTPHandling.URLRetrieve import FTPHandler
import os.path
class EnsemblRegulationFTPRetriever:
    """
    Compare the locally installed Ensembl regulation release with the newest
    release on the Ensembl FTP server and download the data if necessary.

    Local data lives below ``<data_dir>/EnsemblData/<release>/<organism>`` or,
    when no data_dir is given, ``<wd>/data/EnsemblData/<release>/<organism>``.

    @author: Sebastian Beyvers
    @contact: sebastian.beyvers@med.uni-giessen.de
    """

    def __init__(self, organism, wd, data_dir):
        """
        Connect to the FTP server, determine remote and local releases and
        download the regulation data when the local copy is missing/outdated.

        organism: input organism (directory name on the FTP server)
        wd:       working directory, used when data_dir is empty/None
        data_dir: data directory, takes precedence over wd when specified
        """
        self.site_ftp = FTPHandler("ftp.ensembl.org", "pub")
        self.remoteversion = self.get_current_ftp_version()
        self.localversion = self.get_current_local_version(wd, data_dir)
        if self.check_version_difference(organism, wd, data_dir):
            self.download_currentversion_version(
                self.remoteversion, organism, wd, data_dir
            )
        else:
            print("Newest Version installed, no update needed.")

    @staticmethod
    def _ensembl_root(wd, data_dir):
        """Return the local EnsemblData directory; data_dir wins over wd."""
        if data_dir:
            return os.path.join(data_dir, "EnsemblData")
        return os.path.join(wd, "data", "EnsemblData")

    @staticmethod
    def _subdirectories(path):
        """Return the names of the immediate subdirectories of path.

        os.walk yields nothing for a missing/unreadable directory, so the
        default passed to next() turns that case into an empty list instead
        of a StopIteration.
        """
        return next(os.walk(path), (path, [], []))[1]

    @staticmethod
    def _release_number(release):
        """Return N for a name like 'release-N', or -1 if it cannot be parsed."""
        try:
            return int(release.split("-")[1])
        except (IndexError, ValueError):
            return -1

    def _newest_release(self, names):
        """Return the numerically newest name containing 'release', or None."""
        releases = [name for name in names if "release" in name]
        if not releases:
            return None
        # Rank by number first: a plain string sort puts "release-99"
        # after "release-100". The name itself only breaks ties.
        return max(releases, key=lambda name: (self._release_number(name), name))

    def get_release(self):
        """Return the release name found on the FTP server."""
        return self.remoteversion

    def get_current_ftp_version(self):
        """
        Determine the newest release directory on the FTP server.

        return_value: string for the current release on the FTP
        raises: IndexError if the server lists no entry containing "release"
        """
        c_release = self._newest_release(self.site_ftp.get_all_entries())
        if c_release is None:
            raise IndexError("No release directory found on the FTP server.")
        print("Current release is "+c_release)
        return c_release

    def check_organism(self, organism, release, wd, data_dir):
        """
        Check whether the organism still has to be installed for a release.

        input_parameter: as in __init__, release = local release name
        return_value: True if NO local directory for the organism exists
                      (also when the release directory itself is missing),
                      False if the organism is already installed
        """
        release_dir = os.path.join(self._ensembl_root(wd, data_dir), release)
        if organism in self._subdirectories(release_dir):
            return False
        print("No Local Version for "+organism+" installed. Installing...")
        return True

    def get_current_local_version(self, wd, data_dir):
        """
        Determine the newest locally installed release.

        input_parameters: wd, data_dir as in __init__()
        return_value: name of the newest local release directory, or None if
                      no release directory (or no data directory) exists
        """
        installed = self._subdirectories(self._ensembl_root(wd, data_dir))
        localversion = self._newest_release(installed)
        if localversion is None:
            print("No Version installed !")
            return None
        print("Local Version found: " + localversion)
        return localversion

    def check_version_difference(self, organism, wd, data_dir):
        """
        Decide whether a download is required.

        input_parameters: wd, data_dir, organism as in __init__()
        return_value: True if nothing is installed, the remote release is
                      newer than the local one, or the organism is missing
                      from the local release; False otherwise
        """
        local_version = self.localversion
        remote_version = self.remoteversion
        if local_version is None:
            return True
        remote_nr = self._release_number(remote_version)
        local_nr = self._release_number(local_version)
        if remote_nr > local_nr:
            print("Outdated Version detected ! local: " + local_version + " remote: " + remote_version)
            return True
        return self.check_organism(organism, local_version, wd, data_dir)

    def download_currentversion_version(self, version, organism, wd, data_dir):
        """
        Download the regulation data of one organism for the given release.

        input_parameters: version = version to download
                          organism = input organism
                          wd = working directory
                          data_dir = data directory
        """
        # Download base files
        targetfolder = os.path.join(self._ensembl_root(wd, data_dir), version, organism)
        # exist_ok: a directory left over from an interrupted run must not
        # abort the download.
        os.makedirs(targetfolder, exist_ok=True)
        folder_url = "/pub/"+version+"/regulation/"+organism+"/"
        self.site_ftp.change_dir(folder_url)
        self.site_ftp.save_entries_to_file(folder_url, targetfolder)

        # Download regulation activity, one local subfolder per cell type
        activityfolder_local = os.path.join(targetfolder, "activity")
        activityfolder_remote = folder_url+"RegulatoryFeatureActivity/"
        os.makedirs(activityfolder_local, exist_ok=True)
        celltypes_list = self.site_ftp.get_all_entries_from_dir(activityfolder_remote)
        for celltype in celltypes_list:
            link_local = os.path.join(activityfolder_local, celltype)
            # NOTE(review): this produces a double slash in the remote path;
            # kept as-is because FTPHandler is not visible here - confirm.
            link_origin = activityfolder_remote+"/"+celltype
            os.makedirs(link_local, exist_ok=True)
            self.site_ftp.save_entries_to_file(link_origin, link_local)
# Debug section (the constructor requires organism, wd and data_dir)
# e = EnsemblRegulationFTPRetriever("mus_musculus", os.getcwd(), None)