Skip to content
Permalink
83460e9671
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
111 lines (91 sloc) 4.32 KB
from Modules.Ensembl.FTPHandling.URLRetrieve import FTPHandler
import os.path
class EnsemblRegulationFTPRetriever:
    """
    Keep a local copy of the Ensembl regulation data for one organism current.

    On construction the newest ``release-N`` entry reported by the FTP handler
    is compared with the newest ``release-N`` directory found locally. The
    regulation data for ``organism`` is downloaded when no local release
    exists, when the local release is older than the remote one, or when the
    organism is missing from the local release.

    Local layout: ``<data_dir>/EnsemblData/<release>/<organism>/`` when
    ``data_dir`` is truthy, otherwise
    ``<wd>/data/EnsemblData/<release>/<organism>/``.
    """

    def __init__(self, organism, wd, data_dir):
        """
        Check remote and local versions and download if an update is needed.

        :param organism: organism folder name (e.g. ``"mus_musculus"``).
        :param wd: working directory, used when ``data_dir`` is falsy.
        :param data_dir: optional directory that contains ``EnsemblData``.
        """
        self.site_ftp = FTPHandler("ftp.ensembl.org", "pub")
        self.remoteversion = self.get_current_ftp_version()
        self.localversion = self.get_current_local_version(wd, data_dir)
        if self.check_version_difference(organism, wd, data_dir):
            self.download_currentversion_version(self.remoteversion, organism, wd, data_dir)
        else:
            print("Newest Version installed, no update needed.")

    @staticmethod
    def _data_root(wd, data_dir):
        """Return the local ``EnsemblData`` directory for the given settings."""
        if data_dir:
            return os.path.join(data_dir, "EnsemblData")
        return os.path.join(wd, "data", "EnsemblData")

    @staticmethod
    def _release_number(name):
        """
        Return the integer N of a ``release-N`` style name, or ``None``.

        A name qualifies when the text after its last ``-`` is all decimal
        digits and the text before it contains ``"release"``.
        """
        prefix, _, number = name.rpartition("-")
        if "release" in prefix and number.isdecimal():
            return int(number)
        return None

    @classmethod
    def _latest_release(cls, names):
        """
        Return the name with the highest release number, or ``None``.

        Names are compared by their numeric suffix, not as strings, so that
        ``release-100`` ranks above ``release-99``. Names that are not of the
        ``release-N`` form are ignored.
        """
        best_name = None
        best_number = -1
        for name in names:
            number = cls._release_number(name)
            if number is not None and number > best_number:
                best_name, best_number = name, number
        return best_name

    def get_release(self):
        """Return the name of the newest remote release (e.g. ``release-100``)."""
        return self.remoteversion

    def get_current_ftp_version(self):
        """
        Return the newest ``release-N`` entry listed by the FTP handler.

        :raises IndexError: if the listing contains no ``release-N`` entry
            (same exception type the previous implementation raised).
        """
        entries = self.site_ftp.get_all_entries()
        c_release = self._latest_release(entries)
        if c_release is None:
            raise IndexError("No release-N entry found in the remote FTP listing")
        print("Current release is "+c_release)
        return c_release

    def check_organism(self, organism, release, wd, data_dir):
        """
        Tell whether ``organism`` still has to be installed for ``release``.

        :return: ``False`` when ``<root>/<release>/<organism>`` is an existing
            directory, ``True`` otherwise (including when the release
            directory itself is missing).
        """
        organism_dir = os.path.join(self._data_root(wd, data_dir), release, organism)
        if os.path.isdir(organism_dir):
            return False
        print("No Local Version for "+organism+" installed. Installing...")
        return True

    def get_current_local_version(self, wd, data_dir):
        """
        Return the newest locally installed ``release-N`` directory name.

        :return: the directory name, or ``None`` when the data directory does
            not exist or holds no ``release-N`` sub-directory.
        """
        root = self._data_root(wd, data_dir)
        # os.walk yields nothing for a missing directory, so the default
        # tuple turns "no data directory yet" into "no sub-directories".
        directories = next(os.walk(root), (root, [], []))[1]
        localversion = self._latest_release(directories)
        if localversion is None:
            print("No Version installed !")
            return None
        print("Local Version found: " + localversion)
        return localversion

    def check_version_difference(self, organism, wd, data_dir):
        """
        Decide whether a download is required.

        Reads ``self.localversion`` and ``self.remoteversion``.

        :return: ``True`` when there is no local version, the remote release
            number is higher than the local one, or the organism is missing
            from the local release; ``False`` otherwise.
        :raises ValueError: if either version name is not of the
            ``release-N`` form.
        """
        local_version = self.localversion
        remote_version = self.remoteversion
        if local_version is None:
            return True
        remote_nr = self._release_number(remote_version)
        local_nr = self._release_number(local_version)
        if remote_nr is None or local_nr is None:
            raise ValueError(
                "Cannot parse release number from local '" + str(local_version)
                + "' / remote '" + str(remote_version) + "'"
            )
        if remote_nr > local_nr:
            print("Outdated Version detected ! local: " + local_version + " remote: " + remote_version)
            return True
        return self.check_organism(organism, local_version, wd, data_dir)

    def download_currentversion_version(self, version, organism, wd, data_dir):
        """
        Download the regulation data of ``organism`` for ``version``.

        The organism's base folder is fetched into
        ``<root>/<version>/<organism>/`` and every entry of the remote
        ``RegulatoryFeatureActivity`` folder into its own sub-folder of
        ``<root>/<version>/<organism>/activity/``.
        """
        # Download Base File
        targetfolder = os.path.join(self._data_root(wd, data_dir), version, organism)
        # exist_ok: a previously interrupted download may have left folders behind.
        os.makedirs(targetfolder, exist_ok=True)
        folder_url = "/pub/"+version+"/regulation/"+organism+"/"
        self.site_ftp.change_dir(folder_url)
        self.site_ftp.save_entries_to_file(folder_url, targetfolder)

        # Download Regulation Activity
        activityfolder_local = os.path.join(targetfolder, "activity")  # local folder for activity data
        activityfolder_remote = folder_url+"RegulatoryFeatureActivity/"  # remote (ftp) folder for activity data
        os.makedirs(activityfolder_local, exist_ok=True)
        celltypes_list = self.site_ftp.get_all_entries_from_dir(activityfolder_remote)

        # Iterate over the cell type list and download each into its own subfolder
        for celltype in celltypes_list:
            link_local = os.path.join(activityfolder_local, celltype)
            # NOTE(review): this yields a double slash ("...Activity//<celltype>").
            # Kept unchanged because how FTPHandler treats the path is not
            # visible here - confirm before normalising.
            link_origin = activityfolder_remote+"/"+celltype
            os.makedirs(link_local, exist_ok=True)
            self.site_ftp.save_entries_to_file(link_origin, link_local)
# Example usage: e = EnsemblRegulationFTPRetriever("mus_musculus", wd, data_dir)