Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
master_project_JLU2018/bin/3.1_create_gtf/Modules/Ensembl/FTPHandling/VersionChecker.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
111 lines (91 sloc)
4.32 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os.path

from Modules.Ensembl.FTPHandling.URLRetrieve import FTPHandler
class EnsemblRegulationFTPRetriever:
    """
    Compare the locally installed Ensembl regulation release with the newest
    release on the Ensembl FTP server and download the data when necessary.

    Constructing an instance performs the complete check (and the download,
    if one is needed). Afterwards ``get_release()`` returns the name of the
    newest remote release.

    Local data is looked up below ``<data_dir>/EnsemblData/<release>/<organism>``
    or, when ``data_dir`` is empty, below ``<wd>/data/EnsemblData/...``.
    """

    def __init__(self, organism, wd, data_dir):
        """
        Run the version check and download the newest release if required.

        :param organism: organism directory name, e.g. ``"mus_musculus"``.
        :param wd: working directory; used as base path when ``data_dir`` is empty.
        :param data_dir: optional alternative base directory for the data.
        """
        self.site_ftp = FTPHandler("ftp.ensembl.org", "pub")
        self.remoteversion = self.get_current_ftp_version()
        self.localversion = self.get_current_local_version(wd, data_dir)

        if self.check_version_difference(organism, wd, data_dir):
            self.download_currentversion_version(self.remoteversion, organism, wd, data_dir)
        else:
            print("Newest Version installed, no update needed.")

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _ensembl_data_root(wd, data_dir):
        """Return the local ``EnsemblData`` directory for the given settings."""
        if data_dir:
            return os.path.join(data_dir, "EnsemblData")
        return os.path.join(wd, "data", "EnsemblData")

    @staticmethod
    def _list_subdirectories(path):
        """Return the names of the direct sub-directories of *path*.

        A missing *path* yields an empty list instead of raising, so a fresh
        installation without any data directory is handled gracefully.
        """
        return next(os.walk(path), (path, [], []))[1]

    @staticmethod
    def _release_number(name):
        """Return N for a name of the form ``...release...-N``, else ``None``."""
        prefix, separator, number = name.rstrip("/").rpartition("-")
        if separator and "release" in prefix and number.isdigit():
            return int(number)
        return None

    @classmethod
    def _newest_release(cls, names):
        """Return the release name with the highest number, or ``None``.

        Releases are compared numerically: a plain string sort would rank
        "release-99" above "release-100".
        """
        releases = [name for name in names if cls._release_number(name) is not None]
        if not releases:
            return None
        return max(releases, key=cls._release_number)

    # ------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------
    def get_release(self):
        """Return the name of the newest release found on the FTP server."""
        return self.remoteversion

    def get_current_ftp_version(self):
        """
        Determine the newest release listed by the FTP handler.

        :return: release name such as ``"release-100"``.
        :raises IndexError: if the listing contains no release entry (the same
            exception type the previous implementation raised in that case).
        """
        entries = self.site_ftp.get_all_entries()
        c_release = self._newest_release(entries)
        if c_release is None:
            raise IndexError("No 'release-<number>' entry found on the FTP server.")
        print("Current release is " + c_release)
        return c_release

    def check_organism(self, organism, release, wd, data_dir):
        """
        Check whether data for *organism* is missing in the local *release*.

        :return: ``True`` if the organism directory is missing (an install is
            needed), ``False`` if it already exists.
        """
        organism_dir = os.path.join(self._ensembl_data_root(wd, data_dir), release, organism)
        if os.path.isdir(organism_dir):
            return False
        print("No Local Version for " + organism + " installed. Installing...")
        return True

    def get_current_local_version(self, wd, data_dir):
        """
        Find the newest release directory that is installed locally.

        :return: release name such as ``"release-100"`` or ``None`` when no
            release directory exists.
        """
        data_root = self._ensembl_data_root(wd, data_dir)
        # Look at every sub-directory, not just the first one listed.
        localversion = self._newest_release(self._list_subdirectories(data_root))
        if localversion is None:
            print("No Version installed !")
            return None
        print("Local Version found: " + localversion)
        return localversion

    def check_version_difference(self, organism, wd, data_dir):
        """
        Decide whether a download is necessary.

        :return: ``True`` when nothing is installed, when the remote release
            is newer than the local one, or when the organism is missing in
            the local release; ``False`` otherwise.
        :raises ValueError: if a stored version name carries no release number.
        """
        local_version = self.localversion
        remote_version = self.remoteversion
        if local_version is None:
            return True

        remote_nr = self._release_number(remote_version)
        local_nr = self._release_number(local_version)
        if remote_nr is None or local_nr is None:
            raise ValueError(
                "Cannot parse release number from local: " + str(local_version)
                + " remote: " + str(remote_version)
            )

        if remote_nr > local_nr:
            print("Outdated Version detected ! local: " + local_version + " remote: " + remote_version)
            return True
        return self.check_organism(organism, local_version, wd, data_dir)

    def download_currentversion_version(self, version, organism, wd, data_dir):
        """
        Download the regulation data of *organism* for *version*.

        The base files are stored in ``<root>/<version>/<organism>`` and the
        entries of the remote ``RegulatoryFeatureActivity`` folder in one
        sub-folder each below ``.../activity``.
        """
        # Download base files.
        targetfolder = os.path.join(self._ensembl_data_root(wd, data_dir), version, organism)
        # exist_ok: a previous, interrupted run may have left the folders behind.
        os.makedirs(targetfolder, exist_ok=True)
        folder_url = "/pub/" + version + "/regulation/" + organism + "/"
        self.site_ftp.change_dir(folder_url)
        self.site_ftp.save_entries_to_file(folder_url, targetfolder)

        # Download regulation activity.
        activityfolder_local = os.path.join(targetfolder, "activity")
        activityfolder_remote = folder_url + "RegulatoryFeatureActivity/"
        os.makedirs(activityfolder_local, exist_ok=True)
        celltypes_list = self.site_ftp.get_all_entries_from_dir(activityfolder_remote)

        # One local sub-folder per entry of the remote activity folder.
        for celltype in celltypes_list:
            link_local = os.path.join(activityfolder_local, celltype)
            # NOTE(review): this yields a double slash in the remote path; kept
            # unchanged because FTPHandler's path handling is not visible here.
            link_origin = activityfolder_remote + "/" + celltype
            os.makedirs(link_local, exist_ok=True)
            self.site_ftp.save_entries_to_file(link_origin, link_local)
#e = EnsemblRegulationFTPRetriever("mus_musculus") |