Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
master_project_JLU2018/bin/3.1_create_gtf/Modules/Ensembl/FTPHandling/VersionChecker.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
148 lines (115 sloc)
5.76 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from Modules.Ensembl.FTPHandling.URLRetrieve import FTPHandler | |
import os.path | |
class EnsemblRegulationFTPRetriever:
    """
    Class for checking current version locally and remote on ftp.
    And downloading newest version if necessary.

    Constructing an instance runs the whole cycle: it connects to the FTP
    site, determines the newest remote release and the newest local release,
    and downloads the regulation data for the organism if the local copy is
    missing or outdated.

    Local layout: <data_dir>/EnsemblData/<release>/<organism>/ if data_dir is
    given, otherwise <wd>/data/EnsemblData/<release>/<organism>/.

    @author: Sebastian Beyvers
    @contact: sebastian.beyvers@med.uni-giessen.de
    """

    def __init__(self, organism, wd, data_dir):
        # Constructor:
        # input_parameter: organism = input organism
        # wd = working dir (default working directory, data_dir is used if specified)
        # data_dir = data directory (this is used as directory if specified)
        self.site_ftp = FTPHandler("ftp.ensembl.org", "pub")
        self.remoteversion = self.get_current_ftp_version()
        self.localversion = self.get_current_local_version(wd, data_dir)
        if self.check_version_difference(organism, wd, data_dir):
            self.download_currentversion_version(self.remoteversion, organism, wd, data_dir)
        else:
            print("Newest Version installed, no update needed.")

    @staticmethod
    def _release_number(name):
        # Helper: numeric part of a "release-<nr>" name.
        # return_value: int release number, or None if name has no parseable number
        try:
            return int(name.split("-")[1])
        except (IndexError, ValueError):
            return None

    @classmethod
    def _newest_release(cls, names):
        # Helper: pick the entry with the highest release number.
        # Compares numerically, so "release-100" is newer than "release-99"
        # (a plain string sort would rank them the other way round).
        # return_value: newest release name, or None if names holds no release
        releases = [
            name for name in names
            if "release" in name and cls._release_number(name) is not None
        ]
        if not releases:
            return None
        return max(releases, key=cls._release_number)

    @staticmethod
    def _ensembl_data_dir(wd, data_dir):
        # Helper: base folder that holds the local release folders.
        # data_dir takes precedence over wd if it is specified.
        if data_dir:
            return data_dir + "/EnsemblData/"
        return wd + "/data/EnsemblData/"

    @staticmethod
    def _subdirectories(path):
        # Helper: names of the immediate subdirectories of path.
        # os.walk yields nothing for a missing directory, so the default
        # makes a not-yet-existing data folder look like an empty one.
        return next(os.walk(path), (path, [], []))[1]

    def get_release(self):
        # Getter method for release version from FTP.
        return self.remoteversion

    def get_current_ftp_version(self):
        # Gets the current ftp-version from ftp.ensembl.org
        # return_value: string for current release on FTP
        # raises: IndexError if the FTP listing contains no release entry
        c_release = self._newest_release(self.site_ftp.get_all_entries())
        if c_release is None:
            raise IndexError("No release entry found on FTP server")
        print("Current release is "+c_release)
        return c_release

    def check_organism(self, organism, release, wd, data_dir):
        # Check if organism is locally existing
        # input_parameter: as in __init__, release = local release folder name
        # return_value: True if the organism data is MISSING (download needed),
        #               False if it already exists locally
        release_dir = os.path.join(self._ensembl_data_dir(wd, data_dir), release)
        if organism in self._subdirectories(release_dir):
            return False
        print("No Local Version for "+organism+" installed. Installing...")
        return True

    def get_current_local_version(self, wd, data_dir):
        # Method to check for the current local version
        # input_parameters: wd, data_dir as in __init__()
        # return_value: String for local release_version or if not existing None
        directories = self._subdirectories(self._ensembl_data_dir(wd, data_dir))
        # Only release folders are considered; other folders are ignored.
        localversion = self._newest_release(directories)
        if localversion is None:
            print("No Version installed !")
            return None
        print("Local Version found: " + localversion)
        return localversion

    def check_version_difference(self, organism, wd, data_dir):
        # Method to check if local version is differing from remote version
        # input_parameters: wd, data_dir, organism as in __init__()
        # return_value: True if a download is needed (no local version, outdated
        #               local version, or organism missing in the local version)
        local_version = self.localversion
        remote_version = self.remoteversion
        if local_version is None:
            return True
        remote_nr = int(remote_version.split("-")[1])
        local_nr = int(local_version.split("-")[1])
        if remote_nr > local_nr:
            print("Outdated Version detected ! local: " + local_version + " remote: " + remote_version)
            return True
        # Release is current: a download is only needed if the organism is missing.
        return self.check_organism(organism, local_version, wd, data_dir)

    def download_currentversion_version(self, version, organism, wd, data_dir):
        # Method to download current version from FTP if local version is not up-to-date
        # input_parameters: version = version to download
        # organism = input organism
        # wd = working directory
        # data_dir = data directory

        # Download Base File
        targetfolder = os.path.join(self._ensembl_data_dir(wd, data_dir), version, organism)
        # exist_ok: a folder left behind by an interrupted run is reused
        os.makedirs(targetfolder, exist_ok=True)
        folder_url = "/pub/"+version+"/regulation/"+organism+"/"
        self.site_ftp.change_dir(folder_url)
        self.site_ftp.save_entries_to_file(folder_url, targetfolder)

        # Download Regulation Activity
        activityfolder_local = os.path.join(targetfolder, "activity")  # local Folder for Activity Data
        activityfolder_remote = folder_url+"RegulatoryFeatureActivity/"  # remote (ftp) folder for activity data
        os.makedirs(activityfolder_local, exist_ok=True)  # Create New Folder
        # Get List for all entries in activity Folder
        celltypes_list = self.site_ftp.get_all_entries_from_dir(activityfolder_remote)

        # Iterate over Celltype List and Download in corresponding subfolder
        for celltype in celltypes_list:
            link_local = os.path.join(activityfolder_local, celltype)
            link_origin = activityfolder_remote+"/"+celltype
            os.makedirs(link_local, exist_ok=True)
            self.site_ftp.save_entries_to_file(link_origin, link_local)
# Debug section | |
# e = EnsemblRegulationFTPRetriever("mus_musculus") |