diff --git a/bin/3.1_create_gtf/Modules/CrossMapper.py b/bin/3.1_create_gtf/Modules/CrossMapper.py index f4ae51c..82beace 100644 --- a/bin/3.1_create_gtf/Modules/CrossMapper.py +++ b/bin/3.1_create_gtf/Modules/CrossMapper.py @@ -24,7 +24,7 @@ def __init__(self, org, wd, out, is_dir): # out = path to output-file -> Parameter # is_dir = boolean if wd is data_dir or just working directory - # Get path to tempfile / outputfile and chainfile + # Get path to tempfile / outputfile and chain-file if is_dir: self.infile = os.path.join(wd + "/temp/" + org + ".gtf") @@ -48,8 +48,8 @@ def get_chain_file(self, org, wd, is_data_dir): # wd = working directory # is_data_dir = is wd data_dir or not - # return_value: Link to Chainfile for conversion. - # Custom chainfiles and chainfiles for more organism can be specified in this section + # return_value: Link to chain-file for conversion. + # Custom chain-files and chain-files for more organisms can be specified in this section if org == "hg19": if is_data_dir: diff --git a/bin/3.1_create_gtf/Modules/Ensembl/FTPHandling/VersionChecker.py b/bin/3.1_create_gtf/Modules/Ensembl/FTPHandling/VersionChecker.py index 7a8f066..2db86e3 100644 --- a/bin/3.1_create_gtf/Modules/Ensembl/FTPHandling/VersionChecker.py +++ b/bin/3.1_create_gtf/Modules/Ensembl/FTPHandling/VersionChecker.py @@ -138,11 +138,11 @@ def download_currentversion_version(self, version, organism, wd, data_dir): # Iterate over Celltype List and Download in corresponding subfolder for celltype in celltypes_list: - link_local = os.path.join(activityfolder_local, celltype) + link_local = os.path.join(activityfolder_local, celltype) link_origin = activityfolder_remote+"/"+celltype os.mkdir(link_local) self.site_ftp.save_entries_to_file(link_origin, link_local) # Debug section -# e = EnsemblRegulationFTPRetriever("mus_musculus") \ No newline at end of file +# e = EnsemblRegulationFTPRetriever("mus_musculus") diff --git a/bin/3.1_create_gtf/Modules/Ensembl/GTFGen.py 
b/bin/3.1_create_gtf/Modules/Ensembl/GTFGen.py index b6558c0..0e3af56 100644 --- a/bin/3.1_create_gtf/Modules/Ensembl/GTFGen.py +++ b/bin/3.1_create_gtf/Modules/Ensembl/GTFGen.py @@ -17,7 +17,7 @@ def __init__(self, organism, release, wd, data_dir): # Constructor for GTFGen # input_parameter: organism = input organism # release = used Ensembl release - # wd = working directory (default is ".") + # wd = working directory (default is "."), this is used if data_dir is not specified. # data_dir = data directory (if specified this is used) self.gff_lines = self.get_organism_as_gff(organism, release, wd, data_dir) diff --git a/bin/3.1_create_gtf/Modules/SaveResults.py b/bin/3.1_create_gtf/Modules/SaveResults.py index 8c7b645..e0b39e1 100644 --- a/bin/3.1_create_gtf/Modules/SaveResults.py +++ b/bin/3.1_create_gtf/Modules/SaveResults.py @@ -5,8 +5,7 @@ class ResultSaver: """ - Class to save the results. Path is dependent on the data_dir, tissuetype and mapped = True or False. - The output is saved to the temp directory in the data folder if crossmapping is necessary. + Class to save the results. The output is saved to the temp directory in the data folder if crossmapping is necessary. 
@author: Sebastian Beyvers @contact: sebastian.beyvers@med.uni-giessen.de @@ -22,7 +21,6 @@ def __init__(self, results, organism, wd, mapped, is_data_dir, out): # wd = working directory # mapped = boolean if crossmapping is necessary # is_data_dir = boolean if wd is a data_dir (true) or not (false) - # is_data_dir = boolean if wd is a data_dir (true) or not (false) print("Save results to File !") self.path = "" diff --git a/bin/3.1_create_gtf/Modules/Validator.py b/bin/3.1_create_gtf/Modules/Validator.py index 2f49625..ee34471 100644 --- a/bin/3.1_create_gtf/Modules/Validator.py +++ b/bin/3.1_create_gtf/Modules/Validator.py @@ -10,14 +10,14 @@ class Validator: def __init__(self, out_file): # Constructor - # input_parameter: out_file = Path to Outputfile + # input_parameter: out_file = path to output file self.out_file = out_file self.test_read_file() def test_read_file(self): - # Method to testread the file + # Method to test the output file-format with open(self.out_file) as outfile: line = outfile.readline() @@ -32,4 +32,4 @@ def test_read_file(self): exit(1) # Debug -# v = Validator("/home/basti/Schreibtisch/test_hg38.gtf") \ No newline at end of file +# v = Validator("/home/basti/Schreibtisch/test_hg38.gtf") diff --git a/bin/3.1_create_gtf/Modules/ucsc/ucsc.py b/bin/3.1_create_gtf/Modules/ucsc/ucsc.py index 50c033b..f1663b1 100644 --- a/bin/3.1_create_gtf/Modules/ucsc/ucsc.py +++ b/bin/3.1_create_gtf/Modules/ucsc/ucsc.py @@ -44,14 +44,14 @@ def __init__(self, org, wd, data_dir): def generate_gff_file(self): - # Call bigBedToBed binary to get a Bed file in the UCSCData folder + # Call bigBedToBed binary to get a BED-file in the UCSCData folder callstring = [self.path_to_bin, self.link, self.output] subprocess.call(callstring) def read_gff_to_gtf(self): - # Reads Bed File and return a gtf-formatted list of elements. + # Reads BED-file and returns a GTF-formatted list of elements. 
# return_value: GTF-formatted List of regulation entries from UCSC gtf_lines = [] @@ -75,8 +75,8 @@ def read_gff_to_gtf(self): def find_ID(self, line): # Find RefSeq ID in Line - # input_parameter: line = current line from bedfile - # return_value: string with gene_id in gtf-format + # input_parameter: line = current line from BED-file + # return_value: string with gene_id in GTF-format pattern = re.compile(r'ID:[0-9]{,9}|$') ref_id = re.search(pattern, line).group() @@ -90,8 +90,8 @@ def find_ID(self, line): def get_activity(self, line): - # Find activity categories in bed file - # input_parameter: line = current line from bedfile + # Find activity categories in BED-file + # input_parameter: line = current line from BED-file # return_value: list with activity for specified line("keystatus") key_status = [] @@ -122,7 +122,7 @@ def get_activity_categories(organism, wd): # Method to get ucsc-celltype categories from JSON config # input_parameter: organism = organism parameter - # wd = working directory, to find config fil + # wd = working directory, to find config file # return_value: List of categories from config. path_to_config = os.path.join(wd+"/config/celltypes_" + organism + ".json")