-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
22 changed files
with
1,171 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
# Created by .ignore support plugin (hsz.mobi) | ||
### Python template | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] | ||
*$py.class | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
downloads/ | ||
eggs/ | ||
.eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
wheels/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
MANIFEST | ||
|
||
# PyInstaller | ||
# Usually these files are written by a python script from a template | ||
# before PyInstaller builds the exe, so as to inject date/other infos into it. | ||
*.manifest | ||
*.spec | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.coverage | ||
.coverage.* | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
*.cover | ||
.hypothesis/ | ||
.pytest_cache/ | ||
|
||
# Translations | ||
*.mo | ||
*.pot | ||
|
||
# Django stuff: | ||
*.log | ||
local_settings.py | ||
db.sqlite3 | ||
|
||
# Flask stuff: | ||
instance/ | ||
.webassets-cache | ||
|
||
# Scrapy stuff: | ||
.scrapy | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyBuilder | ||
target/ | ||
|
||
# Jupyter Notebook | ||
.ipynb_checkpoints | ||
|
||
# pyenv | ||
.python-version | ||
|
||
# celery beat schedule file | ||
celerybeat-schedule | ||
|
||
# SageMath parsed files | ||
*.sage.py | ||
|
||
# Environments | ||
.env | ||
.venv | ||
env/ | ||
venv/ | ||
ENV/ | ||
env.bak/ | ||
venv.bak/ | ||
|
||
# Spyder project settings | ||
.spyderproject | ||
.spyproject | ||
|
||
# Rope project settings | ||
.ropeproject | ||
|
||
# mkdocs documentation | ||
/site | ||
|
||
# mypy | ||
.mypy_cache/ | ||
### JetBrains template | ||
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm | ||
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 | ||
|
||
# User-specific stuff | ||
.idea/**/workspace.xml | ||
.idea/**/tasks.xml | ||
.idea/**/usage.statistics.xml | ||
.idea/**/dictionaries | ||
.idea/**/shelf | ||
|
||
# Sensitive or high-churn files | ||
.idea/**/dataSources/ | ||
.idea/**/dataSources.ids | ||
.idea/**/dataSources.local.xml | ||
.idea/**/sqlDataSources.xml | ||
.idea/**/dynamic.xml | ||
.idea/**/uiDesigner.xml | ||
.idea/**/dbnavigator.xml | ||
|
||
# Gradle | ||
.idea/**/gradle.xml | ||
.idea/**/libraries | ||
|
||
# Gradle and Maven with auto-import | ||
# When using Gradle or Maven with auto-import, you should exclude module files, | ||
# since they will be recreated, and may cause churn. Uncomment if using | ||
# auto-import. | ||
# .idea/modules.xml | ||
# .idea/*.iml | ||
# .idea/modules | ||
|
||
# CMake | ||
cmake-build-*/ | ||
|
||
# Mongo Explorer plugin | ||
.idea/**/mongoSettings.xml | ||
|
||
# File-based project format | ||
*.iws | ||
|
||
# IntelliJ | ||
out/ | ||
|
||
# mpeltonen/sbt-idea plugin | ||
.idea_modules/ | ||
|
||
# JIRA plugin | ||
atlassian-ide-plugin.xml | ||
|
||
# Cursive Clojure plugin | ||
.idea/replstate.xml | ||
|
||
# Crashlytics plugin (for Android Studio and IntelliJ) | ||
com_crashlytics_export_strings.xml | ||
crashlytics.properties | ||
crashlytics-build.properties | ||
fabric.properties | ||
|
||
# Editor-based Rest Client | ||
/.idea/ | ||
/EnsemblData/release-94/ | ||
/UCSCData/hg38.bed | ||
/UCSCData/mm10.bed | ||
/results/homo_sapiens_filtered.gtf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
import json | ||
import os | ||
|
||
|
||
class ActivityCategorizer:
    """Combine per-folder activity tables into one table per category.

    The category -> folder mapping is read from
    ``./config/celltypes_<organism>.json``; the raw tables are read from
    ``./EnsemblData/<release>/<organism>/activity/<folder>/table.bin``.
    Both paths are relative to the current working directory.

    Each table is a ``bytearray`` holding one status code per position.
    The codes are expected to be 0..4, where 4 means "NA" (not available).
    """

    # Status code written for every position of a category without aliases.
    NOT_AVAILABLE = 4

    def __init__(self, release, organism):
        """Load the config and all tables, then build the categorization.

        Args:
            release: Name of the release folder below ``./EnsemblData``.
            organism: Organism name, used for the config file name and the
                data folder.
        """
        # All folders that hold an activity table; filled by read_config().
        self.folderlist = []

        # Mapping from celltypes_<organism>.json:
        # key = category, value = list of folders.
        self.c_dict = self.read_config(organism)

        # Raw activity table of every folder, keyed by folder name.
        self.activity = {}
        self.get_activity_data(release, organism)

        # Activity per category as described by the JSON config.
        print("Categorization: This may take a while")
        self.categorization = self.generate_categorized_activity()
        print("Categorization finished !")

    def get_categorization(self):
        """Return the dict mapping category -> merged activity table."""
        return self.categorization

    def read_config(self, organism):
        """Read the cell type config and return category -> folder aliases.

        Side effect: every alias found is appended to ``self.folderlist``.

        Args:
            organism: Organism name used to build the config file name.

        Returns:
            dict mapping each entry's ``"type"`` to its ``"alias_ensembl"``
            list.
        """
        path_to_config = os.path.join(
            "./config", "celltypes_" + organism + ".json"
        )
        with open(path_to_config, encoding="utf-8") as config_file:
            entries = json.load(config_file)

        c_dict = {}
        for entry in entries:
            aliases = entry["alias_ensembl"]
            c_dict[entry["type"]] = aliases
            self.folderlist.extend(aliases)
        return c_dict

    def get_activity_data(self, release, organism):
        """Load ``table.bin`` of every known folder into ``self.activity``.

        Args:
            release: Name of the release folder below ``./EnsemblData``.
            organism: Organism folder name below the release folder.

        Raises:
            OSError: If a table file cannot be opened.
        """
        for folder in self.folderlist:
            table_path = os.path.join(
                "./EnsemblData", release, organism, "activity", folder,
                "table.bin",
            )
            with open(table_path, "rb") as table_file:
                self.activity[folder] = bytearray(table_file.read())

    def generate_categorized_activity(self):
        """Build one activity table per configured category.

        Returns:
            dict mapping category -> bytearray. A category with one alias
            reuses that folder's table object (no copy), one with several
            aliases gets the merged table, and one without aliases gets a
            table filled with ``NOT_AVAILABLE``.
        """
        category_activity = {}
        for category, aliases in self.c_dict.items():
            if not aliases:
                category_activity[category] = self._not_available_table()
            elif len(aliases) == 1:
                category_activity[category] = self.activity[aliases[0]]
            else:
                category_activity[category] = self.activity_comparator(aliases)
        return category_activity

    def _not_available_table(self):
        """Return an all-NA table as long as the first loaded table.

        Returns an empty table when no folder is configured at all, instead
        of failing on the lookup of the first folder.
        """
        if not self.folderlist:
            return bytearray()
        length = len(self.activity[self.folderlist[0]])
        return bytearray([self.NOT_AVAILABLE]) * length

    def activity_comparator(self, aliaslist):
        """Merge the tables of several folders position by position.

        At every position the lowest status code among the tables wins
        (0 beats 1, 1 beats 2, and so on), which is the precedence the
        status codes 0..4 are given here.

        Args:
            aliaslist: Non-empty list of folder names present in
                ``self.activity``.

        Returns:
            bytearray with the length of the first alias' table.

        Raises:
            IndexError: If ``aliaslist`` is empty or another table is
                shorter than the first one.
        """
        tables = [self.activity[alias] for alias in aliaslist]
        # The first table defines the output length, as before.
        reference_length = len(tables[0])
        return bytearray(
            min(table[position] for table in tables)
            for position in range(reference_length)
        )
|
||
|
||
# Debugging | ||
|
||
# e = ActivityCategorizer("release-94", "homo_sapiens")
# print(len(e.categorization)) | ||
# for x in e.categorization.values(): | ||
# print(len(x)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
import os.path | ||
from Modules.Ensembl.ActivityTableGenerator import ATGenerator | ||
|
||
|
||
class ActivityTable:
    """
    Class for checking activity_table and generating them.
    ActivityTable = Byte Representation of Activity Status
    corresponding to the generator Schema default:
    0, "activity=ACTIVE",
    1, "activity=POISED",
    2, "activity=REPRESSED",
    3, "activity=INACTIVE",
    4, "activity=NA"
    """

    def __init__(self, organism, current_release):
        """Locate the activity folders of one organism and release.

        Args:
            organism: Organism folder name below the release folder.
            current_release: Release folder name below ``./EnsemblData/``.

        Raises:
            FileNotFoundError: If the activity directory does not exist or
                cannot be listed.
        """
        self.link = os.path.join(
            "./EnsemblData/", current_release, organism, "activity"
        )
        # os.walk() yields nothing for a missing or unreadable directory,
        # so a bare next() would leak StopIteration out of the constructor.
        top_level = next(os.walk(self.link), None)
        if top_level is None:
            raise FileNotFoundError(
                "Activity directory not found or not readable: " + self.link
            )
        # Names of the direct subfolders of the activity directory.
        self.folders = top_level[1]
        self.generator = ATGenerator(["activity=ACTIVE",
                                      "activity=POISED",
                                      "activity=REPRESSED",
                                      "activity=INACTIVE",
                                      "activity=NA"])

    def check_and_generate_activity_table(self):
        """Generate ``table.bin`` for every subfolder that lacks one."""
        for subfolder in self.folders:
            folder_link = os.path.join(self.link, subfolder)
            sf_link = os.path.join(folder_link, "table.bin")
            if not os.path.isfile(sf_link):
                print("No ActivityTable for "+subfolder+" found, generating new one.")
                self.generate_table(folder_link)
        print("All ActivityTables found, proceeding")

    def generate_table(self, link):
        """Write a ``table.bin`` next to every ``.gff.gz`` file below link.

        The directory tree is walked recursively. If one directory holds
        several ``.gff.gz`` files, each one overwrites the ``table.bin``
        written for the previous one.

        Args:
            link: Directory to search for ``.gff.gz`` files.
        """
        for root, dirs, files in os.walk(link):
            for file in files:
                if file.endswith(".gff.gz"):
                    originpath = os.path.join(root, file)
                    file_path = os.path.join(root, "table.bin")
                    with open(file_path, "wb") as f:
                        f.write(self.generator.read_table(originpath))
                    print("New ActivityTable generated in: " + root)
# Debug | ||
|
||
#e = ActivityTable("homo_sapiens", "release-94") | ||
#e.check_and_generate_activity_table() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import gzip | ||
|
||
|
||
class ATGenerator:
    """Translate the lines of a gzip-compressed file into status bytes."""

    def __init__(self, repre):
        """Store the search strings whose list index is the status code.

        Args:
            repre: Sequence of strings; the position of a string in this
                sequence is the byte written for a line containing it.
        """
        self.representation = repre

    def read_table(self, file):
        """Return one byte per line that contains a known representation.

        Each line is checked against the representations in order and the
        index of the first one found is recorded. Lines that contain none
        of them add nothing to the table.

        Args:
            file: Path of the gzip-compressed input file.

        Returns:
            bytearray of representation indices in line order.
        """
        # Compare on raw bytes: converting every line with str() would
        # search the b'...' repr, where quotes and backslashes are escaped.
        patterns = [entry.encode("utf-8") for entry in self.representation]
        activity_table = []
        with gzip.open(file, 'rb') as f:
            for line in f:
                for index, pattern in enumerate(patterns):
                    if pattern in line:
                        activity_table.append(index)
                        break
        return bytearray(activity_table)
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from Modules.Ensembl.ActivityTable import ActivityTable | ||
from Modules.Ensembl.FTPHandling.VersionChecker import EnsemblRegulationFTPRetriever as FTPRetriever | ||
from Modules.Ensembl.ActivityCategorizer import ActivityCategorizer | ||
from Modules.Ensembl.GTFGen import GTFGen | ||
|
||
|
||
class Ensembl:
    """Run the Ensembl steps in order and expose the resulting GTF.

    Construction fetches the release from the FTP retriever, checks and
    generates the activity tables, builds the categorization and creates
    the GTF generator.
    """

    def __init__(self, organism):
        """Execute all preparation steps for the given organism.

        Args:
            organism: Organism name passed on to every helper.
        """
        print("Starting Ensembl")

        retriever = FTPRetriever(organism)
        self.updater = retriever
        release = retriever.get_release()
        self.release = release

        table = ActivityTable(organism, release)
        self.acttable = table
        table.check_and_generate_activity_table()

        self.categorizer = ActivityCategorizer(release, organism)

        print("Generating GTF")
        self.gtf_generator = GTFGen(organism, release)

        print("Ensembl Finished !")

    def get_gtf(self):
        """Return the GTF built from the release and the categorization."""
        categorization = self.categorizer.get_categorization()
        return self.gtf_generator.get_gtf(self.release, categorization)
|
||
#e = Ensembl("homo_sapiens") | ||
#print(len(e.categorizer.categorization)) |
Oops, something went wrong.