Skip to content

Commit

Permalink
Updated config, updated script for changes in Ensembl Release 95 temp…
Browse files Browse the repository at this point in the history
…orary fix for #51. Needs more improvement of config handling.
  • Loading branch information
basti committed Jan 10, 2019
1 parent 13a610f commit a161ed0
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 29 deletions.
26 changes: 14 additions & 12 deletions bin/3.1_create_gtf/Modules/Ensembl/ActivityCategorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,20 +120,22 @@ def activity_comparator(self, aliaslist):
# return_value: Array of Activitystatus by category (type in config)

concatenated_array = bytearray([])

length = len(self.activity[aliaslist[0]])
input_arrays = [self.activity[index] for index in aliaslist]
for x in range(length):
if any(y[x] == 0 for y in input_arrays):
concatenated_array.append(0)
elif any(y[x] == 1 for y in input_arrays):
concatenated_array.append(1)
elif any(y[x] == 2 for y in input_arrays):
concatenated_array.append(2)
elif any(y[x] == 3 for y in input_arrays):
concatenated_array.append(3)
elif any(y[x] == 4 for y in input_arrays):
concatenated_array.append(4)
try:
for x in range(length):
if any(y[x] == 0 for y in input_arrays):
concatenated_array.append(0)
elif any(y[x] == 1 for y in input_arrays):
concatenated_array.append(1)
elif any(y[x] == 2 for y in input_arrays):
concatenated_array.append(2)
elif any(y[x] == 3 for y in input_arrays):
concatenated_array.append(3)
elif any(y[x] == 4 for y in input_arrays):
concatenated_array.append(4)
except IndexError:
print("Indexerror occured")
return concatenated_array


Expand Down
9 changes: 6 additions & 3 deletions bin/3.1_create_gtf/Modules/Ensembl/GTFGen.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,9 @@ def generate_additional_information(gene_id, activity):
# activity = List of activity-data for specified gene
# return_value: String for attributes (column 9) in gtf-format

if gene_id.startswith("ID=regulatory_region:"):
if not gene_id.startswith("ID=E"):
gene_id = 'gene_id "'+gene_id.split(':')[1]+'"'
elif gene_id.startswith("ID=E"):
else:
gene_id = 'gene_id "'+gene_id.split('=')[1]+'"'

activity_string = 'activity "'+', '.join(activity)+'"'
Expand All @@ -107,7 +107,10 @@ def generate_activity_list(self, activity, index):

activity_list = []
for key, value in activity.items():
activity_list.append(key+">"+self.value_map[value[index]])
try:
activity_list.append(key+">"+self.value_map[value[index]])
except IndexError:
pass
return activity_list

def get_gtf(self, release, activity):
Expand Down
Empty file modified bin/3.1_create_gtf/RegGTFExtractor.py
100644 → 100755
Empty file.
48 changes: 34 additions & 14 deletions bin/3.1_create_gtf/config/celltypes_homo_sapiens.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
{
"type": "A549",
"alias_ucsc": [],
"alias_ensembl": ["A549"]
"alias_ensembl": ["A549", "A673"]
},
{
"type": "Aorta",
Expand All @@ -17,22 +17,32 @@
{
"type": "B-Cells",
"alias_ucsc": [],
"alias_ensembl": ["B_cells_PB_Roadmap", "naive_B_cell_VB", "GM12878"]
"alias_ensembl": ["B_cells_PB_Roadmap", "naive_B_cell_VB", "GM12878", "B_cell_ENCSR682AXR", "CD38__naive_B_cell_VB", "naive_B_cell_To"]
},
{
"type": "T-Cell",
"alias_ucsc": [],
"alias_ensembl": ["CD4_ab_T_cell_VB", "CM_CD4_ab_T_cell_VB", "CD8_ab_T_cell_CB", "T_cells_PB_Roadmap"]
"alias_ensembl": ["CD4_ab_T_cell_VB",
"CM_CD4_ab_T_cell_VB",
"CD8_ab_T_cell_CB",
"T_cells_PB_Roadmap",
"CD4_ab_T_cell_CB",
"CD4_positive__alpha_beta_memory_T_cell",
"CD4_positive__alpha_beta_T_cell",
"CD4_positive__alpha_beta_T_cell_ENCSR948ZKZ",
"CD4_positive__CD25_positive__alpha_beta_regulatory_T_cell",
"effector_memory_CD4_positive__alpha_beta_T_cell", "EM_CD8_ab_T_cell_VB",
"naive_thymus_derived_CD4_positive__alpha_beta_T_cell", "T_helper_17_cell"]
},
{
"type": "Monocyte",
"alias_ucsc": [],
"alias_ensembl": ["CD14CD16__monocyte_CB", "CD14CD16__monocyte_VB", "Monocytes_CD14", "Monocytes_CD14_PB_Roadmap"]
"alias_ensembl": ["CD14CD16__monocyte_CB", "CD14CD16__monocyte_VB", "Monocytes_CD14", "Monocytes_CD14_PB_Roadmap", "CD14_positive_monocyte"]
},
{
"type": "Neutrophil",
"alias_ucsc": [],
"alias_ensembl": ["neutrophil_CB", "neutrophil_myelocyte_BM", "neutrophil_VB"]
"alias_ensembl": ["neutrophil_CB", "neutrophil_myelocyte_BM", "neutrophil_VB", "neutrophil"]
},
{
"type": "Eosinophil",
Expand All @@ -59,7 +69,7 @@
{
"type": "Intestine",
"alias_ucsc": [],
"alias_ensembl": ["Fetal_Intestine_Large", "Fetal_Intestine_Small", "Small_Intestine"]
"alias_ensembl": ["Fetal_Intestine_Large", "Fetal_Intestine_Small", "Small_Intestine", "sigmoid_colon"]
},
{
"type": "AdrenalGland",
Expand All @@ -69,7 +79,7 @@
{
"type": "Muscle",
"alias_ucsc": ["limb"],
"alias_ensembl": ["Fetal_Muscle_Leg", "Fetal_Muscle_Trunk", "Psoas_Muscle", "HSMM", "HSMMtube"]
"alias_ensembl": ["Fetal_Muscle_Leg", "Fetal_Muscle_Trunk", "Psoas_Muscle", "HSMM", "HSMMtube", "skeletal_muscle_myoblast"]
},
{
"type": "Gastric",
Expand All @@ -79,17 +89,17 @@
{
"type": "Endothelial",
"alias_ucsc": ["blood vessels"],
"alias_ensembl": ["EPC_VB", "HMEC", "HUVEC", "HUVEC_prol_CB", "NHEK"]
"alias_ensembl": ["EPC_VB", "HMEC", "HUVEC", "HUVEC_prol_CB", "NHEK", "endothelial_cell_of_umbilical_vein"]
},
{
"type": "StemCells",
"alias_ucsc": [],
"alias_ensembl": ["H1ESC", "H1_mesenchymal", "H1_neuronal_progenitor", "H1_trophoblast", "H9", "MSC_VB", "iPS_20b", "iPS_DF_6_9", "iPS_DF_19_11"]
"alias_ensembl": ["H1ESC", "HUES48", "HUES6", "HUES64", "H1_hESC", "H1_hESC_ENCSR820QMS", "H9_ENCSR323FKB", "H1_mesenchymal", "H1_neuronal_progenitor", "H1_trophoblast", "H9", "MSC_VB", "iPS_20b", "iPS_15b", "iPS_DF_6_9", "iPS_DF_19_11", "common_myeloid_progenitor__CD34_positive", "common_myeloid_progenitor__CD34_positive_ENCSR337XXD_1", "common_myeloid_progenitor__CD34_positive_ENCSR722JRY"]
},
{
"type": "Lung",
"alias_ucsc": [],
"alias_ensembl": ["Lung", "IMR90", "NHLF"]
"alias_ensembl": ["Lung", "IMR90", "NHLF", "lung_ENCSR465WKM"]
},
{
"type": "Pancreas",
Expand All @@ -99,7 +109,7 @@
{
"type": "Liver",
"alias_ucsc": ["liver"],
"alias_ensembl": []
"alias_ensembl": ["hepatocyte"]
},
{
"type": "Ovary",
Expand All @@ -119,7 +129,7 @@
{
"type": "Heart",
"alias_ucsc": ["heart"],
"alias_ensembl": ["Right_Atrium", "Left_Ventricle"]
"alias_ensembl": ["Right_Atrium", "Left_Ventricle", "cardiac_muscle_cell", "heart_right_ventricle"]
},
{
"type": "Osteoblast",
Expand All @@ -129,7 +139,7 @@
{
"type": "Fibroblast",
"alias_ucsc": [],
"alias_ensembl": ["NHDF_AD"]
"alias_ensembl": ["NHDF_AD", "fibroblast_of_dermis", "fibroblast_of_lung", "IMR_90"]
},
{
"type": "NK-Cells",
Expand All @@ -144,7 +154,7 @@
{
"type": "Brain",
"alias_ucsc": ["midbrain (mesencephalon)", "trigeminal V (ganglion, cranial)", "forebrain", "neural tube", "hindbrain (rhombencephalon)", "dorsal root ganglion", "cranial nerve"],
"alias_ensembl": ["NH_A"]
"alias_ensembl": ["NH_A", "astrocyte", "bipolar_neuron", "brain", "neural_progenitor_cell", "neural_stem_progenitor_cell", "neuron"]
},
{
"type": "Mesenchym",
Expand Down Expand Up @@ -175,5 +185,15 @@
"type": "Melanocytes",
"alias_ucsc": ["melanocytes"],
"alias_ensembl": []
},
{
"type": "Miscelanious",
"alias_ucsc": [],
"alias_ensembl": ["endodermal_cell", "esophagus", "HCT116", "Karpas_422", "keratinocyte", "mammary_epithelial_cell", "MCF_7", "MM_1S", "myotube", "PC_3", "PC_9", "SK_N_SH"]
},
{
"type": "Kidney",
"alias_ucsc": [],
"alias_ensembl": ["kidney"]
}
]

0 comments on commit a161ed0

Please sign in to comment.