Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this organization
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Reseting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
loosolab
/
mmRmeta
Public
forked from
sebastianlieske/mmRmeta
Notifications
You must be signed in to change notification settings
Fork
0
Star
0
Code
Pull requests
0
Actions
Projects
0
Security
Insights
Additional navigation options
Code
Pull requests
Actions
Projects
Security
Insights
Files
master
R
example
report_files
clinical.cases_selection.2019-01-18.json
filteredDataLung.Rdata
lungFiltered.RDS
lung_example.Rmd
preprocessing.R
statistical.test.R
man
.Rbuildignore
.gitignore
DESCRIPTION
NAMESPACE
README.md
lung_example.Rmd
lung_example.html
mmRmeta.Rproj
Breadcrumbs
mmRmeta
/
example
/
preprocessing.R
Blame
Blame
Latest commit
History
History
64 lines (56 loc) · 4.1 KB
Breadcrumbs
mmRmeta
/
example
/
preprocessing.R
Top
File metadata and controls
Code
Blame
64 lines (56 loc) · 4.1 KB
Raw
# Preprocessing Data ----
# Example workflow: load TCGA clinical metadata and a filtered multimodalR
# result for lung, match both on case_id, build per-gene data tables and run
# the proportion / Kruskal-Wallis summaries.
# NOTE(review): the un-namespaced helpers used below (filter.columns.as.na,
# rename.columns, subset.metadata, ...) are presumably provided by mmRmeta and
# must be loaded before running this script -- confirm.
# NOTE(review): both input files are read relative to the working directory,
# which is assumed to be the example/ folder -- confirm.

#### 1.1 MetaData ####
# Relative path, consistent with readRDS("lungFiltered.RDS") below. The script
# originally pointed at a machine-specific drive ("P:/TCGA/...").
metadata <- RJSONIO::fromJSON(
  "clinical.cases_selection.2019-01-18.json",
  nullValue = NA,
  simplify = FALSE
)
# Flatten the list into a data frame.
metadata <- plyr::ldply(metadata, data.frame)
DataExplorer::plot_intro(metadata)
DataExplorer::plot_missing(metadata)

# 15 columns were dropped because of the NA values. From here I'd suggest
# extracting the columns of interest because many aren't necessary for the
# evaluation.
metadata <- filter.columns.as.na(metadata)
metadata <- rename.columns(metadata)

# Now you can select your columns of interest. For this example 10 columns are
# selected. Note that you may have duplicated column names.
# The numeric column indices used in section 3 depend on this order.
metadataSelect <- subset(
  metadata,
  select = c(
    case_id,
    tumor_stage,
    primary_diagnosis,
    site_of_resection_or_biopsy,
    vital_status,
    days_to_death,
    age_at_diagnosis,
    gender,
    race,
    ethnicity
  )
)
# The last thing you have to do is to change your column with the patient/case
# id from a factor to characters.
# metadataSelect$case_id <- as.character(metadataSelect$case_id)

#### 1.2 Organ data | Primary cancer Data | Expression data etc. ####
# 1.2.1 Load your file (filteredOrgan.Rdata or .rds file)
lung <- readRDS("lungFiltered.RDS")

# 1.2.2 filter
lung <- multimodalR::updateGeneNames(
  filteredOutput = lung$Output,
  lung$Expressionmatrix
)
lungX <- multimodalR::filterForYChromosomeGenes(
  output = lung$Output,
  expressionmatrix = lung$Expressionmatrix
)
lungXY <- multimodalR::filterForXChromosomeGenes(
  output = lungX$Output,
  expressionmatrix = lungX$Expressionmatrix
)
lungXY <- remove.x(lungXY)

### Now you are set to work with your objects created by multimodalR - Process Data

# 2.2 Match meta data case_id with filteredOrgan case_id
lungMeta <- subset.metadata(metadataSelect, lungXY, key = "case_id")
lungMeta <- drop.unused.levels(lungMeta)
lungMeta <- add.stage.simple(
  meta_data = lungMeta,
  tumor_stage = "tumor_stage",
  new_name = "stage"
)
# optional: reorder columns / filter out small counts of factor levels
# lungMeta <- reorder.column(lungMeta, "primary.diagnosis", 20)

# 2.3 Make data tables and add expression values to datatables
lungMetaExpression <- create.data.tables.new(lungMeta, lungXY)
lungMetaExpression <- add.expression.new(
  lungXY,
  lungMetaExpression,
  key = "case_id"
)
# lungMetaExpression <- lapply(lungMetaExpression, function(x) reorder.column(x, "primary_diagnosis", 15))
# lungMetaExpression <- lapply(lungMetaExpression, function(x) reorder.column(x, "site_of_resection_or_biopsy", 15))

# Get an overview of the data.
DataExplorer::plot_bar(lungMetaExpression$SFTPB)
DataExplorer::plot_histogram(lungMeta)
# Decide if you want to drop certain factor levels because they are so small.

#### 3. Calculation ####
# 3.1 Proportions
lungDiagnosisProp <- make.prop.frame(
  lungMetaExpression, "group", "primary_diagnosis"
)
lungStageProp <- make.prop.frame(lungMetaExpression, "group", "stage")
lungSiteProp <- make.prop.frame(
  lungMetaExpression, "group", "site_of_resection_or_biopsy"
)
lungCalculatedMeta <- make.calculated.metadata.new(lungMetaExpression)

# NOTE(review): the numeric arguments below look like column positions in the
# per-gene tables (e.g. 7 = age_at_diagnosis, 6 = days_to_death in the order
# selected above) -- confirm against make.kruskal.frame / proptest.template.
lungAgeKruskal <- make.kruskal.frame(
  lungMetaExpression, 7, 12, "age.kruskal"
)
lungDeathInKruskal <- make.kruskal.frame(
  lungMetaExpression, 6, 12, "deathIn.kruskal"
)
lungDeadProp <- proptest.template(
  lungCalculatedMeta,
  col_counts = 4,
  col_total = 2,
  col_name = "dead.proportion",
  p_adjust = FALSE
)
lungGenderProp <- proptest.template(
  lungCalculatedMeta,
  col_counts = "n.male",
  col_total = 2,
  col_name = "gender.proportion",
  p_adjust = FALSE
)
lungMaleDeadProp <- proptest.dead.gender(lungCalculatedMeta, "male")
lungFemaleDeadProp <- proptest.dead.gender(lungCalculatedMeta, "female")
lungStageExpressionKruskal <- make.kruskal.frame(
  lungMetaExpression, 13, 11, "stageXexpression"
)
lungSiteExpressionKruskal <- make.kruskal.frame(
  lungMetaExpression, 13, 4, "siteXexpression"
)
lungDiagnosisExpressionKruskal <- make.kruskal.frame(
  lungMetaExpression, 13, "primary_diagnosis", "diagnosisXexpression"
)
pairwise.wilcox.test(
  lungMetaExpression$SFTA1P$expression,
  lungMetaExpression$SFTA1P$stage
)

# How often a gene is split into modality groups.
# NOTE(review): the original call left `first_grouping = ` empty, which fails
# with "argument is missing" once the callee uses it. "group" is inferred from
# the result name and the sibling call below -- confirm the intended column.
lungCountGroup <- counts.per.group(lungMetaExpression, first_grouping = "group")
lungCountGene <- counts.per.group(lungMetaExpression, ".id", "group")
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
You can’t perform that action at this time.