Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
isoform_differentiation/test.R
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
220 lines (164 sloc)
10.6 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("IsoformSwitchAnalyzeR") | |
library(BSgenome) | |
library(BSgenome.Hsapiens.UCSC.hg38) | |
library(ggplot2) | |
Hsapiens | |
data <- read.csv("/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Quantification/all_counts.txt") | |
data <- data[!(data$gene_name == ""),] | |
filter <- read.csv("/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/multiple_exons.txt", header = FALSE) | |
data <- data[which(data$transcript_id %in% filter$V1),] | |
counts <- data[,c(11,2:7)] | |
colnames(counts)[1] <- "isoform_id" | |
counts$isoform_id <- sapply(counts$isoform_id, as.character) | |
data <- read.csv("/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Quantification/all_counts_deseq2norm.txt") | |
data <- data[!(data$gene_name == ""),] | |
filter <- read.csv("/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/multiple_exons.txt", header = FALSE) | |
data <- data[which(data$transcript_id %in% filter$V1),] | |
abundance <- data[,c(11,2:7)] | |
colnames(abundance)[1] <- "isoform_id" | |
abundance$isoform_id <- sapply(abundance$isoform_id, as.character) | |
#gene_ensembl <- unique(sort(paste(data$gene_name,data$gene_id,sep="_"))) | |
#write.table(gene_ensembl, file = "/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Lists/all_genes.txt", row.names = FALSE, | |
# col.names = FALSE, quote = FALSE) | |
design <- data.frame(sampleID = colnames(abundance)[-1],condition = gsub('_.*', '', colnames(abundance)[-1])) | |
asswitchlist <- importRdata(isoformExonAnnoation = "/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/GffCompare/nanopore.combined_filt_tcons_gene.gtf", | |
isoformNtFasta = "/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Pinfish/corrected_transcriptome_polished_collapsed_tcons.fas", | |
isoformRepExpression = abundance, | |
isoformCountMatrix = counts, | |
designMatrix = design) | |
#asswitchlist <- preFilter(switchAnalyzeRlist = asswitchlist,geneExpressionCutoff = 1,isoformExpressionCutoff = 0,removeSingleIsoformGenes = TRUE) | |
asswitchlist_analyzed <- isoformSwitchTestDRIMSeq(switchAnalyzeRlist = asswitchlist, reduceToSwitchingGenes=FALSE) | |
#asswitchlist_analyzed <- isoformSwitchTestDRIMSeq(switchAnalyzeRlist = asswitchlist, testIntegration = "isoform_only", reduceToSwitchingGenes=TRUE) | |
asswitchlist_analyzed <- analyzeORF(asswitchlist_analyzed, orfMethod = "longest", genomeObject = Hsapiens) | |
asswitchlist_analyzed <- extractSequence(asswitchlist_analyzed, pathToOutput = "/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/IsoformAnalyzer/", filterAALength = TRUE, alsoSplitFastaFile = TRUE) | |
asswitchlist_analyzed <- analyzeCPAT(asswitchlist_analyzed, | |
pathToCPATresultFile = "/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/IsoformAnalyzer/cpat_results.txt", | |
codingCutoff = 0.725, | |
removeNoncodinORFs = FALSE) | |
asswitchlist_analyzed <- analyzePFAM(asswitchlist_analyzed, | |
pathToPFAMresultFile = "/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/IsoformAnalyzer/pfam_results_cl.txt") | |
asswitchlist_analyzed <- analyzeSignalP(asswitchlist_analyzed, | |
pathToSignalPresultFile = "/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/IsoformAnalyzer/signalp_output_protein_type.txt") | |
asswitchlist_analyzed <- analyzeIUPred2A(asswitchlist_analyzed, | |
pathToIUPred2AresultFile = "/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/IsoformAnalyzer/iupred2a_results.txt") | |
asswitchlist_analyzed <- analyzeAlternativeSplicing(switchAnalyzeRlist = asswitchlist_analyzed) | |
consequencesOfInterest <- c('intron_retention','coding_potential','NMD_status','domains_identified','ORF_seq_similarity') | |
asswitchlist_analyzed <- analyzeSwitchConsequences( | |
asswitchlist_analyzed, | |
#consequencesToAnalyze = consequencesOfInterest, | |
dIFcutoff = 0.3, | |
showProgress=FALSE | |
) | |
################## | |
top <- extractTopSwitches(asswitchlist_analyzed, | |
filterForConsequences = TRUE, | |
sortByQvals = TRUE, | |
n = NA) | |
row.names(top) <- top$Rank | |
htop <- table(top$gene_switch_q_value) | |
top <- top[top$gene_switch_q_value %in% names(htop[which(htop == 1)]),] | |
sig_genes <- unique(top$gene_name) | |
#write.table(sig_genes[order(sig_genes)], file = "/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Lists/IsoformSwitchAnalyzeR_List.txt", row.names = FALSE, | |
# col.names = FALSE, quote = FALSE) | |
top_func <- extractTopSwitches(asswitchlist_analyzed, | |
filterForConsequences = TRUE, | |
sortByQvals = TRUE, | |
n = NA) | |
row.names(top_func) <- top_func$Rank | |
htop <- table(top_func$gene_switch_q_value) | |
top_func <- top_func[top_func$gene_switch_q_value %in% names(htop[which(htop == 1)]),] | |
sig_genes <- unique(top$gene_name) | |
write.table(sig_genes[order(sig_genes)], file = "/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Lists/IsoformSwitchAnalyzeR_Regular_Func_List.txt", row.names = FALSE, | |
col.names = FALSE, quote = FALSE) | |
switchPlotTopSwitches( | |
switchAnalyzeRlist = asswitchlist_analyzed, | |
n = 10, | |
filterForConsequences = TRUE, | |
sortByQvals = TRUE | |
) | |
jelena <- read.csv("/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Lists/JelenaList.txt", header = FALSE) | |
jelena$V1[as.character(jelena$V1) %in% sig_genes] | |
develop <- read.csv("/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Lists/Developmental.txt", header = FALSE) | |
develop$V1[as.character(develop$V1) %in% sig_genes] | |
environ <- read.csv("/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Lists/Environment.txt", header = FALSE) | |
environ$V1[as.character(environ$V1) %in% sig_genes] | |
histonemod <- read.csv("/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Lists/HistoneModification.txt", header = FALSE) | |
histonemod$V1[as.character(histonemod$V1) %in% sig_genes] | |
housekeeping <- read.csv("/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Lists/Housekeeping.txt", header = FALSE) | |
housekeeping$V1[as.character(housekeeping$V1) %in% sig_genes] | |
tf <- read.csv("/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Lists/TF.txt", header = FALSE) | |
tf$V1[as.character(tf$V1) %in% sig_genes] | |
top <- top[top$gene_name %in% (tf$V1[as.character(tf$V1) %in% sig_genes]),] | |
pdf("rplot_tfs.pdf") | |
for (i in 1:nrow(top)) { | |
switchPlot(asswitchlist_analyzed,gene = top$gene_id[i], condition1 = top$condition_1[i], condition2 = top$condition_2[i]) | |
} | |
dev.off() | |
mapply(switchPlot,asswitchlist_analyzed,top$gene_name,top$condition_1,top$condition_2) | |
switchPlot(asswitchlist_analyzed, gene = 'SMARCB1', condition1 = "day0", condition2 = "day5") | |
switchPlot(asswitchlist_analyzed, gene = 'RPS24', condition1 = "day0", condition2 = "day5") | |
switchPlot(asswitchlist_analyzed, gene = 'ERBB2', condition1 = "day0", condition2 = "day3") | |
switchPlot(asswitchlist_analyzed, gene = 'MYL6', condition1 = "day0", condition2 = "day5") | |
switchPlot(asswitchlist_analyzed, gene = 'LAGE3', condition1 = "day0", condition2 = "day3") | |
switchPlot(asswitchlist_analyzed, gene = 'GRWD1', condition1 = "day0", condition2 = "day5") | |
switchPlot(asswitchlist_analyzed, gene = 'XLOC_004946', condition1 = "day0", condition2 = "day5") | |
switchPlot(asswitchlist_analyzed, gene = 'XLOC_005559', condition1 = "day0", condition2 = "day5") | |
switchPlot(asswitchlist_analyzed, gene = 'LAD1', condition1 = "day0", condition2 = "day3") | |
switchPlot(asswitchlist_analyzed, gene = 'XLOC_016569', condition1 = "day0", condition2 = "day5") | |
switchPlot(asswitchlist_analyzed, gene = 'GLS', condition1 = "day3", condition2 = "day5") | |
switchPlot(asswitchlist_analyzed, gene = 'NRF1', condition1 = "day3", condition2 = "day5") | |
extractSplicingSummary( | |
asswitchlist_analyzed, | |
asFractionTotal = FALSE, | |
plotGenes=FALSE | |
) | |
extractSwitchOverlap(asswitchlist_analyzed) | |
splicingEnrichment <- extractSplicingEnrichment( | |
asswitchlist_analyzed, | |
splicingToAnalyze='all', | |
returnResult=TRUE, | |
returnSummary=TRUE | |
) | |
splicingEnrichment <- extractConsequenceEnrichment( | |
asswitchlist_analyzed, | |
consequencesToAnalyze='all', | |
analysisOppositeConsequence=TRUE, | |
returnResult=FALSE | |
) | |
subset(splicingEnrichment, splicingEnrichment$AStype == 'IR') | |
extractSplicingEnrichmentComparison( | |
asswitchlist_analyzed, | |
splicingToAnalyze=c('ATTS',"IR"), # Those significant in COAD in the previous analysis | |
returnResult=FALSE # Preventing the summary statistics to be returned as a data.frame | |
) | |
analyzeSwitchConsequences(asswitchlist_analyzed) | |
extractConsequenceSummary(asswitchlist_analyzed, consequencesToAnalyze='all',asFractionTotal = FALSE) | |
extractConsequenceEnrichment(asswitchlist_analyzed, consequencesToAnalyze='all') | |
ggplot(data=asswitchlist_analyzed$isoformFeatures, aes(x=dIF, y=-log10(isoform_switch_q_value))) + | |
geom_point( | |
aes( color=abs(dIF) > 0.3 & isoform_switch_q_value < 0.05 ), # default cutoff | |
size=1 | |
) + | |
geom_hline(yintercept = -log10(0.05), linetype='dashed') + # default cutoff | |
geom_vline(xintercept = c(-0.3, 0.3), linetype='dashed') + # default cutoff | |
facet_wrap( ~ condition_2) + | |
#facet_grid(condition_1 ~ condition_2) + # alternative to facet_wrap if you have overlapping conditions | |
scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) + | |
labs(x='dIF', y='-Log10 ( Isoform Switch Q Value )') + | |
theme_bw() | |
###################################### | |
consequencesOfInterest <- c('intron_retention','coding_potential','NMD_status','domains_identified','ORF_seq_similarity') | |
asswitchlist_test <- analyzeSwitchConsequences( | |
asswitchlist_test, | |
consequencesToAnalyze = consequencesOfInterest, | |
dIFcutoff = 0.3, | |
showProgress=FALSE | |
) | |
asswitchlist <- importRdata(isoformExonAnnoation = "/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/GffCompare/nanopore.combined_filt_tcons.gtf", | |
isoformRepExpression = abundance, | |
isoformNtFasta = "/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Pinfish/corrected_transcriptome_polished_collapsed_tcons.fas", | |
isoformCountMatrix = counts, | |
designMatrix = design) | |
exampleSwitchList <- isoformSwitchAnalysisPart1(switchAnalyzeRlist = asswitchlist, genomeObject = Hsapiens, dIFcutoff = 0.3, outputSequences = FALSE, prepareForWebServers = FALSE) | |
exampleSwitchList <- isoformSwitchAnalysisPart2(switchAnalyzeRlist = exampleSwitchList, dIFcutoff = 0.3, n = 10, removeNoncodinORFs = TRUE,outputPlots = FALSE) | |
switchPlot(exampleSwitchList,gene = "XLOC_009192", condition1 = "day0", condition2 = "day5") |