Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from scipy.stats import ttest_ind_from_stats"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Isoform-switch detection on the counts table loaded from fn.\n",
"# Per condition, the top-expressed transcript of a gene is recorded as its main isoform only if a\n",
"# t-test on the summary statistics separates it (p < alpha) from every other transcript of the gene;\n",
"# genes with more than one distinct main isoform across conditions end up in isoSwi.\n",
"fn=\"/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Quantification/all_counts_deseq2norm_stats.csv\"\n",
"df = pd.read_csv(fn)\n",
"conds = [\"day0\", \"day3\", \"day5\"]\n",
"n_reps = 2    # value written to every reps<cond> column and used as n in the t-test\n",
"alpha = 0.05  # p-value cutoff for the top transcript vs. each other transcript\n",
"\n",
"# Row-wise statistics over the columns whose name contains '<cond>_'.\n",
"for cond in conds:\n",
"    cond_cols = df.filter(like=cond + '_')\n",
"    df['mean' + cond] = cond_cols.mean(axis=1)\n",
"    df['std' + cond] = cond_cols.std(axis=1)\n",
"    df['stdn' + cond] = df['std' + cond] / np.sqrt(n_reps)  # not used further in this cell\n",
"    df['reps' + cond] = n_reps\n",
"\n",
"# valMax<cond> is 1 where a transcript's mean equals the maximum mean within its gene, else 0.\n",
"for cond in conds:\n",
"    gene_max = df.groupby('gene_id')['mean' + cond].transform('max')\n",
"    df['valMax' + cond] = (gene_max == df['mean' + cond]) * 1\n",
"\n",
"# Genes having a transcript that is on top in at least one, but not every, condition.\n",
"n_top = df.filter(like='valMax').sum(axis=1)\n",
"potentialSwitches = set(df.loc[(n_top > 0) & (n_top < len(conds)), 'gene_id'])\n",
"\n",
"df_genes = df.filter(like='gene').drop_duplicates()\n",
"df_genes = df_genes[df_genes['gene_id'].isin(potentialSwitches)].copy()\n",
"for cond in conds:\n",
"    # object dtype: the column later receives transcript-id strings, which a float NaN column\n",
"    # cannot hold without an incompatible-dtype upcast\n",
"    df_genes['mainIso' + cond] = pd.Series(np.nan, index=df_genes.index, dtype=object)\n",
"df_genes = df_genes.set_index('gene_id')\n",
"\n",
"for gene in potentialSwitches:\n",
"    data = df[df['gene_id'] == gene]\n",
"    if data.shape[0] < 2:\n",
"        continue  # nothing to compare the top transcript against\n",
"    for cond in conds:\n",
"        candidate = data[data['valMax' + cond] == 1]\n",
"        if candidate.empty:\n",
"            continue  # no row equals the gene maximum (happens when the means are NaN)\n",
"        # When several transcripts tie for the maximum, the first row is taken as the candidate.\n",
"        top_id = candidate['transcript_id'].values[0]\n",
"        cmean = candidate['mean' + cond].values[0]\n",
"        cstd = candidate['std' + cond].values[0]\n",
"        creps = candidate['reps' + cond].values[0]\n",
"        others = data[data['transcript_id'] != top_id]\n",
"        if others.empty:\n",
"            continue\n",
"        # One vectorised call tests the candidate against all other transcripts; [1] is the p-value.\n",
"        pvals = ttest_ind_from_stats(cmean, cstd, creps,\n",
"                                     others['mean' + cond].values,\n",
"                                     others['std' + cond].values,\n",
"                                     others['reps' + cond].values)[1]\n",
"        if (np.asarray(pvals) < alpha).all():\n",
"            df_genes.loc[gene, 'mainIso' + cond] = top_id\n",
"\n",
"# Keep the genes with more than one distinct (non-missing) main isoform across conditions.\n",
"isoSwi = df_genes[df_genes.filter(like='mainIso').nunique(axis=1) > 1]\n",
"#isoSwi.to_csv('/project/Neurodifferentiation_System/IsoformsAll/isoformSwitches0_01.csv')\n",
"iso_values = isoSwi.filter(like='mainIso').values.flatten()\n",
"mainIsoforms = set(iso_values[~pd.isnull(iso_values)])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Isoform-switch detection on the statistics table loaded from fn.\n",
"# Per day, the top-expressed transcript of a gene is recorded as its main isoform only if a t-test\n",
"# on the summary statistics separates it (p < alpha) from every other transcript of the gene;\n",
"# genes with more than one distinct main isoform across days are written to CSV as isoSwi.\n",
"fn='/project/Neurodifferentiation_System/IsoformsAll/stats_TPM.csv'\n",
"df = pd.read_csv(fn)\n",
"alpha = 0.01  # p-value cutoff for the top transcript vs. each other transcript\n",
"\n",
"# Days are the integer suffixes of the transcript_mean<day> columns; sorted for a stable order.\n",
"mean_cols = df.filter(regex='transcript_mean').columns\n",
"days = sorted({int(col.split('transcript_mean')[1]) for col in mean_cols})\n",
"\n",
"# valMax<day> is 1 where a transcript's mean equals the maximum mean within its gene, else 0.\n",
"for d in days:\n",
"    gene_max = df.groupby('gene_id')['transcript_mean' + str(d)].transform('max')\n",
"    df['valMax' + str(d)] = (gene_max == df['transcript_mean' + str(d)]) * 1\n",
"\n",
"# Genes having a transcript that is on top on at least one, but not every, day.\n",
"n_top = df.filter(like='valMax').sum(axis=1)\n",
"potentialSwitches = set(df.loc[(n_top > 0) & (n_top < len(days)), 'gene_id'])\n",
"\n",
"df_genes = df.filter(like='gene').drop_duplicates()\n",
"df_genes = df_genes[df_genes['gene_id'].isin(potentialSwitches)].copy()\n",
"for d in days:\n",
"    # object dtype: the column later receives transcript-id strings, which a float NaN column\n",
"    # cannot hold without an incompatible-dtype upcast\n",
"    df_genes['mainIso' + str(d)] = pd.Series(np.nan, index=df_genes.index, dtype=object)\n",
"df_genes = df_genes.set_index('gene_id')\n",
"\n",
"for gene in potentialSwitches:\n",
"    data = df[df['gene_id'] == gene]\n",
"    if data.shape[0] < 2:\n",
"        continue  # nothing to compare the top transcript against\n",
"    for d in days:\n",
"        day = str(d)\n",
"        candidate = data[data['valMax' + day] == 1]\n",
"        if candidate.empty:\n",
"            continue  # no row equals the gene maximum (happens when the means are NaN)\n",
"        # When several transcripts tie for the maximum, the first row is taken as the candidate.\n",
"        top_id = candidate['transcript_id'].values[0]\n",
"        cmean = candidate['transcript_mean' + day].values[0]\n",
"        cstd = candidate['transcript_std' + day].values[0]\n",
"        creps = candidate['number_reps' + day].values[0]\n",
"        others = data[data['transcript_id'] != top_id]\n",
"        if others.empty:\n",
"            continue\n",
"        # One vectorised call tests the candidate against all other transcripts; [1] is the p-value.\n",
"        pvals = ttest_ind_from_stats(cmean, cstd, creps,\n",
"                                     others['transcript_mean' + day].values,\n",
"                                     others['transcript_std' + day].values,\n",
"                                     others['number_reps' + day].values)[1]\n",
"        if (np.asarray(pvals) < alpha).all():\n",
"            df_genes.loc[gene, 'mainIso' + day] = top_id\n",
"\n",
"# Keep the genes with more than one distinct (non-missing) main isoform across days.\n",
"isoSwi = df_genes[df_genes.filter(like='mainIso').nunique(axis=1) > 1]\n",
"isoSwi.to_csv('/project/Neurodifferentiation_System/IsoformsAll/isoformSwitches0_01.csv')\n",
"iso_values = isoSwi.filter(like='mainIso').values.flatten()\n",
"mainIsoforms = set(iso_values[~pd.isnull(iso_values)])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Add per-condition summary columns and top-transcript flags to the counts table loaded from fn.\n",
"fn=\"/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Quantification/all_counts_deseq2norm.txt\"\n",
"df = pd.read_csv(fn)  # NOTE(review): default comma separator on a .txt file; confirm the delimiter\n",
"conds = [\"day0\", \"day3\", \"day5\"]\n",
"n_reps = 2  # value written to every reps<cond> column\n",
"\n",
"# Row-wise statistics over the columns whose name contains '<cond>_'.\n",
"for cond in conds:\n",
"    cond_cols = df.filter(like=cond + '_')\n",
"    df['mean' + cond] = cond_cols.mean(axis=1)\n",
"    df['std' + cond] = cond_cols.std(axis=1)\n",
"    df['stdn' + cond] = df['std' + cond] / np.sqrt(n_reps)\n",
"    df['reps' + cond] = n_reps\n",
"\n",
"# valMax<cond> is 1 where a transcript's mean equals the maximum mean within its gene, else 0.\n",
"for cond in conds:\n",
"    gene_max = df.groupby('gene_id')['mean' + cond].transform('max')\n",
"    df['valMax' + cond] = (gene_max == df['mean' + cond]) * 1\n",
"#df.to_csv(\"/project/owlmayerTemporary/Sid/nanopore-analysis/Results_5_1/Quantification/all_counts_deseq2norm_stats.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ADAM12\n",
"AL596087.2\n",
"CLCN1\n",
"COX11\n",
"DNMBP\n",
"EPB41L5\n",
"ERBB2\n",
"ERGIC3\n",
"F2RL1\n",
"FKBP11\n",
"GAB3\n",
"GABRB1\n",
"IQSEC2\n",
"LAGE3\n",
"LINC00623\n",
"MXRA7\n",
"NCAM1\n",
"NHLH1\n",
"NKAIN4\n",
"PFN2\n",
"PLXNA2\n",
"PPM1E\n",
"PSMD14\n",
"PTRH1\n",
"RDM1P5\n",
"RFC5\n",
"RNF24\n",
"RPS24\n",
"SEPTIN6\n",
"SEPTIN8\n",
"SHD\n",
"SMC5\n",
"SNORA40\n",
"STMN2\n",
"TFDP2\n",
"TRAF3IP2\n",
"UNC45B\n",
"VEPH1\n",
"WDR37\n",
"ZNRD2\n",
"40\n"
]
}
],
"source": [
"# Print the gene names in isoSwi in sorted order, one per line, followed by how many there are.\n",
"gene_names = sorted(isoSwi[\"gene_name\"].values)\n",
"for gene_name in gene_names:\n",
"    print(gene_name)\n",
"count = len(gene_names)\n",
"print(count)\n"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"2353"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of gene ids collected in potentialSwitches.\n",
"len(potentialSwitches)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Preview a handful of candidate gene ids rather than echoing the entire set.\n",
"list(potentialSwitches)[:10]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}