Skip to content
Permalink
main
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 422 lines (422 sloc) 11.8 KB
{
"cells": [
{
"cell_type": "markdown",
"id": "e1b92587-9337-4c8b-ae0f-fc4a50a15a9b",
"metadata": {},
"source": [
"# scRNA tutorial from scanpy\n"
]
},
{
"cell_type": "raw",
"id": "8bd3fb16-cb23-4b78-946e-c96cba29eb9a",
"metadata": {},
"source": [
"# Python dependencies of this notebook.\n",
"# logging is part of the Python standard library and must not be installed from PyPI.\n",
"!pip3 install scanpy anndata anndata2ri\n",
"!pip3 install scrublet bbknn scanorama\n",
"!pip3 install rpy2\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "4335e122-6033-40c3-bf91-fac7f005e810",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import os \n",
"import pandas as pd\n",
"import numpy as np\n",
"import scipy as sp\n",
"import seaborn as sn\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib import rcParams\n",
"from matplotlib import colors\n",
"\n",
"\n",
"import scanpy as sc\n",
"import anndata as ann\n",
"import scrublet as scr #doublet detection\n",
"import scanorama as scan #batch correction\n",
"import scanpy.external as sce #external modules\n",
"\n",
"#R interface\n",
"import rpy2.rinterface_lib.callbacks\n",
"import logging\n",
"from rpy2.robjects import pandas2ri\n",
"import anndata2ri\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "7ca0343b-b877-4a11-9d97-fca83f8dd063",
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The rpy2.ipython extension is already loaded. To reload it, use:\n",
" %reload_ext rpy2.ipython\n",
"-----\n",
"anndata 0.10.5.post1\n",
"scanpy 1.9.8\n",
"-----\n",
"PIL 10.0.1\n",
"anndata2ri 1.3.1\n",
"annoy NA\n",
"asttokens NA\n",
"backcall 0.2.0\n",
"cffi 1.15.1\n",
"comm 0.1.4\n",
"cycler 0.10.0\n",
"cython_runtime NA\n",
"dateutil 2.8.2\n",
"debugpy 1.8.0\n",
"decorator 5.1.1\n",
"executing 1.2.0\n",
"fbpca NA\n",
"h5py 3.10.0\n",
"igraph 0.11.3\n",
"intervaltree NA\n",
"ipykernel 6.25.2\n",
"jedi 0.19.0\n",
"jinja2 3.1.3\n",
"joblib 1.3.2\n",
"kiwisolver 1.4.5\n",
"leidenalg 0.10.2\n",
"llvmlite 0.42.0\n",
"markupsafe 2.1.5\n",
"matplotlib 3.8.0\n",
"mpl_toolkits NA\n",
"natsort 8.4.0\n",
"numba 0.59.0\n",
"numpy 1.26.0\n",
"packaging 23.1\n",
"pandas 2.1.1\n",
"parso 0.8.3\n",
"patsy 0.5.3\n",
"pexpect 4.8.0\n",
"pickleshare 0.7.5\n",
"platformdirs 3.10.0\n",
"prompt_toolkit 3.0.39\n",
"psutil 5.9.5\n",
"ptyprocess 0.7.0\n",
"pure_eval 0.2.2\n",
"pydev_ipython NA\n",
"pydevconsole NA\n",
"pydevd 2.9.5\n",
"pydevd_file_utils NA\n",
"pydevd_plugins NA\n",
"pydevd_tracing NA\n",
"pygments 2.16.1\n",
"pyparsing 3.1.1\n",
"pytz 2023.3.post1\n",
"rpy2 3.5.15\n",
"scanorama 1.7.4\n",
"scipy 1.11.2\n",
"scrublet NA\n",
"seaborn 0.13.2\n",
"session_info 1.0.0\n",
"six 1.16.0\n",
"sklearn 1.4.1.post1\n",
"sortedcontainers 2.4.0\n",
"stack_data 0.6.2\n",
"statsmodels 0.14.0\n",
"texttable 1.7.0\n",
"threadpoolctl 3.2.0\n",
"tornado 6.3.3\n",
"traitlets 5.10.0\n",
"tzlocal NA\n",
"wcwidth 0.2.6\n",
"zmq 25.1.1\n",
"-----\n",
"IPython 8.15.0\n",
"jupyter_client 8.3.1\n",
"jupyter_core 5.3.1\n",
"-----\n",
"Python 3.11.0 (main, Jun 4 2023, 22:34:19) [GCC 11.3.0]\n",
"Linux-5.15.0-76-generic-x86_64-with-glibc2.35\n",
"-----\n",
"Session information updated at 2024-03-07 20:31\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_875756/690074142.py:6: DeprecationWarning: The global conversion available with activate() is deprecated and will be removed in the next major release. Use a local converter.\n",
" anndata2ri.activate()\n"
]
}
],
"source": [
"# Only log rpy2 callback messages at ERROR level; comment this line out for more verbose R output\n",
"rpy2.rinterface_lib.callbacks.logger.setLevel(logging.ERROR)\n",
"\n",
"# Enable the global rpy2 conversions for pandas and anndata objects and load the R cell magic.\n",
"# NOTE(review): the recorded output of this cell shows a DeprecationWarning for the global\n",
"# activate() conversion; consider moving to a local converter.\n",
"pandas2ri.activate()\n",
"anndata2ri.activate()\n",
"%load_ext rpy2.ipython\n",
"\n",
"# Figure size and scanpy logging defaults\n",
"rcParams['figure.figsize'] = (8, 8)  # rescale figures\n",
"sc.settings.verbosity = 3\n",
"# Optional: sc.set_figure_params(dpi=200, dpi_save=300)\n",
"sc.logging.print_versions()\n",
"\n",
"# This notebook was created as part of a workshop, so all seaborn plots use the\n",
"# extra large 'poster' text size. The context can also be set to 'talk' or 'paper'.\n",
"sn.set_context('poster')\n"
]
},
{
"cell_type": "markdown",
"id": "bf9903dc-a88e-47e5-b4cc-a57cbb7a4542",
"metadata": {},
"source": [
"**Note: installing the required R packages from within this notebook was not successful.**"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2b9a9301-be67-4fa0-93a1-a9f8d8a57c37",
"metadata": {},
"outputs": [],
"source": [
"## Install the R packages used in this notebook.\n",
"## The earlier attempt failed for two reasons: the names were joined into one string, so R\n",
"## looked for a single package called 'scran RColorBrewer DropletUtils', and scran and\n",
"## DropletUtils are Bioconductor packages that install.packages cannot fetch from CRAN.\n",
"\n",
"import rpy2.robjects.packages as rpackages\n",
"\n",
"# R packages loaded by the %%R cell that follows\n",
"packnames = ['scran', 'RColorBrewer', 'DropletUtils']\n",
"\n",
"\n",
"def install_r_packages(packnames):\n",
"    \"\"\"Install the R packages in ``packnames`` that are not installed yet.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    packnames : list of str (a single str is accepted as one package name)\n",
"        R package names, one entry per package.\n",
"\n",
"    Notes\n",
"    -----\n",
"    Installation goes through BiocManager, which resolves both CRAN and\n",
"    Bioconductor packages. BiocManager itself is installed from CRAN if needed.\n",
"    \"\"\"\n",
"    if isinstance(packnames, str):\n",
"        packnames = [packnames]\n",
"\n",
"    # Skip everything that is already available so re-running the cell is cheap\n",
"    missing = [name for name in packnames if not rpackages.isinstalled(name)]\n",
"    if not missing:\n",
"        return\n",
"\n",
"    utils = rpackages.importr('utils')\n",
"    # Select the first CRAN mirror in the list\n",
"    utils.chooseCRANmirror(ind=1)\n",
"\n",
"    if not rpackages.isinstalled('BiocManager'):\n",
"        utils.install_packages('BiocManager')\n",
"    biocmanager = rpackages.importr('BiocManager')\n",
"\n",
"    # One package name per call: a space-joined string is treated as a single name by R\n",
"    for name in missing:\n",
"        biocmanager.install(name, ask=False, update=False)\n",
"\n",
"\n",
"# Call the function to install packages\n",
"install_r_packages(packnames)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf6544f2-defe-453c-884b-ba604c7a6316",
"metadata": {},
"outputs": [],
"source": [
"%%R\n",
"# Environment-specific library path override, kept for reference only:\n",
"#.libPaths(.libPaths()[c(3,2,1)])\n",
"\n",
"# Attach the R packages used in this notebook\n",
"library('scran')\n",
"library('RColorBrewer')\n",
"library('DropletUtils')\n"
]
},
{
"cell_type": "markdown",
"id": "8fb6d0d7-c39a-424e-a580-4ada9e1b2296",
"metadata": {},
"source": [
"## Set project file paths"
]
},
{
"cell_type": "raw",
"id": "2c8b2e31-564d-4913-a1bc-3f8243f0371a",
"metadata": {},
"source": [
"File path to the raw data. They are usually stored at a different location than the rest of the project."
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "84cfa14f-4b52-4aa0-b221-d0b2fb7f5740",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Root data directory. Set the SCRNA_DATA_PATH environment variable to run the notebook\n",
"# on another machine; without it the original location is used unchanged.\n",
"# os.path.join(..., '') guarantees the trailing separator that the path\n",
"# concatenations in the following cells rely on.\n",
"file_path = os.path.join(\n",
"    os.environ.get(\n",
"        'SCRNA_DATA_PATH',\n",
"        '/nexus/posix0/MAGE-flaski/service/posit/home/sjiang/scRNA_analysis/data/',\n",
"    ),\n",
"    '',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a9f45c6e-ab93-4091-998d-90175b1b5f0a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Raw input data\n",
"file_path_raw = f'{file_path}3k_PBMC/'\n",
"# All processed data and anndata files\n",
"data_dir = f'{file_path}day1_beginner/data/'\n",
"# All tabular data output, e.g. in .csv or .xls file format\n",
"table_dir = f'{file_path}day1_beginner/tables/'\n",
"# Figure directory used by scanpy\n",
"sc.settings.figdir = f'{file_path}day1_beginner/figures/'"
]
},
{
"cell_type": "raw",
"id": "20a5ac70-d5ef-4872-8b48-7ba90bcf35dc",
"metadata": {},
"source": [
"Comment: When repeating analyses, it is helpful to set a date variable and add it to every figure and table (see the datetime Python package).\n",
"e.g., \n",
"\n",
"\n",
"import matplotlib.pyplot as plt\n",
"from datetime import datetime\n",
"\n",
"# Generate some example data\n",
"x = [1, 2, 3, 4, 5]\n",
"y = [10, 15, 13, 18, 20]\n",
"\n",
"# Plot the data\n",
"plt.plot(x, y)\n",
"plt.xlabel('X-axis')\n",
"plt.ylabel('Y-axis')\n",
"\n",
"# Get the current date\n",
"current_date = datetime.now().strftime('%Y-%m-%d')\n",
"\n",
"# Add the date to the figure\n",
"plt.text(0.2, 1.05, current_date, fontsize=13,\n",
" transform=plt.gca().transAxes, ha='center'\n",
" )\n",
"\n",
"plt.show()\n"
]
},
{
"cell_type": "markdown",
"id": "66690d34-7618-47a9-b029-37d54b60feb8",
"metadata": {},
"source": [
"# Read Data"
]
},
{
"cell_type": "raw",
"id": "cd8322d8-e01b-4d49-904c-131827bd7ab9",
"metadata": {},
"source": [
"The dataset consists of 4k PBMCs (Human) provided by 10X Genomics. The data is an mtx directory with an mtx file (i.e. count matrix), two tsv files with barcodes (i.e. cell indices) and features (i.e. gene symbols). Scanpy unpacks the files (if the files are in gz archive format) and creates an anndata object with the read_10x_mtx function.\n",
"\n",
"The dataset is not filtered yet."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "faabd40e-18cb-446f-af80-d7f30a381e67",
"metadata": {
"lines_to_next_cell": 2
},
"outputs": [],
"source": [
"# Point file_path_raw at the raw count-matrix directory.\n",
"# Guarded so that re-running this cell does not append the sub-directory a second time.\n",
"if not file_path_raw.endswith('raw_gene_bc_matrices/'):\n",
"    file_path_raw = file_path_raw + 'raw_gene_bc_matrices/'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7517090c-0105-4a12-8643-9fd0cf50a974",
"metadata": {},
"outputs": [],
"source": [
"# Read the 10x mtx directory into an anndata object and display its shape\n",
"adata_raw = sc.read_10x_mtx(file_path_raw)\n",
"adata_raw.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a09a8a27-4604-49f0-95ec-d7139a8028d7",
"metadata": {},
"outputs": [],
"source": [
"# Report how many observations the unfiltered object holds\n",
"print(f'Total number of observations: {adata_raw.n_obs:d}')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1785bcf8-8f49-483b-b3a4-e7375fda0be4",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'/nexus/posix0/MAGE-flaski/service/hpc/home/sjiang'"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%pwd"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "70433400-c250-4f2c-9eed-9efdeae81e3b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"jupytext": {
"cell_markers": "{{{,}}}"
},
"kernelspec": {
"display_name": "Python 3.11.0",
"language": "python",
"name": "py3.11.0"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}