Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
64fce7794e
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
382 lines (355 sloc) 23.5 KB
@article{tang_2009,
title = {{mRNA}-{Seq} whole-transcriptome analysis of a single cell},
volume = {6},
issn = {1548-7091, 1548-7105},
url = {http://www.nature.com/articles/nmeth.1315},
doi = {10.1038/nmeth.1315},
language = {en},
number = {5},
urldate = {2018-07-26},
journal = {Nature Methods},
author = {Tang, Fuchou and Barbacioru, Catalin and Wang, Yangzhou and Nordman, Ellen and Lee, Clarence and Xu, Nanlan and Wang, Xiaohui and Bodeau, John and Tuch, Brian B and Siddiqui, Asim and Lao, Kaiqin and Surani, M Azim},
month = may,
year = {2009},
pages = {377--382}
}
@manual{r_core_team_2018,
  author       = {{R Core Team}},
  title        = {R: A Language and Environment for Statistical Computing},
  organization = {R Foundation for Statistical Computing},
  address      = {Vienna, Austria},
  year         = {2018},
  url          = {https://www.R-project.org/}
}
@article{ringner_2008,
title = {What is principal component analysis?},
volume = {26},
copyright = {2008 Nature Publishing Group},
issn = {1546-1696},
url = {https://www.nature.com/articles/nbt0308-303},
doi = {10.1038/nbt0308-303},
abstract = {Principal component analysis is often incorporated into genome-wide expression studies, but what is it and how can it be used to explore high-dimensional data?},
pages = {303--304},
number = {3},
journal = {Nature Biotechnology},
author = {Ringnér, Markus},
urldate = {2017-11-01},
month = mar,
year = {2008},
langid = {english}
}
@article{cellity_2016,
title = {Classification of low quality cells from single-cell {RNA}-seq data},
volume = {17},
issn = {1474-760X},
url = {http://genomebiology.com/2016/17/1/29},
doi = {10.1186/s13059-016-0888-1},
language = {en},
number = {1},
urldate = {2018-07-27},
journal = {Genome Biology},
author = {Ilicic, Tomislav and Kim, Jong Kyoung and Kolodziejczyk, Aleksandra A. and Bagger, Frederik Otzen and McCarthy, Davis James and Marioni, John C. and Teichmann, Sarah A.},
month = dec,
year = {2016},
pages = {29}
}
@article{maaten_tsne_2008,
title = {Visualizing {Data} using t-{SNE}},
volume = {9},
issn = {1533-7928},
url = {http://www.jmlr.org/papers/v9/vandermaaten08a.html},
urldate = {2018-07-27},
journal = {Journal of Machine Learning Research},
author = {van der Maaten, Laurens and Hinton, Geoffrey},
month = nov,
year = {2008},
pages = {2579--2605}
}
@article{zhang_one_2018,
title = {One read per cell per gene is optimal for single-cell {RNA}-{Seq}},
copyright = {© 2018, Posted by Cold Spring Harbor Laboratory. This pre-print is available under a Creative Commons License (Attribution-NonCommercial-NoDerivs 4.0 International), CC BY-NC-ND 4.0, as described at http://creativecommons.org/licenses/by-nc-nd/4.0/},
url = {https://www.biorxiv.org/content/early/2018/08/09/389296},
doi = {10.1101/389296},
abstract = {An underlying question for virtually all single-cell RNA sequencing experiments is how to allocate the limited sequencing budget: deep sequencing of a few cells or shallow sequencing of many cells? A mathematical framework reveals that, for estimating many important gene properties, the optimal allocation is to sequence at the depth of one read per cell per gene. Interestingly, the corresponding optimal estimator is not the widely-used plug-in estimator but one developed via empirical Bayes.},
language = {en},
urldate = {2018-08-13},
journal = {bioRxiv},
author = {Zhang, Martin J. and Ntranos, Vasilis and Tse, David},
month = aug,
year = {2018},
pages = {389296}
}
@article{robinson_tmm_2010,
title = {A scaling normalization method for differential expression analysis of {RNA}-seq data},
volume = {11},
issn = {1474-760X},
url = {https://doi.org/10.1186/gb-2010-11-3-r25},
doi = {10.1186/gb-2010-11-3-r25},
abstract = {The fine detail provided by sequencing-based transcriptome surveys suggests that RNA-seq is likely to become the platform of choice for interrogating steady state RNA. In order to discover biologically important changes in expression, we show that normalization continues to be an essential step in the analysis. We outline a simple and effective method for performing normalization and show dramatically improved results for inferring differential expression in simulated and publicly available data sets.},
number = {3},
urldate = {2018-08-02},
journal = {Genome Biology},
author = {Robinson, Mark D. and Oshlack, Alicia},
month = mar,
year = {2010},
pages = {R25}
}
@article{lun_pooling_2016,
title = {Pooling across cells to normalize single-cell {RNA} sequencing data with many zero counts},
volume = {17},
issn = {1474-760X},
url = {https://doi.org/10.1186/s13059-016-0947-7},
doi = {10.1186/s13059-016-0947-7},
abstract = {Normalization of single-cell RNA sequencing data is necessary to eliminate cell-specific biases prior to downstream analyses. However, this is not straightforward for noisy single-cell data where many counts are zero. We present a novel approach where expression values are summed across pools of cells, and the summed values are used for normalization. Pool-based size factors are then deconvolved to yield cell-based factors. Our deconvolution approach outperforms existing methods for accurate normalization of cell-specific biases in simulated data. Similar behavior is observed in real data, where deconvolution improves the relevance of results of downstream analyses.},
number = {1},
urldate = {2018-08-01},
journal = {Genome Biology},
author = {Lun, Aaron T. L. and Bach, Karsten and Marioni, John C.},
month = apr,
year = {2016},
pages = {75}
}
@article{bacher_scnorm_2017,
title = {{SCnorm}: robust normalization of single-cell {RNA}-seq data},
volume = {14},
copyright = {2017 Nature Publishing Group},
issn = {1548-7105},
shorttitle = {{SCnorm}},
url = {https://www.nature.com/articles/nmeth.4263},
doi = {10.1038/nmeth.4263},
abstract = {The normalization of RNA-seq data is essential for accurate downstream inference, but the assumptions upon which most normalization methods are based are not applicable in the single-cell setting. Consequently, applying existing normalization methods to single-cell RNA-seq data introduces artifacts that bias downstream analyses. To address this, we introduce SCnorm for accurate and efficient normalization of single-cell RNA-seq data.},
language = {en},
number = {6},
urldate = {2018-09-18},
journal = {Nature Methods},
author = {Bacher, Rhonda and Chu, Li-Fang and Leng, Ning and Gasch, Audrey P. and Thomson, James A. and Stewart, Ron M. and Newton, Michael and Kendziorski, Christina},
month = jun,
year = {2017},
pages = {584--586}
}
@article{gandolfo_rle_2018,
title = {{RLE} {Plots}: {Visualising} {Unwanted} {Variation} in {High} {Dimensional} {Data}},
volume = {13},
issn = {1932-6203},
shorttitle = {{RLE} {Plots}},
url = {http://arxiv.org/abs/1704.03590},
doi = {10.1371/journal.pone.0191629},
abstract = {Unwanted variation can be highly problematic and so its detection is often crucial. Relative log expression (RLE) plots are a powerful tool for visualising such variation in high dimensional data. We provide a detailed examination of these plots, with the aid of examples and simulation, explaining what they are and what they can reveal. RLE plots are particularly useful for assessing whether a procedure aimed at removing unwanted variation, i.e. a normalisation procedure, has been successful. These plots, while originally devised for gene expression data from microarrays, can also be used to reveal unwanted variation in many other kinds of high dimensional data, where such variation can be problematic.},
number = {2},
urldate = {2018-08-13},
journal = {PLOS ONE},
author = {Gandolfo, Luke C. and Speed, Terence P.},
month = feb,
year = {2018},
eprint = {1704.03590},
eprinttype = {arXiv},
keywords = {Statistics - Methodology},
pages = {e0191629}
}
@article{ramskold_noise_2012,
title = {Full-length {mRNA}-{Seq} from single-cell levels of {RNA} and individual circulating tumor cells},
volume = {30},
copyright = {2012 Nature Publishing Group},
issn = {1546-1696},
url = {https://www.nature.com/articles/nbt.2282},
doi = {10.1038/nbt.2282},
abstract = {Genome-wide transcriptome analyses are routinely used to monitor tissue-, disease- and cell type–specific gene expression, but it has been technically challenging to generate expression profiles from single cells. Here we describe a robust mRNA-Seq protocol (Smart-Seq) that is applicable down to single cell levels. Compared with existing methods, Smart-Seq has improved read coverage across transcripts, which enhances detailed analyses of alternative transcript isoforms and identification of single-nucleotide polymorphisms. We determined the sensitivity and quantitative accuracy of Smart-Seq for single-cell transcriptomics by evaluating it on total RNA dilution series. We found that although gene expression estimates from single cells have increased noise, hundreds of differentially expressed genes could be identified using few cells per cell type. Applying Smart-Seq to circulating tumor cells from melanomas, we identified distinct gene expression patterns, including candidate biomarkers for melanoma circulating tumor cells. Our protocol will be useful for addressing fundamental biological problems requiring genome-wide transcriptome profiling in rare cells.},
language = {en},
number = {8},
urldate = {2018-08-17},
journal = {Nature Biotechnology},
author = {Ramsköld, Daniel and Luo, Shujun and Wang, Yu-Chieh and Li, Robin and Deng, Qiaolin and Faridani, Omid R. and Daniels, Gregory A. and Khrebtukova, Irina and Loring, Jeanne F. and Laurent, Louise C. and Schroth, Gary P. and Sandberg, Rickard},
month = aug,
year = {2012},
pages = {777--782}
}
@article{andrews_dropout_2018,
title = {Dropout-based feature selection for {scRNASeq}},
url = {http://biorxiv.org/lookup/doi/10.1101/065094},
doi = {10.1101/065094},
abstract = {Features selection is a key step in many single-cell RNASeq (scRNASeq) analyses. Feature selection is intended to preserve biologically relevant information while removing genes only subject to technical noise. As it is frequently performed prior to dimensionality reduction, clustering and pseudotime analyses, feature selection can have a major impact on the results. Several different approaches have been proposed for unsupervised feature selection from unprocessed single-cell expression matrices, most based upon identifying highly variable genes in the dataset. We present two methods which take advantage of the prevalence of zeros (dropouts) in scRNASeq data to identify features. We show that dropout-based feature selection outperforms variance-based feature selection for multiple applications of single-cell RNASeq.},
urldate = {2018-08-20},
journal = {bioRxiv},
author = {Andrews, Tallulah S. and Hemberg, Martin},
month = may,
year = {2018},
pages = {065094}
}
@article{mojtahedi_fate_2016,
title = {Cell {Fate} {Decision} as {High}-{Dimensional} {Critical} {State} {Transition}},
volume = {14},
issn = {1545-7885},
url = {http://dx.plos.org/10.1371/journal.pbio.2000640},
doi = {10.1371/journal.pbio.2000640},
language = {en},
number = {12},
urldate = {2018-09-25},
journal = {PLOS Biology},
author = {Mojtahedi, Mitra and Skupin, Alexander and Zhou, Joseph and Castaño, Ivan G. and Leong-Quong, Rebecca Y. Y. and Chang, Hannah and Trachana, Kalliopi and Giuliani, Alessandro and Huang, Sui},
month = dec,
year = {2016},
pages = {e2000640}
}
@book{bellman_dimensionality_2016,
address = {Princeton, NJ},
title = {Adaptive {Control} {Processes}: {A} {Guided} {Tour}},
isbn = {9781400874668},
url = {https://doi.org/10.1515/9781400874668},
doi = {10.1515/9781400874668},
abstract = {The aim of this work is to present a unified approach to the modern field of control theory and to provide a technique for making problems involving deterministic, stochastic, and adaptive processes of both linear and nonlinear type amenable to machine solution. Mr. Bellman has used the theory of dynamic programming to formulate, analyze, and prepare these processes for numerical treatment by digital computers. The unique concept of the book is that of a single problem stretching from recognition and formulation to analytic treatment and computational solution. Due to the emphasis upon ideas and concepts, this book is equally suited for the pure and applied mathematician, and for control engineers in all fields. Originally published in 1961. The Princeton Legacy Library uses the latest print-on-demand technology to again make available previously out-of-print books from the distinguished backlist of Princeton University Press. These paperback editions preserve the original texts of these important books while presenting them in durable paperback editions. The goal of the Princeton Legacy Library is to vastly increase access to the rich scholarly heritage found in the thousands of books published by Princeton University Press since its founding in 1905.},
language = {en},
urldate = {2018-09-04},
publisher = {Princeton University Press},
author = {Bellman, Richard E},
year = {2016},
note = {OCLC: 954124493}
}
@article{anderson_space_1984,
title = {Computer {Science} and {Statistics}: {Proceedings} of the {Fifteenth} {Symposium} on the {Interface}},
volume = {40},
issn = {0006-341X},
shorttitle = {Computer {Science} and {Statistics}},
url = {https://www.jstor.org/stable/2530952?origin=crossref},
doi = {10.2307/2530952},
number = {3},
urldate = {2018-09-04},
journal = {Biometrics},
author = {Anderson, A. J. B. and Gentle, J. E.},
month = sep,
year = {1984},
pages = {876}
}
@article{kenkel_nmds_1986,
title = {Applying {Metric} and {Nonmetric} {Multidimensional} {Scaling} to {Ecological} {Studies}: {Some} {New} {Results}},
volume = {67},
issn = {0012-9658},
shorttitle = {Applying {Metric} and {Nonmetric} {Multidimensional} {Scaling} to {Ecological} {Studies}},
url = {http://doi.wiley.com/10.2307/1939814},
doi = {10.2307/1939814},
language = {en},
number = {4},
urldate = {2018-09-04},
journal = {Ecology},
author = {Kenkel, N. C. and Orloci, L.},
month = aug,
year = {1986},
pages = {919--928}
}
@article{halko_rsvd_2009,
title = {Finding structure with randomness: {Probabilistic} algorithms for constructing approximate matrix decompositions},
shorttitle = {Finding structure with randomness},
url = {http://arxiv.org/abs/0909.4061},
abstract = {Low-rank matrix approximations, such as the truncated singular value decomposition and the rank-revealing QR decomposition, play a central role in data analysis and scientific computing. This work surveys and extends recent research which demonstrates that randomization offers a powerful tool for performing low-rank matrix approximation. These techniques exploit modern computational architectures more fully than classical methods and open the possibility of dealing with truly massive data sets. This paper presents a modular framework for constructing randomized algorithms that compute partial matrix decompositions. These methods use random sampling to identify a subspace that captures most of the action of a matrix. The input matrix is then compressed---either explicitly or implicitly---to this subspace, and the reduced matrix is manipulated deterministically to obtain the desired low-rank factorization. In many cases, this approach beats its classical competitors in terms of accuracy, speed, and robustness. These claims are supported by extensive numerical experiments and a detailed error analysis.},
urldate = {2018-09-06},
journal = {arXiv:0909.4061 [math]},
author = {Halko, Nathan and Martinsson, Per-Gunnar and Tropp, Joel A.},
month = sep,
year = {2009},
eprint = {0909.4061},
eprinttype = {arXiv},
keywords = {Mathematics - Numerical Analysis, Mathematics - Probability}
}
@article{erichson_rsvd_2016,
title = {Randomized {Matrix} {Decompositions} using {R}},
url = {http://arxiv.org/abs/1608.02148},
abstract = {Matrix decompositions are fundamental tools in the area of applied mathematics, statistical computing, and machine learning. In particular, low-rank matrix decompositions are vital, and widely used for data analysis, dimensionality reduction, and data compression. Massive datasets, however, pose a computational challenge for traditional algorithms, placing significant constraints on both memory and processing power. Recently, the powerful concept of randomness has been introduced as a strategy to ease the computational load. The essential idea of probabilistic algorithms is to employ some amount of randomness in order to derive a smaller matrix from a high-dimensional data matrix. The smaller matrix is then used to compute the desired low-rank approximation. Such algorithms are shown to be computationally efficient for approximating matrices with low-rank structure. We present the R package rsvd, and provide a tutorial introduction to randomized matrix decompositions. Specifically, randomized routines for the singular value decomposition, (robust) principal component analysis, interpolative decomposition, and CUR decomposition are discussed. Several examples demonstrate the routines, and show the computational advantage over other methods implemented in R.},
urldate = {2018-09-06},
journal = {arXiv:1608.02148 [cs, stat]},
author = {Erichson, N. Benjamin and Voronin, Sergey and Brunton, Steven L. and Kutz, J. Nathan},
month = aug,
year = {2016},
eprint = {1608.02148},
eprinttype = {arXiv},
keywords = {Statistics - Computation, Computer Science - Mathematical Software, Statistics - Methodology}
}
@article{mcinnes_umap_2018,
title = {{UMAP}: {Uniform} {Manifold} {Approximation} and {Projection} for {Dimension} {Reduction}},
shorttitle = {{UMAP}},
url = {http://arxiv.org/abs/1802.03426},
abstract = {UMAP (Uniform Manifold Approximation and Projection) is a novel manifold learning technique for dimension reduction. UMAP is constructed from a theoretical framework based in Riemannian geometry and algebraic topology. The result is a practical scalable algorithm that applies to real world data. The UMAP algorithm is competitive with t-SNE for visualization quality, and arguably preserves more of the global structure with superior run time performance. Furthermore, UMAP as described has no computational restrictions on embedding dimension, making it viable as a general purpose dimension reduction technique for machine learning.},
urldate = {2018-09-06},
journal = {arXiv:1802.03426 [cs, stat]},
author = {McInnes, Leland and Healy, John},
month = feb,
year = {2018},
eprint = {1802.03426},
eprinttype = {arXiv},
keywords = {Statistics - Machine Learning, Computer Science - Computational Geometry, Computer Science - Machine Learning}
}
@article{wehrens_som_2007,
title = {Self- and {Super}-organizing {Maps} in {R}: {The} {kohonen} {Package}},
volume = {21},
issn = {1548-7660},
shorttitle = {Self- and {Super}-organizing {Maps} in {R}},
url = {http://www.jstatsoft.org/v21/i05/},
doi = {10.18637/jss.v021.i05},
language = {en},
number = {5},
urldate = {2018-09-06},
journal = {Journal of Statistical Software},
author = {Wehrens, Ron and Buydens, Lutgarde M. C.},
year = {2007}
}
@book{kohonen_som_2001,
address = {Berlin, Heidelberg},
series = {Springer {Series} in {Information} {Sciences}},
title = {Self-{Organizing} {Maps}},
volume = {30},
isbn = {9783540679219, 9783642569272},
url = {http://link.springer.com/10.1007/978-3-642-56927-2},
urldate = {2018-09-06},
publisher = {Springer Berlin Heidelberg},
author = {Kohonen, Teuvo},
editor = {Huang, Thomas S. and Kohonen, Teuvo and Schroeder, Manfred R.},
year = {2001},
doi = {10.1007/978-3-642-56927-2}
}
@phdthesis{wirth_som_2012,
address = {Leipzig},
title = {Analysis of large-scale molecular biological data using self-organizing maps},
school = {Universit{\"a}t Leipzig},
author = {Wirth, Henry},
month = jun,
year = {2012}
}
@article{mcinnes_hdbscan_2017,
title = {{hdbscan}: {Hierarchical} density based clustering},
shorttitle = {{hdbscan}},
volume = {2},
number = {11},
url = {https://joss.theoj.org/papers/10.21105/joss.00205},
language = {en},
urldate = {2018-09-11},
journal = {The Journal of Open Source Software},
author = {McInnes, Leland and Healy, John and Astels, Steve},
month = mar,
year = {2017},
pages = {205},
doi = {10.21105/joss.00205}
}
@inproceedings{ester_dbscan_1996,
title = {A density-based algorithm for discovering clusters in large spatial databases with noise},
booktitle = {Proceedings of the Second International Conference on Knowledge Discovery and Data Mining},
abstract = {Clustering algorithms are attractive for the task of class identification in spatial databases. However, the application to large spatial databases rises the following requirements for clustering algorithms: minimal requirements of domain knowledge to determine the input parameters, discovery of clusters with arbitrary shape and good efficiency on large databases. The well-known clustering algorithms offer no solution to the combination of these requirements. In this paper, we present the new clustering algorithm DBSCAN relying on a density-based notion of clusters which is designed to discover clusters of arbitrary shape. DBSCAN requires only one input parameter and supports the user in determining an appropriate value for it. We performed an experimental evaluation of the effectiveness and efficiency of DBSCAN using synthetic data and real data of the SEQUOIA 2000 benchmark. The results of our experiments demonstrate that (1) DBSCAN is significantly more effective in discovering clusters of arbitrary shape than the well-known algorithm CLARANS, and that (2) DBSCAN outperforms CLARANS by a factor of more than 100 in terms of efficiency.},
publisher = {AAAI Press},
author = {Ester, Martin and Kriegel, Hans-Peter and Sander, Jörg and Xu, Xiaowei},
year = {1996},
pages = {226--231}
}
@article{padovan_compensation_2015,
title = {Single {Mammalian} {Cells} {Compensate} for {Differences} in {Cellular} {Volume} and {DNA} {Copy} {Number} through {Independent} {Global} {Transcriptional} {Mechanisms}},
volume = {58},
issn = {1097-2765},
url = {http://linkinghub.elsevier.com/retrieve/pii/S1097276515001707},
doi = {10.1016/j.molcel.2015.03.005},
language = {en},
number = {2},
urldate = {2018-09-19},
journal = {Molecular Cell},
author = {Padovan-Merhar, Olivia and Nair, Gautham P. and Biaesch, Andrew G. and Mayer, Andreas and Scarfone, Steven and Foley, Shawn W. and Wu, Angela R. and Churchman, L. Stirling and Singh, Abhyudai and Raj, Arjun},
month = apr,
year = {2015},
pages = {339--352}
}
@article{finak_mast_2015,
title = {{MAST}: a flexible statistical framework for assessing transcriptional changes and characterizing heterogeneity in single-cell {RNA} sequencing data},
volume = {16},
issn = {1474-760X},
shorttitle = {{MAST}},
url = {https://doi.org/10.1186/s13059-015-0844-5},
doi = {10.1186/s13059-015-0844-5},
abstract = {Single-cell transcriptomics reveals gene expression heterogeneity but suffers from stochastic dropout and characteristic bimodal expression distributions in which expression is either strongly non-zero or non-detectable. We propose a two-part, generalized linear model for such bimodal data that parameterizes both of these features. We argue that the cellular detection rate, the fraction of genes expressed in a cell, should be adjusted for as a source of nuisance variation. Our model provides gene set enrichment analysis tailored to single-cell data. It provides insights into how networks of co-expressed genes evolve across an experimental treatment. MAST is available at https://github.com/RGLab/MAST.},
number = {1},
urldate = {2018-09-19},
journal = {Genome Biology},
author = {Finak, Greg and McDavid, Andrew and Yajima, Masanao and Deng, Jingyuan and Gersuk, Vivian and Shalek, Alex K. and Slichter, Chloe K. and Miller, Hannah W. and McElrath, M. Juliana and Prlic, Martin and Linsley, Peter S. and Gottardo, Raphael},
month = dec,
year = {2015},
pages = {278}
}