mirror of
https://github.com/gladstone-institutes/Bioinformatics-Workshops.git
synced 2025-11-30 09:45:43 -08:00
Add script and data
This commit is contained in:
parent
f277a26ccb
commit
b030129659
6 changed files with 778 additions and 0 deletions
BIN
single-cell-analysis/Sessions_1-2/data/barcodes.tsv.gz
Normal file
BIN
single-cell-analysis/Sessions_1-2/data/barcodes.tsv.gz
Normal file
Binary file not shown.
BIN
single-cell-analysis/Sessions_1-2/data/features.tsv.gz
Normal file
BIN
single-cell-analysis/Sessions_1-2/data/features.tsv.gz
Normal file
Binary file not shown.
BIN
single-cell-analysis/Sessions_1-2/data/matrix.mtx.gz
Normal file
BIN
single-cell-analysis/Sessions_1-2/data/matrix.mtx.gz
Normal file
Binary file not shown.
205
single-cell-analysis/Sessions_1-2/hands_on_component.Rmd
Normal file
205
single-cell-analysis/Sessions_1-2/hands_on_component.Rmd
Normal file
|
|
@ -0,0 +1,205 @@
|
|||
---
|
||||
title: "Hands-on component of Single-cell RNA-seq workshop: Sessions 1-2"
|
||||
author: "Krishna Choudhary"
|
||||
date: "11/3/2020"
|
||||
output: html_document
|
||||
---
|
||||
|
||||
```{r setup, include=FALSE}
|
||||
knitr::opts_chunk$set(echo = TRUE)
|
||||
```
|
||||
|
||||
##Introduction
|
||||
|
||||
We'll review the current practices for some of the common steps in scRNA-seq data analysis during the Sessions 1-2. We'll discuss different practices for each step and the assumptions underlying various tools and their limitations. This document provides an exposure to one of the popular tools for such analysis. As we'll discuss, the right choice of method in any application depends on a number of factors, including the biological systems under study and the characteristics of the data in hand. For the purpose of our workshop, we'll limit the hands-on component to the Seurat package in R. In general, analysis might require multiple tools in different languages and/or novel development. For more, please see the slide deck in materials.
|
||||
|
||||
The following is based on [this vignette](https://satijalab.org/seurat/v3.2/pbmc3k_tutorial.html) from the Seurat developers.
|
||||
|
||||
## Setup the working environment
|
||||
|
||||
```{r message=FALSE, warning=FALSE}
|
||||
library(tidyverse)
|
||||
library(Seurat)
|
||||
```
|
||||
|
||||
## Load the data
|
||||
|
||||
Please ensure that the directory named "data" in the workshop materials is in the current working directory.
|
||||
|
||||
```{r}
|
||||
data <- CreateSeuratObject(counts = Read10X("data"),
|
||||
project = "Hello_scWorld", #Name this whatever.
|
||||
min.cells = 3, # Don't keep genes observed in fewer than 3 cells
|
||||
min.features = 200, # Don't keep cells with fewer than 200 genes
|
||||
names.delim = NULL) # Don't try and parse the sample names
|
||||
|
||||
```
|
||||
|
||||
## Filter poor quality or uninteresting cells
|
||||
|
||||
```{r}
|
||||
#Assess percent of mitochondrial counts in each cell
|
||||
data[["percent_mt"]] <- PercentageFeatureSet(object = data,
|
||||
pattern = "^mt-")
|
||||
|
||||
#Violin plot
|
||||
VlnPlot(object = data,
|
||||
features = c("nFeature_RNA",
|
||||
"nCount_RNA",
|
||||
"percent_mt"),
|
||||
ncol = 3)
|
||||
|
||||
#Ridge plot
|
||||
RidgePlot(object = data,
|
||||
features = c("nFeature_RNA",
|
||||
"percent_mt"),
|
||||
ncol = 2)
|
||||
|
||||
#Other plotting otions
|
||||
plot1 <- FeatureScatter(object = data,
|
||||
feature1 = "nCount_RNA",
|
||||
feature2 = "percent_mt")
|
||||
plot2 <- FeatureScatter(object = data,
|
||||
feature1 = "nCount_RNA",
|
||||
feature2 = "nFeature_RNA")
|
||||
plot1 + plot2
|
||||
|
||||
#Subset data
|
||||
data <- subset(x = data,
|
||||
subset = nFeature_RNA > 200 &
|
||||
nCount_RNA > 950000 &
|
||||
percent_mt < 20)
|
||||
|
||||
VlnPlot(object = data,
|
||||
features = c("nFeature_RNA",
|
||||
"nCount_RNA",
|
||||
"percent_mt"),
|
||||
ncol = 3)
|
||||
|
||||
```
|
||||
|
||||
## Normalization
|
||||
|
||||
```{r}
|
||||
data <- NormalizeData(object = data,
|
||||
normalization.method = "LogNormalize",
|
||||
scale.factor = 10000)
|
||||
|
||||
```
|
||||
|
||||
## Feature selection
|
||||
|
||||
```{r}
|
||||
data <- FindVariableFeatures( object = data,
|
||||
selection.method = "vst",
|
||||
nfeatures = 2000)
|
||||
|
||||
#View the 10 most highly variable genes
|
||||
top10 <- head(x = VariableFeatures(object = data), 10)
|
||||
print(top10)
|
||||
|
||||
#Seurat allows plotting variable features with and without labels
|
||||
plot1 <- VariableFeaturePlot(object = data)
|
||||
plot2 <- LabelPoints(plot = plot1,
|
||||
points = top10,
|
||||
repel = TRUE)
|
||||
print(plot2)
|
||||
|
||||
```
|
||||
|
||||
## Dimensionality reduction
|
||||
|
||||
### Linear dimensionality reduction
|
||||
```{r}
|
||||
#By default Seurat only scales the variable features.
|
||||
#Explicit input required to rescale all the genes
|
||||
scale_genes <- rownames(data)
|
||||
data <- ScaleData(object = data,
|
||||
features = scale_genes)
|
||||
|
||||
# Use the highly variable genes to find principal components
|
||||
data <- RunPCA(object = data,
|
||||
features = VariableFeatures(object = data),
|
||||
verbose = FALSE)
|
||||
|
||||
#Examine and visualize PCA results a few different ways
|
||||
print(x = data[["pca"]], dims = 1:5, nfeatures = 5)
|
||||
|
||||
DimPlot(data, reduction = "pca")
|
||||
|
||||
data <- JackStraw(object = data, num.replicate = 10)
|
||||
data <- ScoreJackStraw(object = data, dims = 1:20)
|
||||
JackStrawPlot(data, dims = 1:15)
|
||||
|
||||
ElbowPlot(data)
|
||||
|
||||
VizDimLoadings(object = data, dims = 1:2, reduction = "pca")
|
||||
|
||||
```
|
||||
|
||||
### Nonlinear dimensionality reduction
|
||||
```{r}
|
||||
data <- RunUMAP(data, dims = 1:10)
|
||||
data <- RunTSNE(data, dims = 1:10)
|
||||
|
||||
DimPlot(data, reduction = "umap")
|
||||
DimPlot(data, reduction = "tsne")
|
||||
```
|
||||
|
||||
## Clustering
|
||||
```{r}
|
||||
data <- FindNeighbors(data, dims = 1:10)
|
||||
data <- FindClusters(data, resolution = 0.5)
|
||||
|
||||
DimPlot(data, reduction = "umap", label = TRUE)
|
||||
DimPlot(data, reduction = "tsne", label = TRUE)
|
||||
DimPlot(data, reduction = "pca", label = TRUE)
|
||||
```
|
||||
|
||||
### Save the Seurat object
|
||||
```{r}
|
||||
test <- data[, 1:10 ]
|
||||
saveRDS(test, file = "hello_scWorld.rds")
|
||||
```
|
||||
|
||||
## Find markers
|
||||
```{r}
|
||||
# find all markers of cluster 1
|
||||
cluster1.markers <- FindMarkers(data,
|
||||
ident.1 = 1,
|
||||
min.pct = 0.25)
|
||||
head(cluster1.markers, n = 5)
|
||||
|
||||
# find all markers distinguishing cluster 5 from clusters 0 and 3
|
||||
cluster5.markers <- FindMarkers(data,
|
||||
ident.1 = 5,
|
||||
ident.2 = c(0, 3),
|
||||
min.pct = 0.25)
|
||||
head(cluster5.markers,
|
||||
n = 5)
|
||||
|
||||
# find markers for every cluster compared to all remaining cells, report only the positive ones
|
||||
data.markers <- FindAllMarkers(data,
|
||||
only.pos = TRUE,
|
||||
min.pct = 0.25,
|
||||
logfc.threshold = 0.25)
|
||||
data.markers %>%
|
||||
group_by(cluster) %>%
|
||||
top_n(n = 2, wt = avg_logFC)
|
||||
|
||||
```
|
||||
|
||||
## Additional visualizations
|
||||
|
||||
```{r}
|
||||
VlnPlot(data, features = c("Vps37b", "Tcf7"))
|
||||
|
||||
FeaturePlot(data, features = c("Vps37b", "Tcf7"))
|
||||
```
|
||||
|
||||
```{r}
|
||||
sessionInfo()
|
||||
```
|
||||
|
||||
## THE END.
|
||||
|
||||
573
single-cell-analysis/Sessions_1-2/hands_on_component.html
Normal file
573
single-cell-analysis/Sessions_1-2/hands_on_component.html
Normal file
File diff suppressed because one or more lines are too long
BIN
single-cell-analysis/Sessions_1-2/hello_scWorld.rds
Normal file
BIN
single-cell-analysis/Sessions_1-2/hello_scWorld.rds
Normal file
Binary file not shown.
Loading…
Add table
Add a link
Reference in a new issue