From cda43e36db217ea8978070d6af179f3e812e72e6 Mon Sep 17 00:00:00 2001 From: Ayushi Agrawal <88406934+ayushi-agrawal-gladstone@users.noreply.github.com> Date: Tue, 11 Apr 2023 18:23:25 -0700 Subject: [PATCH] Update scRNAseq.html --- docs/scRNAseq.html | 740 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 736 insertions(+), 4 deletions(-) diff --git a/docs/scRNAseq.html b/docs/scRNAseq.html index 0729bf5..8bf78cf 100644 --- a/docs/scRNAseq.html +++ b/docs/scRNAseq.html @@ -1,6 +1,738 @@ + + - -

Welcome to Gladstone Bioinformatics Workshops

-

This is the page for single-cell RNA-seq workshop

- + + + + + + + + + + + +Hands-on component of Single-cell RNA-seq workshop: Sessions 1-2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + +

##Introduction

+

We’ll review the current practices for some of the common steps in +scRNA-seq data analysis in the Sessions 1-2. We’ll discuss different +practices for each step and the assumptions underlying various tools and +their limitations. This document provides an exposure to one of the +popular tools for such analysis. As we’ll discuss, the right choice of +method in any application depends on a number of factors, including the +biological systems under study and the characteristics of the data in +hand. For the purpose of our workshop, we’ll limit the hands-on +component to the Seurat package in R. In general, analysis might require +multiple tools in different languages and/or novel development. For +more, please see the slide deck in materials.

+

The following is based on this +vignette from the Seurat developers.

+
+

Setup the working environment

+
library(dplyr)
+library(Seurat)
+library(patchwork)
+
+
+

Load the data

+

Please ensure that the directory named “pbmc3k_data” in the workshop +materials is in the same directory as this .Rmd file.

+
data <- CreateSeuratObject(counts = Read10X("data"),
+                           project = "Hello_scWorld",  #Name this whatever.
+                           min.cells = 3,  # Don't keep genes observed in fewer than 3 cells
+                           min.features = 200,  # Don't keep cells with fewer than 200 genes
+                           names.delim = NULL)  # Don't try and parse the sample names
+
+
+

Filter poor quality or uninteresting cells

+
#Assess percent of mitochondrial counts in each cell 
+data[["percent_mt"]] <- PercentageFeatureSet(object = data, 
+                                             pattern = "^mt-")
+
+#Violin plot
+VlnPlot(object = data, 
+        features = c("nFeature_RNA", 
+                   "nCount_RNA", 
+                   "percent_mt"), 
+        ncol = 3)
+

+
#Other plotting otions
+plot1 <- FeatureScatter(object = data, 
+                        feature1 = "nCount_RNA", 
+                        feature2 = "percent_mt")
+plot2 <- FeatureScatter(object = data,
+                        feature1 = "nCount_RNA", 
+                        feature2 = "nFeature_RNA")
+plot1 + plot2
+

+
#Subset data
+data <- subset(x = data, 
+               subset =  nFeature_RNA > 200 & 
+                 nCount_RNA > 950000 & 
+                 percent_mt < 20)
+
+VlnPlot(object = data, 
+        features = c("nFeature_RNA",
+                   "nCount_RNA",
+                   "percent_mt"),
+        ncol = 3)
+

+
+
+

Normalization

+
data <- NormalizeData(object = data, 
+                      normalization.method = "LogNormalize", 
+                      scale.factor = 10000)
+
+
+

Feature selection

+
data <- FindVariableFeatures( object = data, 
+                              selection.method = "vst", 
+                              nfeatures = 2000)
+
+#View the 10 most highly variable genes
+top10 <- head(x = VariableFeatures(object = data), 10)
+print(top10)
+
##  [1] "Gm10800" "Ccl21a"  "Ccl19"   "Gm21541" "Glycam1" "Gm2564"  "Ccl19.1"
+##  [8] "Gm10801" "Ccl20"   "Gzma"
+
#Seurat allows plotting variable features with and without labels
+plot1 <- VariableFeaturePlot(object = data)
+plot2 <- LabelPoints(plot = plot1, 
+                     points = top10, 
+                     repel = TRUE)
+
## When using repel, set xnudge and ynudge to 0 for optimal results
+
plot1 
+
## Warning: Transformation introduced infinite values in continuous x-axis
+

+
plot2
+
## Warning: Transformation introduced infinite values in continuous x-axis
+

+
+
+

Dimensionality reduction

+
+

Linear dimensionality reduction

+
#By default Seurat only scales the variable features.
+#Explicit input required to rescale all the genes
+scale_genes <- rownames(data)
+data <- ScaleData(object = data, 
+                  features = scale_genes)
+
## Centering and scaling data matrix
+
# Use the highly variable genes to find principal components
+data <- RunPCA(object = data,
+               features = VariableFeatures(object = data),  
+               verbose = FALSE)  
+
+#Examine and visualize PCA results a few different ways
+print(x = data[["pca"]], dims = 1:5, nfeatures = 5)
+
## PC_ 1 
+## Positive:  Serping1, Col3a1, Col1a2, Col1a1, Serpinf1 
+## Negative:  Cd74, Tyrobp, H2-Aa, H2-Ab1, Fcer1g 
+## PC_ 2 
+## Positive:  Plvap, Lrg1, Cldn5, Ctla2a, Fabp4 
+## Negative:  Lyz2, Cd68, Fcer1g, Tyrobp, Lgals3 
+## PC_ 3 
+## Positive:  Nkg7, Ctsw, Icos, Gm26917, Cd27 
+## Negative:  Plvap, Lrg1, Ms4a6d, Lyz2, Tmem88 
+## PC_ 4 
+## Positive:  Slc45a2, Tyrp1, Car14, Dct, Tspan10 
+## Negative:  Cfb, Lrg1, Clu, Gm26905, Plvap 
+## PC_ 5 
+## Positive:  Nkg7, Actb, Ctsw, AW112010, Gzmb 
+## Negative:  Gm11168, Gm26870, Gm10717, Gm21738, Gm10719
+
DimPlot(data, reduction = "pca")
+

+
data <- JackStraw(object = data, num.replicate = 100)
+data <- ScoreJackStraw(object = data, dims = 1:20)
+JackStrawPlot(data, dims = 1:15)
+
## Warning: Removed 21000 rows containing missing values (`geom_point()`).
+

+
ElbowPlot(data)
+

+
VizDimLoadings(object = data, dims = 1:2, reduction = "pca")
+

+
+
+

Nonlinear dimensionality reduction

+
data <- RunUMAP(data, dims = 1:10)
+
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
+## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
+## This message will be shown once per session
+
## 10:18:04 UMAP embedding parameters a = 0.9922 b = 1.112
+
## 10:18:04 Read 5438 rows and found 10 numeric columns
+
## 10:18:04 Using Annoy for neighbor search, n_neighbors = 30
+
## 10:18:04 Building Annoy index with metric = cosine, n_trees = 50
+
## 0%   10   20   30   40   50   60   70   80   90   100%
+
## [----|----|----|----|----|----|----|----|----|----|
+
## **************************************************|
+## 10:18:05 Writing NN index file to temp file /var/folders/1t/y84b3_bs2wq0l8ks9pr15jmw0000gq/T//RtmpEYxVdI/file687b79e58625
+## 10:18:05 Searching Annoy index using 1 thread, search_k = 3000
+## 10:18:06 Annoy recall = 100%
+## 10:18:06 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
+## 10:18:06 Initializing from normalized Laplacian + noise (using irlba)
+## 10:18:07 Commencing optimization for 500 epochs, with 228306 positive edges
+## 10:18:15 Optimization finished
+
data <- RunTSNE(data, dims = 1:10)
+
+DimPlot(data, reduction = "umap")
+

+
DimPlot(data, reduction = "tsne")
+

+
+
+
+

Clustering

+
data <- FindNeighbors(data, dims = 1:10)
+
## Computing nearest neighbor graph
+
## Computing SNN
+
data <- FindClusters(data, resolution = 0.5)
+
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
+## 
+## Number of nodes: 5438
+## Number of edges: 177873
+## 
+## Running Louvain algorithm...
+## Maximum modularity in 10 random starts: 0.9331
+## Number of communities: 18
+## Elapsed time: 0 seconds
+
DimPlot(data, reduction = "umap", label = TRUE) 
+

+
DimPlot(data, reduction = "tsne", label = TRUE)
+

+
DimPlot(data, reduction = "pca", label = TRUE)
+

+
+

Save the Seurat object

+
test <- data[, 1:10 ]
+saveRDS(test, file = "hello_scWorld.rds")
+
+
+
+

Find markers

+
# find all markers of cluster 1
+cluster1.markers <- FindMarkers(data, 
+                                ident.1 = 1, 
+                                min.pct = 0.25)
+head(cluster1.markers, n = 5)
+
##       p_val avg_log2FC pct.1 pct.2 p_val_adj
+## Spi1      0   1.672875 0.914 0.208         0
+## Cybb      0   1.862620 0.857 0.147         0
+## Fcgr1     0   1.924539 0.572 0.034         0
+## Cd68      0   3.266696 0.923 0.191         0
+## Ccl6      0   4.016911 0.819 0.091         0
+
# find all markers distinguishing cluster 5 from clusters 0 and 3
+cluster5.markers <- FindMarkers(data, 
+                                ident.1 = 5, 
+                                ident.2 = c(0, 3), 
+                                min.pct = 0.25)
+head(cluster5.markers,
+     n = 5)
+
##                p_val avg_log2FC pct.1 pct.2     p_val_adj
+## Sparc   0.000000e+00   4.614014 0.927 0.019  0.000000e+00
+## Igfbp7 5.582004e-293   5.709793 0.850 0.021 1.191870e-288
+## Ptrf   3.510770e-272   3.342912 0.774 0.009 7.496196e-268
+## Clu    1.710715e-265   3.763025 0.784 0.016 3.652719e-261
+## Plvap  1.616214e-262   4.448397 0.740 0.004 3.450939e-258
+
# find markers for every cluster compared to all remaining cells, report only the positive ones
+data.markers <- FindAllMarkers(data, 
+                               only.pos = TRUE, 
+                               min.pct = 0.25, 
+                               logfc.threshold = 0.25)
+
## Calculating cluster 0
+
## Calculating cluster 1
+
## Calculating cluster 2
+
## Calculating cluster 3
+
## Calculating cluster 4
+
## Calculating cluster 5
+
## Calculating cluster 6
+
## Calculating cluster 7
+
## Calculating cluster 8
+
## Calculating cluster 9
+
## Calculating cluster 10
+
## Calculating cluster 11
+
## Calculating cluster 12
+
## Calculating cluster 13
+
## Calculating cluster 14
+
## Calculating cluster 15
+
## Calculating cluster 16
+
## Calculating cluster 17
+
data.markers %>%
+  group_by(., cluster) %>% 
+  top_n(., n = 2, wt = avg_log2FC)
+
## # A tibble: 36 × 7
+## # Groups:   cluster [18]
+##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene   
+##        <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>  
+##  1 0               2.73 0.735 0.139 0         0       Tcf7   
+##  2 3.68e-180       2.45 0.907 0.55  7.85e-176 0       Vps37b 
+##  3 0               5.82 0.984 0.332 0         1       Lyz2   
+##  4 2.35e-202       4.02 0.407 0.049 5.01e-198 1       Chil3  
+##  5 5.55e-239       2.82 0.811 0.24  1.19e-234 2       Plbd1  
+##  6 1.13e-150       3.19 0.938 0.789 2.42e-146 2       Cst3   
+##  7 0               3.76 0.745 0.112 0         3       Cxcr6  
+##  8 0               3.40 0.838 0.155 0         3       Icos   
+##  9 0               4.46 0.935 0.038 0         4       Gpihbp1
+## 10 0               3.68 0.938 0.104 0         4       Esam   
+## # … with 26 more rows
+
+
+

Additional visualizations

+
VlnPlot(data, features = c("Vps37b", "Tcf7"))
+

+
FeaturePlot(data, features = c("Vps37b", "Tcf7"))
+

+
sessionInfo()
+
## R version 4.1.2 (2021-11-01)
+## Platform: x86_64-apple-darwin17.0 (64-bit)
+## Running under: macOS Big Sur 10.16
+## 
+## Matrix products: default
+## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
+## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
+## 
+## locale:
+## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
+## 
+## attached base packages:
+## [1] stats     graphics  grDevices utils     datasets  methods   base     
+## 
+## other attached packages:
+## [1] patchwork_1.1.2    SeuratObject_4.1.3 Seurat_4.3.0       dplyr_1.0.10      
+## 
+## loaded via a namespace (and not attached):
+##   [1] ggbeeswarm_0.7.1       Rtsne_0.16             colorspace_2.0-3      
+##   [4] deldir_1.0-6           ellipsis_0.3.2         ggridges_0.5.4        
+##   [7] rstudioapi_0.14        spatstat.data_3.0-0    farver_2.1.1          
+##  [10] leiden_0.4.3           listenv_0.9.0          ggrepel_0.9.2         
+##  [13] fansi_1.0.3            codetools_0.2-18       splines_4.1.2         
+##  [16] cachem_1.0.6           knitr_1.41             polyclip_1.10-4       
+##  [19] jsonlite_1.8.4         ica_1.0-3              cluster_2.1.4         
+##  [22] png_0.1-8              uwot_0.1.14            shiny_1.7.4           
+##  [25] sctransform_0.3.5      spatstat.sparse_3.0-0  compiler_4.1.2        
+##  [28] httr_1.4.4             assertthat_0.2.1       Matrix_1.5-1          
+##  [31] fastmap_1.1.0          lazyeval_0.2.2         limma_3.50.3          
+##  [34] cli_3.5.0              later_1.3.0            htmltools_0.5.4       
+##  [37] tools_4.1.2            igraph_1.3.5           gtable_0.3.1          
+##  [40] glue_1.6.2             RANN_2.6.1             reshape2_1.4.4        
+##  [43] Rcpp_1.0.9             scattermore_0.8        jquerylib_0.1.4       
+##  [46] vctrs_0.5.1            nlme_3.1-161           spatstat.explore_3.0-5
+##  [49] progressr_0.12.0       lmtest_0.9-40          spatstat.random_3.0-1 
+##  [52] xfun_0.35              stringr_1.5.0          globals_0.16.2        
+##  [55] mime_0.12              miniUI_0.1.1.1         lifecycle_1.0.3       
+##  [58] irlba_2.3.5.1          goftest_1.2-3          future_1.30.0         
+##  [61] MASS_7.3-58.1          zoo_1.8-11             scales_1.2.1          
+##  [64] promises_1.2.0.1       spatstat.utils_3.0-1   parallel_4.1.2        
+##  [67] RColorBrewer_1.1-3     yaml_2.3.6             reticulate_1.26       
+##  [70] pbapply_1.6-0          gridExtra_2.3          ggrastr_1.0.1         
+##  [73] ggplot2_3.4.0          sass_0.4.4             stringi_1.7.8         
+##  [76] highr_0.9              rlang_1.0.6            pkgconfig_2.0.3       
+##  [79] matrixStats_0.63.0     evaluate_0.19          lattice_0.20-45       
+##  [82] ROCR_1.0-11            purrr_1.0.0            tensor_1.5            
+##  [85] labeling_0.4.2         htmlwidgets_1.6.0      cowplot_1.1.1         
+##  [88] tidyselect_1.2.0       parallelly_1.33.0      RcppAnnoy_0.0.20      
+##  [91] plyr_1.8.8             magrittr_2.0.3         R6_2.5.1              
+##  [94] generics_0.1.3         DBI_1.1.3              withr_2.5.0           
+##  [97] pillar_1.8.1           fitdistrplus_1.1-8     survival_3.4-0        
+## [100] abind_1.4-5            sp_1.5-1               tibble_3.1.8          
+## [103] future.apply_1.10.0    crayon_1.5.2           KernSmooth_2.23-20    
+## [106] utf8_1.2.2             spatstat.geom_3.0-3    plotly_4.10.1         
+## [109] rmarkdown_2.19         grid_4.1.2             data.table_1.14.6     
+## [112] digest_0.6.31          xtable_1.8-4           tidyr_1.2.1           
+## [115] httpuv_1.6.7           munsell_0.5.0          beeswarm_0.4.0        
+## [118] viridisLite_0.4.1      vipor_0.4.5            bslib_0.4.2
+
+
+

THE END.

+
+ + + + +
+ + + + + + + + + + + + + +