diff --git a/single-cell-analysis/Session_3/Session_3/Session3.Rmd b/single-cell-analysis/Session_3/Session_3/Session3.Rmd index 45d86b8..d2c0f4b 100644 --- a/single-cell-analysis/Session_3/Session_3/Session3.Rmd +++ b/single-cell-analysis/Session_3/Session_3/Session3.Rmd @@ -115,7 +115,7 @@ Now, we will perform sctranform based normalization and visualize the results ```{r} data <- SCTransform(data, method="qpoisson", vars.to.regress = NULL) data <- RunPCA(data, verbose = FALSE) -data <- RunTSNE(data, dims = 1:30, verbose = FALSE, method="Flt-SNE", initialization="pca", learning_rate=2400, perplexity=30) +data <- RunTSNE(data, dims = 1:30, verbose = FALSE) data <- FindNeighbors(data, dims = 1:30, verbose = FALSE) data <- FindClusters(data, verbose = FALSE) @@ -124,10 +124,8 @@ DimPlot(data, reduction = "tsne", group.by = "Individual") DimPlot(data, reduction = "tsne", group.by = "Time") DimPlot(data, reduction = "tsne", group.by = "batch") -## Note there still appears to be an association of PC1 with nFeature_RNA -FeatureScatter(object=data, feature1="nFeature_RNA", feature2="PC_1") -FeaturePlot(object = data, features = "nFeature_RNA") -FeaturePlot(object = data, features = "PC_1") +# ## Note there still appears to be an association of PC1 with nFeature_RNA +# FeatureScatter(object=data, feature1="nFeature_RNA", feature2="PC_1") ``` ##Find marker genes for each cluster @@ -144,19 +142,56 @@ head(MarkersRes1) ## Note: this is not the recommended approach to do this since instead of individual mice being treated as replicates, the cells are being treated as such cluster0_data <- subset(x=data, subset=(seurat_clusters==0)) MarkersRes0 <- FindMarkers(cluster0_data, ident.1 = "5 day", group.by = "Time", assay = "SCT",slot = "data", test.use = "wilcox", return.thresh = 1e-6) -VlnPlot(cluster0_data, features = "Tac1-17705", group.by = "Time") +cluster0_data@meta.data$Time <- as.factor(cluster0_data@meta.data$Time) +cluster0_data@meta.data$Time <- relevel(cluster0_data@meta.data$Time, ref="11 day") VlnPlot(cluster0_data, features = "Gm10116-5887", group.by = "Time") VlnPlot(cluster0_data, features = "Hspa1l-9322", group.by = "Time") -## Find genes associated with time in cluster 1. -## Note: this is not the recommended approach to do this since instead of individual mice being treated as replicates, the cells are being treated as such -cluster1_data <- subset(x=data, subset=(seurat_clusters==1)) -MarkersRes1 <- FindMarkers(cluster1_data, ident.1 = "5 day", group.by = "Time", assay = "SCT",slot = "data", test.use = "wilcox", return.thresh = 1e-6) -VlnPlot(cluster1_data, features = "Il1rl1-185", group.by = "Time") ``` +## Batch correction +Below, we will go over code to remove potential batch effects in the data using the IntegrateData function in Seurat 3. We will use the same data as before, assuming that the data was generated in two batches. It was not, :). But lets pretend anyway. +```{r} +## Let us visualize the uncorrected data +DimPlot(data, reduction = "tsne", group.by = "Individual") +DimPlot(data, reduction = "tsne", group.by = "Time") +DimPlot(data, reduction = "tsne", group.by = "batch") + +## Split the cells between the two batches +batch.list <- SplitObject(data, split.by = "batch") + +## Normalize the read counts across cells within the same bench +for (i in 1:length(batch.list)) { + batch.list[[i]] <- SCTransform(batch.list[[i]], verbose = FALSE, method="qpoisson", vars.to.regress = NULL) +} + +## Select features to identify anchors +batch.features <- SelectIntegrationFeatures(object.list = batch.list, nfeatures = 3000) +batch.list <- PrepSCTIntegration(object.list = batch.list, anchor.features = batch.features, + verbose = FALSE) +## Find the anchors +batch.anchors <- FindIntegrationAnchors(object.list = batch.list, normalization.method = "SCT", + anchor.features = batch.features, verbose = FALSE, k.filter = 10) +## Integrate the data +batch.integrated <- IntegrateData(anchorset = batch.anchors, normalization.method = "SCT", + verbose = FALSE) + +## Let us visualize the "corrected" data +batch.integrated <- RunPCA(batch.integrated, verbose = FALSE) +batch.integrated <- RunTSNE(batch.integrated, dims = 1:30, verbose = FALSE) + +batch.integrated <- FindNeighbors(batch.integrated, dims = 1:30, verbose = FALSE) +batch.integrated <- FindClusters(batch.integrated, verbose = FALSE) +DimPlot(batch.integrated, label = TRUE, reduction = "tsne") +DimPlot(batch.integrated, reduction = "tsne", group.by = "Individual") +DimPlot(batch.integrated, reduction = "tsne", group.by = "Time") +DimPlot(batch.integrated, reduction = "tsne", group.by = "batch") + +``` + + ## Multi-sample multi-condition comparison We will aggregate the counts across cells from each mouse within each cluster so that now we will be able to perform a pseudo-bulk RNA-seq differential expression separtely within each cluster. Note, we will be able to treat individual mice as replicates with this analyses. For the analyses in this section, we are moving away from Seurat. Hence we need to create a new single-cell RNA-seq object that the typical bioconductor package will recognise. @@ -173,6 +208,7 @@ data[["sIndividual"]] <- (data@meta.data$Individual) %>% as.character() %>% paste0("Individual_",.) +head(data@meta.data) ## Store the meta-data for each cell in the PhenoData object PhenoData <- data@meta.data @@ -296,45 +332,6 @@ print(ClusterRes) ``` -## Batch correction -Below, we will go over code to remove potential batch effects in the data using the IntegrateData function in Seurat 3. We will use the same data as before, assuming that the data was generated in two batches. It was not, :). But lets pretend anyway. -```{r} -## Let us visualize the uncorrected data -DimPlot(data, reduction = "tsne", group.by = "Individual") -DimPlot(data, reduction = "tsne", group.by = "Time") -DimPlot(data, reduction = "tsne", group.by = "batch") - -## Split the cells between the two batches -batch.list <- SplitObject(data, split.by = "batch") - -## Normalize the read counts across cells within the same bench -for (i in 1:length(batch.list)) { - batch.list[[i]] <- SCTransform(batch.list[[i]], verbose = FALSE, method="qpoisson", vars.to.regress = NULL) -} - -## Select features to identify anchors -batch.features <- SelectIntegrationFeatures(object.list = batch.list, nfeatures = 3000) -batch.list <- PrepSCTIntegration(object.list = batch.list, anchor.features = batch.features, - verbose = FALSE) -## Find the anchors -batch.anchors <- FindIntegrationAnchors(object.list = batch.list, normalization.method = "SCT", - anchor.features = batch.features, verbose = FALSE, k.filter = 10) -## Integrate the data -batch.integrated <- IntegrateData(anchorset = batch.anchors, normalization.method = "SCT", - verbose = FALSE) - -## Let us visualize the "corrected" data -batch.integrated <- RunPCA(batch.integrated, verbose = FALSE) -batch.integrated <- RunTSNE(batch.integrated, dims = 1:30, verbose = FALSE) - -batch.integrated <- FindNeighbors(batch.integrated, dims = 1:30, verbose = FALSE) -batch.integrated <- FindClusters(batch.integrated, verbose = FALSE) -DimPlot(batch.integrated, label = TRUE, reduction = "tsne") -DimPlot(batch.integrated, reduction = "tsne", group.by = "Individual") -DimPlot(batch.integrated, reduction = "tsne", group.by = "Time") -DimPlot(batch.integrated, reduction = "tsne", group.by = "batch") - -``` ```{r} sessionInfo() ``` diff --git a/single-cell-analysis/Session_3/Session_3/Session3.html b/single-cell-analysis/Session_3/Session_3/Session3.html index 54a2823..2ff750c 100644 --- a/single-cell-analysis/Session_3/Session_3/Session3.html +++ b/single-cell-analysis/Session_3/Session_3/Session3.html @@ -200,7 +200,7 @@ suppressMessages(library(muscat)) suppressMessages(library(SummarizedExperiment)) suppressMessages(library(tidyverse)) suppressMessages(library(magrittr)) -# suppressMessages(library(snifter)) + raw_data <- read.csv("rawCounts.csv", header = T) @@ -429,13 +429,13 @@ print(sprintf("After filtering outliers: %d cells and %d genes", ncol( | |======================================================================| 100%
## Calculating gene attributes
-
## Wall clock passed: Time difference of 3.402095 secs
+
## Wall clock passed: Time difference of 3.042193 secs
## Determine variable features
## Place corrected count matrix in counts slot
## Centering data matrix
## Set default assay to SCT
data <- RunPCA(data, verbose = FALSE)
-data <- RunTSNE(data, dims = 1:30, verbose = FALSE, method="Flt-SNE", initialization="pca", learning_rate=2400, perplexity=30)
+data <- RunTSNE(data, dims = 1:30, verbose = FALSE)
 
 data <- FindNeighbors(data, dims = 1:30, verbose = FALSE)
 data <- FindClusters(data, verbose = FALSE)
@@ -447,13 +447,9 @@ DimPlot(data, label = TRUE, reduction = "tsne") 

DimPlot(data, reduction = "tsne", group.by = "batch")

-
## Note there still appears to be an association of PC1 with nFeature_RNA
-FeatureScatter(object=data, feature1="nFeature_RNA", feature2="PC_1")
-

-
FeaturePlot(object = data, features = "nFeature_RNA")
-

-
FeaturePlot(object = data, features = "PC_1")
-

##Find marker genes for each cluster We will find markers using the Wilcoxon two-sample test.

+
# ## Note there still appears to be an association of PC1 with nFeature_RNA
+# FeatureScatter(object=data, feature1="nFeature_RNA", feature2="PC_1")
+

##Find marker genes for each cluster We will find markers using the Wilcoxon two-sample test.

## Find markers for each of the 5 clusters
 # MarkersRes <- FindAllMarkers(data, assay = "SCT", slot = "data", test.use = "wilcox", return.thresh = 1e-6)
 
@@ -471,25 +467,61 @@ head(MarkersRes1)
## Note: this is not the recommended approach to do this since instead of individual mice being treated as replicates, the cells are being treated as such cluster0_data <- subset(x=data, subset=(seurat_clusters==0)) MarkersRes0 <- FindMarkers(cluster0_data, ident.1 = "5 day", group.by = "Time", assay = "SCT",slot = "data", test.use = "wilcox", return.thresh = 1e-6) -VlnPlot(cluster0_data, features = "Tac1-17705", group.by = "Time") -

-
VlnPlot(cluster0_data, features = "Gm10116-5887", group.by = "Time")
+VlnPlot(cluster0_data, features = "Gm10116-5887", group.by = "Time")

VlnPlot(cluster0_data, features = "Hspa1l-9322", group.by = "Time")

-
## Find genes associated with time in cluster 1.
-## Note: this is not the recommended approach to do this since instead of individual mice being treated as replicates, the cells are being treated as such
-cluster1_data <- subset(x=data, subset=(seurat_clusters==1))
-MarkersRes1 <- FindMarkers(cluster1_data, ident.1 = "5 day", group.by = "Time", assay = "SCT",slot = "data", test.use = "wilcox", return.thresh = 1e-6)
-VlnPlot(cluster1_data, features = "Il1rl1-185", group.by = "Time")
-

+ +
+

Batch correction

+

Below, we will go over code to remove potential batch effects in the data using the IntegrateData function in Seurat 3. We will use the same data as before, assuming that the data was generated in two batches. It was not, :). But lets pretend anyway.

+
## Let us visualize the uncorrected data
+DimPlot(data, reduction = "tsne", group.by = "Individual")
+

+
DimPlot(data, reduction = "tsne", group.by = "Time")
+

+
DimPlot(data, reduction = "tsne", group.by = "batch")
+

+
## Split the cells between the two batches
+batch.list <- SplitObject(data, split.by = "batch")
+
+## Normalize the read counts across cells within the same bench
+for (i in 1:length(batch.list)) {
+  batch.list[[i]] <- SCTransform(batch.list[[i]], verbose = FALSE, method="qpoisson", vars.to.regress = NULL)
+}
+
+## Select features to identify anchors
+batch.features <- SelectIntegrationFeatures(object.list = batch.list, nfeatures = 3000)
+batch.list <- PrepSCTIntegration(object.list = batch.list, anchor.features = batch.features, 
+                                    verbose = FALSE)
+## Find the anchors
+batch.anchors <- FindIntegrationAnchors(object.list = batch.list, normalization.method = "SCT", 
+                                           anchor.features = batch.features, verbose = FALSE, k.filter  = 10)
+## Integrate the data
+batch.integrated <- IntegrateData(anchorset = batch.anchors, normalization.method = "SCT", 
+                                     verbose = FALSE)
+
+## Let us visualize the "corrected" data
+batch.integrated <- RunPCA(batch.integrated, verbose = FALSE)
+batch.integrated <- RunTSNE(batch.integrated, dims = 1:30, verbose = FALSE)
+
+batch.integrated <- FindNeighbors(batch.integrated, dims = 1:30, verbose = FALSE)
+batch.integrated <- FindClusters(batch.integrated, verbose = FALSE)
+DimPlot(batch.integrated, label = TRUE, reduction = "tsne") 
+

+
DimPlot(batch.integrated, reduction = "tsne", group.by = "Individual")
+

+
DimPlot(batch.integrated, reduction = "tsne", group.by = "Time")
+

+
DimPlot(batch.integrated, reduction = "tsne", group.by = "batch")
+

Multi-sample multi-condition comparison

We will aggregate the counts across cells from each mouse within each cluster so that now we will be able to perform a pseudo-bulk RNA-seq differential expression separtely within each cluster. Note, we will be able to treat individual mice as replicates with this analyses. For the analyses in this section, we are moving away from Seurat. Hence we need to create a new single-cell RNA-seq object that the typical bioconductor package will recognise.

Within -cluster comparison

-

The bioconductor package muscat will help us with this analysis. I had to slightly modify the primary function in this package (pbDS) to work with this data. The modified version (that you have downloaded) is what I sourced at the start of this document. I illustrate below how to set up the design matrix to perform the differential expression analyses. I prefer this approach as opposed to typical approach to assuming a two condition comparison. This way you have a lot of flexibility in modeling more complex design with more than one variable along with interactions between variables of interest.

+

The bioconductor package muscat will help us with this analysis. I illustrate below how to set up the design matrix to perform the differential expression analyses. I prefer this approach as opposed to typical approach to assuming a two condition comparison. This way you have a lot of flexibility in modeling more complex design with more than one variable along with interactions between variables of interest.

## Add modified names for the Time and Individual variable to make them work "nice" with the subsequent analysis in this code
 data[["sTime"]] <- (data@meta.data$Time) %>%
   as.character() %>%
@@ -499,7 +531,36 @@ data[["sIndividual"]] <- (data@meta.data$Individual) %>%
   as.character() %>%
   paste0("Individual_",.)
 
-## Store the meta-data for each cell in the PhenoData object
+head(data@meta.data)
+
##             orig.ident nCount_RNA nFeature_RNA       FacsMarker Individual
+## 22028_3_121          2     312643         4457 CD45- GFP+ CD31-       1197
+## 22028_3_123          2     368024         1639 CD45- GFP+ CD31-       1197
+## 22028_3_125          2    1069902         4187 CD45- GFP+ CD31-       1197
+## 22028_3_131          2    1401179         6841 CD45- GFP+ CD31-       1197
+## 22028_3_133          2    1365745         5857 CD45- GFP+ CD31-       1197
+## 22028_3_135          2    1163445         5613 CD45- GFP+ CD31-       1197
+##                         InferredCellType Organ SamplingSite  Time  batch
+## 22028_3_121 cancer associated fibroblast  skin        tumor 5 day batch1
+## 22028_3_123 cancer associated fibroblast  skin        tumor 5 day batch1
+## 22028_3_125 cancer associated fibroblast  skin        tumor 5 day batch1
+## 22028_3_131 cancer associated fibroblast  skin        tumor 5 day batch1
+## 22028_3_133 cancer associated fibroblast  skin        tumor 5 day batch1
+## 22028_3_135 cancer associated fibroblast  skin        tumor 5 day batch1
+##             percent_mt nCount_SCT nFeature_SCT SCT_snn_res.0.8 seurat_clusters
+## 22028_3_121   1.181859     617938         4625               0               0
+## 22028_3_123   3.632644     615456         1823               0               0
+## 22028_3_125   0.824468     624177         3871               0               0
+## 22028_3_131   1.430081     627706         6403               1               1
+## 22028_3_133   1.262534     626974         5209               1               1
+## 22028_3_135   2.624447     627235         5040               0               0
+##              sTime     sIndividual
+## 22028_3_121 X5_day Individual_1197
+## 22028_3_123 X5_day Individual_1197
+## 22028_3_125 X5_day Individual_1197
+## 22028_3_131 X5_day Individual_1197
+## 22028_3_133 X5_day Individual_1197
+## 22028_3_135 X5_day Individual_1197
+
## Store the meta-data for each cell in the PhenoData object
 PhenoData <- data@meta.data
 
 ## For this analysis we are moving away from Seurat. Hence we need to create a new single-cell RNA-seq object that the typical bioconductor package will recognise.
@@ -727,53 +788,8 @@ print(ClusterRes)
## Cluster2 -0.2263598 0.1681710 -1.3460100 0.1782993 0.4041632 ## Cluster3 -0.1638158 0.1649122 -0.9933519 0.3205385 0.4041632 ## Cluster4 -0.2099656 0.1868392 -1.1237771 0.2611076 0.4041632 -
-
-
-

Batch correction

-

Below, we will go over code to remove potential batch effects in the data using the IntegrateData function in Seurat 3. We will use the same data as before, assuming that the data was generated in two batches. It was not, :). But lets pretend anyway.

-
## Let us visualize the uncorrected data
-DimPlot(data, reduction = "tsne", group.by = "Individual")
-

-
DimPlot(data, reduction = "tsne", group.by = "Time")
-

-
DimPlot(data, reduction = "tsne", group.by = "batch")
-

-
## Split the cells between the two batches
-batch.list <- SplitObject(data, split.by = "batch")
-
-## Normalize the read counts across cells within the same bench
-for (i in 1:length(batch.list)) {
-  batch.list[[i]] <- SCTransform(batch.list[[i]], verbose = FALSE, method="qpoisson", vars.to.regress = NULL)
-}
-
-## Select features to identify anchors
-batch.features <- SelectIntegrationFeatures(object.list = batch.list, nfeatures = 3000)
-batch.list <- PrepSCTIntegration(object.list = batch.list, anchor.features = batch.features, 
-                                    verbose = FALSE)
-## Find the anchors
-batch.anchors <- FindIntegrationAnchors(object.list = batch.list, normalization.method = "SCT", 
-                                           anchor.features = batch.features, verbose = FALSE, k.filter  = 10)
-## Integrate the data
-batch.integrated <- IntegrateData(anchorset = batch.anchors, normalization.method = "SCT", 
-                                     verbose = FALSE)
-
-## Let us visualize the "corrected" data
-batch.integrated <- RunPCA(batch.integrated, verbose = FALSE)
-batch.integrated <- RunTSNE(batch.integrated, dims = 1:30, verbose = FALSE)
-
-batch.integrated <- FindNeighbors(batch.integrated, dims = 1:30, verbose = FALSE)
-batch.integrated <- FindClusters(batch.integrated, verbose = FALSE)
-DimPlot(batch.integrated, label = TRUE, reduction = "tsne") 
-

-
DimPlot(batch.integrated, reduction = "tsne", group.by = "Individual")
-

-
DimPlot(batch.integrated, reduction = "tsne", group.by = "Time")
-

-
DimPlot(batch.integrated, reduction = "tsne", group.by = "batch")
-

sessionInfo()
-
## R version 4.1.1 (2021-08-10)
+
## R version 4.1.3 (2022-03-10)
 ## Platform: x86_64-apple-darwin17.0 (64-bit)
 ## Running under: macOS Catalina 10.15.7
 ## 
@@ -785,117 +801,118 @@ DimPlot(batch.integrated, label = TRUE, reduction = "tsne") 

+## [183] gtable_0.3.0 spatstat.core_2.4-0
+
diff --git a/single-cell-analysis/Session_3/Session_3/Sessions_3.pptx b/single-cell-analysis/Session_3/Session_3/Sessions_3.pptx index 7e8dcb8..d653363 100644 Binary files a/single-cell-analysis/Session_3/Session_3/Sessions_3.pptx and b/single-cell-analysis/Session_3/Session_3/Sessions_3.pptx differ diff --git a/single-cell-analysis/Session_3/Session_3/~$Sessions_3.pptx b/single-cell-analysis/Session_3/Session_3/~$Sessions_3.pptx deleted file mode 100644 index 138f1a0..0000000 Binary files a/single-cell-analysis/Session_3/Session_3/~$Sessions_3.pptx and /dev/null differ