Extract expression matrix for specific cluster and group in integrated analysis

Hi, I have eight samples (AW1 to AW8), these represent four experimental groups, two biological replicates in each group (T1 to T4; T1=AW1+AW2, T2=AW3+AW4, T3=AW5+AW6, T4=AW7+AW8). I have run an integrated analysis on all the samples and want to compare gene expression between the clusters. I plan to use MAST, which needs raw counts - so I want to export raw count data from group within each cluster.

I have tried using this command, which I expected to generate a matrix of raw RNA expression data for the T1 sample point at cluster 5:

raw.data <- as.matrix(
  GetIntegrationData(
    Timepoints.combined, 
    integration.name="T1", 
    slot = "counts")[, WhichCells(Timepoints.combined, ident = "5")]
)

but get the response:

Error in GetIntegrationData(Timepoints.combined, integration.name = "T1",  : 
  Requested integration key does not exist

Any assistance with this, or another way to export raw data from integrated datasets would be greatly appreciated. Cheers, Alex

I post the rest of my script below:

library(plyr)
library(dplyr)
library(Seurat)
library(cowplot)

# Load the datasets, must have different project names to be able to separate them later on
AW1.data <- Read10X(data.dir = "C:\\snRNAseq\\Single sample analysis\\AW1")
AW1 <- CreateSeuratObject(counts = AW1.data, project = "T1",min.cells = 3, min.features = 200)
AW2.data <- Read10X(data.dir = "C:\\snRNAseq\\Single sample analysis\\AW2")
AW2 <- CreateSeuratObject(counts = AW2.data, project = "T1",min.cells = 3, min.features = 200)
AW3.data <- Read10X(data.dir = "C:\\snRNAseq\\Single sample analysis\\AW3")
AW3 <- CreateSeuratObject(counts = AW3.data, project = "T2",min.cells = 3, min.features = 200)
AW4.data <- Read10X(data.dir = "C:\\snRNAseq\\Single sample analysis\\AW4")
AW4 <- CreateSeuratObject(counts = AW4.data, project = "T2",min.cells = 3, min.features = 200)
AW5.data <- Read10X(data.dir = "C:\\snRNAseq\\Single sample analysis\\AW5")
AW5 <- CreateSeuratObject(counts = AW5.data, project = "T3",min.cells = 3, min.features = 200)
AW6.data <- Read10X(data.dir = "C:\\snRNAseq\\Single sample analysis\\AW6")
AW6 <- CreateSeuratObject(counts = AW6.data, project = "T3",min.cells = 3, min.features = 200)
AW7.data <- Read10X(data.dir = "C:\\snRNAseq\\Single sample analysis\\AW7")
AW7 <- CreateSeuratObject(counts = AW7.data, project = "T4",min.cells = 3, min.features = 200)
AW8.data <- Read10X(data.dir = "C:\\snRNAseq\\Single sample analysis\\AW8")
AW8 <- CreateSeuratObject(counts = AW8.data, project = "T4",min.cells = 3, min.features = 200)

AW1 <- subset(AW1, subset = nFeature_RNA > 200 & nFeature_RNA < 2500)
AW2 <- subset(AW2, subset = nFeature_RNA > 200 & nFeature_RNA < 2500)
AW3 <- subset(AW3, subset = nFeature_RNA > 200 & nFeature_RNA < 2500)
AW4 <- subset(AW4, subset = nFeature_RNA > 200 & nFeature_RNA < 2500)
AW5 <- subset(AW5, subset = nFeature_RNA > 200 & nFeature_RNA < 2500)
AW6 <- subset(AW6, subset = nFeature_RNA > 200 & nFeature_RNA < 2500)
AW7 <- subset(AW7, subset = nFeature_RNA > 200 & nFeature_RNA < 2500)
AW8 <- subset(AW8, subset = nFeature_RNA > 200 & nFeature_RNA < 2500)

#Merge Datasets
# # to merge more than two objects, pass one to x and a list of objects to y
# merge(x = pbmc_small, y = c(pbmc_small, pbmc_small))
T1 <- merge(AW1, y = AW2, add.cell.ids = c("AW1","AW2"), project = "Gill")
T2 <- merge(AW3, y = AW4, add.cell.ids = c("AW3","AW4"), project = "Gill")
T3 <- merge(AW5, y = AW6, add.cell.ids = c("AW5","AW6"), project = "Gill")
T4 <- merge(AW7, y = AW8, add.cell.ids = c("AW7","AW8"), project = "Gill")

Combined <- merge(T1, y = c(T2,T3,T4), add.cell.ids = c("T1","T2","T3","T4"), project = "All")
Combined
# An object of class Seurat 
# 34514 features across 18844 samples within 1 assay 
# Active assay: RNA (34514 features)

#Check tops and bottom, also IDs
head(colnames(Combined))
tail(colnames(Combined))
table(Combined$orig.ident)
# T1   T2   T3   T4 
# 4539 4876 4873 4556 

#Split up object 
Timepoints <- SplitObject(Combined, split.by="orig.ident")

#normalise and ID variable features
Timepoints.list <- lapply(X = Timepoints, FUN = function(x) {
  x <- NormalizeData(x,normalization.method = "LogNormalize", scale.factor = 10000)
  x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

#ID anchors between datasets which takes a list of Seurat objects as input, and use 
#these anchors to integrate the two datasets together with IntegrateData.
#dims defines the number of dimensions used from the CCA (canonical correlation analysis)
#see more in stuwart and butler paper
Gill.anchors <- FindIntegrationAnchors(object.list = Timepoints.list, dims = 1:20)
#might need to increase memory limit in r using:
# memory.size()
# memory.limit(size=NA)
memory.limit(size=20000)
Timepoints.combined <- IntegrateData(anchorset = Gill.anchors, dims = 1:20)
DefaultAssay(Timepoints.combined) <- "integrated"

Timepoints.combined <- ScaleData(Timepoints.combined, verbose = FALSE)
Timepoints.combined <- RunPCA(Timepoints.combined, npcs = 30, verbose = FALSE)
# UMAP and Clustering
Timepoints.combined <- RunUMAP(Timepoints.combined, reduction = "pca", dims = 1:20)
Timepoints.combined <- FindNeighbors(Timepoints.combined, reduction = "pca", dims = 1:20)
Timepoints.combined <- FindClusters(Timepoints.combined, resolution = 0.5)

#Visualise
p1 <- DimPlot(Timepoints.combined, reduction = "umap", group.by = "orig.ident")
p2 <- DimPlot(Timepoints.combined, reduction = "umap", label = TRUE)
plot_grid(p1, p2)

#side by side
DimPlot(Timepoints.combined, reduction = "umap", split.by = "orig.ident", ncol=2, nrow=2, label=T)

#number of cells per cluster
# Get number of cells per cluster and per sample of origin
table(Timepoints.combined@active.ident, Timepoints.combined@meta.data$orig.ident)
# T1  T2  T3  T4
# 0  366 979 792 730
# 1  643 618 685 470
# 2  282 276 468 474
# 3  290 252 451 474
# 4  351 228 354 268
# 5  573 361 120 117
# 6  126 386 241 331
# 7  316 258 235 188
# 8  123 370 236 255
# 9   96 136 287 293
# 10 107 103 186 197
# 11 197  86 160 127
# 12 253 131  65  21
# 13 197 136  75  57
# 14  37  75 142 163
# 15 117 126  77  83
# 16 185 115  54  46
# 17  97 129  62  86
# 18  90  61  85 112
# 19  93  50  98  64

#Make matrix of raw values for each cluster (for MAST)
raw.data <- as.matrix(GetIntegrationData(Timepoints.combined, integration.name="T1", slot = "counts")[, WhichCells(Timepoints.combined, ident = "0")])

satijalab / seurat

Extract expression matrix for specific cluster and group in integrated analysis #2825