Seurat--V5.0_数据存放结构的改变以及对象的构建

2024-03-26 21:14:03
开发
36

library(Seurat)
library(SeuratData)
library(patchwork)

# install dataset
InstallData("ifnb")
# load dataset
ifnb <- LoadData("ifnb")
str(ifnb)

单个数据集对象的数据结构

在这里插入图片描述

多个数据集对象的数据结构

# load dataset
ifnb <- LoadData("ifnb")
# split the RNA measurements into two layers one for control cells, one for stimulated cells

ifnb[["RNA"]] <- split(ifnb[["RNA"]], f = ifnb$stim)
ifnb

在这里插入图片描述

Perform analysis without integration

ifnb <- NormalizeData(ifnb)
ifnb <- FindVariableFeatures(ifnb)
ifnb <- ScaleData(ifnb)
ifnb <- RunPCA(ifnb)

ifnb <- FindNeighbors(ifnb, dims = 1:30, reduction = "pca")
ifnb <- FindClusters(ifnb, resolution = 2, cluster.name = "unintegrated_clusters")

ifnb <- RunUMAP(ifnb, dims = 1:30, reduction = "pca", reduction.name = "umap.unintegrated")
DimPlot(ifnb, reduction = "umap.unintegrated", group.by = c("stim", "seurat_clusters"))

在这里插入图片描述

Perform integration

ifnb <- IntegrateLayers(object = ifnb, method = CCAIntegration, orig.reduction = "pca", new.reduction = "integrated.cca",
    verbose = FALSE)

# re-join layers after integration
ifnb[["RNA"]] <- JoinLayers(ifnb[["RNA"]])
str(ifnb)

ifnb <- FindNeighbors(ifnb, reduction = "integrated.cca", dims = 1:30)
ifnb <- FindClusters(ifnb, resolution = 1)

ifnb <- RunUMAP(ifnb, dims = 1:30, reduction = "integrated.cca")

# Visualization
DimPlot(ifnb, reduction = "umap", group.by = c("stim", "seurat_annotations"))

在这里插入图片描述

个人数据部分

# conda env R4
library(Seurat)
library(tidyverse)
# getwd()
# [1] "/public/home/djs/huiyu/project/HY0007/20240325-scRNA/ANNO_XS01KF2023090059_PM-XS01KF2023090059-03_2024-03-20_18-43-41_223CKKLT4/reanalysis"
	
packageVersion("Seurat")
# [1] ‘5.0.1’

data.dir <- c("/public/home/djs/huiyu/project/HY0007/20240325-scRNA/ANNO_XS01KF2023090059_PM-XS01KF2023090059-03_2024-03-20_18-43-41_223CKKLT4/9323_map_to_ref/outs/filtered_feature_bc_matrix","
/public/home/djs/huiyu/project/HY0007/20240325-scRNA/ANNO_XS01KF2023090059_PM-XS01KF2023090059-03_2024-03-20_18-43-41_223CKKLT4/9347_map_to_ref/outs/filtered_feature_bc_matrix")

scelist <- lapply(data.dir,function(folder){CreateSeuratObject(counts = Read10X(folder),min.cells = 10,min.features  = 200)})
scelist[[1]]@meta.data$project.name <- "9323"  # 这里是每一个样本的名称，最后merge后再meta信息中显示
scelist[[2]]@meta.data$project.name <- "9347"

sce <- merge(scelist[[1]],scelist[[2]])
str(sce)

在这里插入图片描述

# 制造出sce 对象后跟着Seurat V5的教程走就可以了
# qc
mkdir {qc,multiqc}
conda activate NGS

ls * | while read id ;do (fastqc -o ../qc -q $id &);done
multiqc -o ./multiqc/ ./qc

# cellranger
/public/home/djs//software/cellranger-7.0.1/cellranger count \
--id  9323_map_to_ref \
--transcriptome /public/home/djs/reference/cellranger_count_ref/refdata-gex-GRCh38-2020-A  \
--fastqs   /public/home/djs/huiyu/project/HY0007/20240325-scRNA/ANNO_XS01KF2023090059_PM-XS01KF2023090059-03_2024-03-20_18-43-41_223CKKLT4/Rawdata/9347 \
--localcores  8 \
--localmem   64 & 

/public/home/djs//software/cellranger-7.0.1/cellranger count \
--id  9347_map_to_ref \
--transcriptome /public/home/djs/reference/cellranger_count_ref/refdata-gex-GRCh38-2020-A  \
--fastqs   /public/home/djs/huiyu/project/HY0007/20240325-scRNA/ANNO_XS01KF2023090059_PM-XS01KF2023090059-03_2024-03-20_18-43-41_223CKKLT4/Rawdata/9347 \
--localcores  8 \
--localmem   64 & 

tree  9347_map_to_ref
# 考虑到一些细胞的barcode可能出现重复，我把一些barcode的后缀改掉
cd 9347_map_to_ref/outs/filtered_feature_bc_matrix/
gzip -d barcodes.tsv.gz
sed "s/-1/-2/g" barcodes.tsv
sed -i "s/-1/-2/g" barcodes.tsv
gzip barcodes.tsv

# Seurat
mdkir  reanalysis
conda deactivate 
conda activate R4

########## step1 standard workfolw  ###################
# conda env R4
library(Seurat)
library(tidyverse)
# getwd()
# [1] "/public/home/djs/huiyu/project/HY0007/20240325-scRNA/ANNO_XS01KF2023090059_PM-XS01KF2023090059-03_2024-03-20_18-43-41_223CKKLT4/reanalysis"
	
packageVersion("Seurat")
# [1] ‘5.0.1’

data.dir <- c("/public/home/djs/huiyu/project/HY0007/20240325-scRNA/ANNO_XS01KF2023090059_PM-XS01KF2023090059-03_2024-03-20_18-43-41_223CKKLT4/9323_map_to_ref/outs/filtered_feature_bc_matrix","
/public/home/djs/huiyu/project/HY0007/20240325-scRNA/ANNO_XS01KF2023090059_PM-XS01KF2023090059-03_2024-03-20_18-43-41_223CKKLT4/9347_map_to_ref/outs/filtered_feature_bc_matrix")

# 构建对象
scelist <- lapply(data.dir,function(folder){CreateSeuratObject(counts = Read10X(folder),min.cells = 10,min.features  = 200)})
# 根据线粒体基因占比进行过滤
scelist <- lapply(scelist,function(sce){
	sce[["percent.mt"]] <- PercentageFeatureSet(sce, pattern = "^MT-")
	sce <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 7500 & percent.mt < 15)
})

scelist[[1]]@meta.data$project.name <- "9323"
scelist[[2]]@meta.data$project.name <- "9347"

# 因为Seurat V5的所有counts 放在了layers下，此时需要merge各个样本对象
sce <- merge(scelist[[1]],scelist[[2]])
# str(sce)
saveRDS(sce, file="pre_sce.rds")

############### Perform analysis without integration (optional)  ########################
# run standard anlaysis workflow
sce <- NormalizeData(sce)
sce <- FindVariableFeatures(sce)
sce <- ScaleData(sce)
sce <- RunPCA(sce)

pdf("Elboplot.pdf")
ElbowPlot(sce)
dev.off()

sce <- FindNeighbors(sce, dims = 1:15, reduction = "pca")
sce <- FindClusters(sce, resolution = 2, cluster.name = "unintegrated_clusters")

sce <- RunUMAP(sce, dims = 1:15, reduction = "pca", reduction.name = "umap.unintegrated")
pdf("Dimplot_UMAP.pdf",width=12,height=6)
DimPlot(sce, reduction = "umap.unintegrated", group.by = c("project.name", "seurat_clusters"))
dev.off()

########### step2 Perform integration ################## 
sce <- IntegrateLayers(object = sce, method = CCAIntegration, orig.reduction = "pca", new.reduction = "integrated.cca",verbose = FALSE)
# re-join layers after integration
sce[["RNA"]] <- JoinLayers(sce[["RNA"]])

sce <- FindNeighbors(sce, reduction = "integrated.cca", dims = 1:15)
sce <- FindClusters(sce, resolution = 0.3)
sce <- RunUMAP(sce, dims = 1:15, reduction = "integrated.cca") # 注意这里使用的降维数据

# Visualization
pdf("Dimplot_UMAP_integration_1.pdf",width=12,height=6)
DimPlot(sce, reduction = "umap", group.by = c("project.name", "seurat_annotations"))
dev.off()

pdf("Dimplot_UMAP_integration_2.pdf",width=12,height=6)
DimPlot(sce, reduction = "umap", split.by = "project.name")
dev.off()

saveRDS(sce,file="sce.combined.rds")

################# step3 find markers and visualization##################
# By default, Seurat performs differential expression (DE) testing based on the non-parametric Wilcoxon rank sum test. 
# find markers for every cluster compared to all remaining cells, report only the positive ones	
dir.create("./DE_identification")
setwd("./DE_identification")
library(limma)
library(dplyr)

clusters <- unique(sce@meta.data$seurat_clusters)
for (i in clusters){
    markers <- FindMarkers(sce, ident.1 = i, only.pos=T,group.by = "seurat_clusters",test.use = "wilcox_limma")
    write.table(markers, file=paste("cluster_",i,"_pos.markers.xls", sep=""), sep="\t", quote=F)
    #top20
    sce_top20 <- markers %>% top_n(n = 20, wt=avg_log2FC)
    features <- row.names(sce_top20)
    pdf(paste("cluster",i,"_pos.markers.pdf", sep=""), height=18, width=24)
    #a <- RidgePlot(immune.combined, idents="Epithelial_cells", sort="decreasing", group.by="cell_cluster_1stEpi", features = features, ncol = 5)
    b <- VlnPlot(sce, group.by="seurat_clusters", features = features, ncol = 5, pt.size = 0)
    #c <- FeaturePlot(immune.combined, features = features, ncol = 5)
    #print(a); print(b); print(c)
    print(b); dev.off()
}

# MANUAL pick marker gene for visualizing
pdf("VlnPlot_cluster3.pdf")
VlnPlot(immune.combined, features = c("CD3E", "CD8A","GZMB"))
dev.off()
pdf("FeaturePlot_cluster3.pdf")
FeaturePlot(immune.combined, features = c("CD3E", "CD8A","GZMB"))
dev.off()

##cell cluster annotation using SingleR 细胞注释
dir.create("../cell_annotation_singleR/")
setwd("../cell_annotation_singleR/")
library(SingleR)
library(SingleCellExperiment)
library(celldex)
library(pheatmap)


hpca.se <- HumanPrimaryCellAtlasData()
counts <- GetAssayData(sce[["RNA"]], slot="counts")
meta.data <- sce@meta.data
pred.sce <- SingleR(test = counts, ref = hpca.se, labels = hpca.se$label.main, clusters=meta.data$seurat_clusters)
write.table(pred.sce,"sce_singler.annotation.xls",sep="\t", quote=FALSE)

pdf("singleR_plotScoreHeatmap.pdf")
plotScoreHeatmap(pred.sce)
dev.off()

new.cluster.ids <- pred.sce$labels
names(new.cluster.ids) <- levels(sce)
sce <- RenameIdents(sce, new.cluster.ids)

pdf("SingleR_celltype.pdf",width=12,height=6)
DimPlot(sce1, reduction = "umap", label=T, label.size = 6) + NoLegend()
dev.off()

在这里插入图片描述

原文地址:https://blog.csdn.net/jiangshandaiyou/article/details/137046459 本文来自互联网用户投稿，该文观点仅代表作者本人，不代表本站立场。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如若转载，请注明出处：https://www.suanlizi.com/kf/1772612980761038848.html 如若内容造成侵权/违法违规/事实不符，请联系《酸梨子》网邮箱：1419361763@qq.com进行投诉反馈，一经查实，立即删除！

阅读全部