SC| 多组GO富集-2

303 阅读1分钟
pbmc.data <- Read10X(data.dir="D:\\R program\\shell\\enrich\\enrich\\filtered_gene_bc_matrices\\hg19")
pbmc <- CreateSeuratObject(counts = pbmc.data, project = "pbmc3k", min.cells = 3, min.features = 200)
pbmc[["percent.mt"]] <- PercentageFeatureSet(pbmc, pattern = "^MT-")
VlnPlot(pbmc, group.by = "orig.ident",
                 features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
                 pt.size = 0, #不需要显示点,可以设置pt.size = 0
                 ncol = 4) +
  theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank())


pbmc <- subset(pbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < 5)
VlnPlot(pbmc, group.by = "orig.ident",
                 features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
                 pt.size = 0,
                 ncol = 4) +
  theme(axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank())
pbmc <- NormalizeData(pbmc)
pbmc <- FindVariableFeatures(pbmc, selection.method = "vst", nfeatures =2000)
pbmc <- ScaleData(pbmc)
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc))
pbmc <- RunUMAP(pbmc, dims = 1:10)
DimPlot(pbmc, reduction = "umap")

pbmc <- RunTSNE(pbmc, dims = 1:10)
DimPlot(pbmc, reduction = "tsne")
pbmc <- FindNeighbors(pbmc, dims = 1:10)
pbmc <- FindClusters(pbmc, resolution = 0.5)
#####最重要的知识点:dims = 1:10和resolution = 0.5可以调整,值越大,聚类越多,根据实际需要不断调整参数,即可得到自己想要的细胞聚类数。
# find markers for every cluster compared to all remaining cells, report only the positive
# ones
pbmc.markers <- FindAllMarkers(pbmc, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
new.cluster.ids <- c("Naive CD4 T", "CD14+ Mono", "Memory CD4 T", "B", "CD8 T", "FCGR3A+ Mono",
                     "NK", "DC", "Platelet")
names(new.cluster.ids) <- levels(pbmc)
pbmc <- RenameIdents(pbmc, new.cluster.ids)
DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

#根据资料查阅,最后确定细胞类型,将细胞类型名称按顺序输入即可命名
#pt.size = 0.5  这是调整可视化图颗粒大小的,如果觉得颗粒太小,可以根据需要调整,如pt.size = 1

## 找出每个cluster的标记与所有剩余的细胞相比较,只报告阳性细胞
#这一步特别重要,可以在后续可视化时将尽可能的调出差异基因,可以是后续细胞类型鉴定结果更加准确

new.cluster.ids <- c("Naive CD4 T", "CD14+ Mono", "Memory CD4 T", "B", "CD8 T", "FCGR3A+ Mono",
                     "NK", "DC", "Platelet")
names(new.cluster.ids) <- levels(pbmc)
pbmc <- RenameIdents(pbmc, new.cluster.ids)
DimPlot(pbmc, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()

#根据资料查阅,最后确定细胞类型,将细胞类型名称按顺序输入即可命名
#pt.size = 0.5  这是调整可视化图颗粒大小的,如果觉得颗粒太小,可以根据需要调整,如pt.size = 1
pbmc@meta.data$celltype=pbmc@meta.data$seurat_clusters
levels(pbmc@meta.data$celltype) <- new.cluster.ids


library(Seurat)
library(patchwork)
library(clusterProfiler)
library(org.Mm.eg.db) ##加载小鼠
library(org.Hs.eg.db) ##加载人类
library(tidyverse)
# 导入注释好的pbmc数据集
pbmc <- readRDS("pbmc.rds")

# 寻找每个群的marker基因
Idents(pbmc) <- 'celltype'#因为这个是单样品数据集,在做多样品的时候,把Idents设置为不同样品即可计算不同样品的差异基因
table(pbmc$celltype)
# Naive CD4 T Memory CD4 T   CD14+ Mono            B        CD8 T 
#         711          480          472          344          279 
# FCGR3A+ Mono           NK           DC     Platelet 
#          162          144           32           14 
markers <- FindAllMarkers(pbmc)
sig_dge.all <- subset(markers, p_val_adj<0.05&abs(avg_log2FC)>0.15) #所有差异基因
saveRDS(sig_dge.all, file = "sig_dge.all_pbmc.rds")
View(sig_dge.all)
##CD4T的上调基因
sig_dge.CD4T_up <- subset(sig_dge.all, avg_log2FC>0.15&cluster=='Naive CD4 T') #CD4T细胞的上调差异基因
#富集分析
ego_CD4 <- enrichGO(gene          = row.names(sig_dge.CD4T_up),
                    #universe     = row.names(dge.celltype),
                    OrgDb         = 'org.Hs.eg.db',
                    keyType       = 'SYMBOL',
                    ont           = "ALL",  #设置为ALL时BP, CC, MF都计算
                    pAdjustMethod = "BH",
                    pvalueCutoff  = 0.01,
                    qvalueCutoff  = 0.05)
ego_cd4 <- data.frame(ego_CD4)
#write.csv(ego_CD4,'enrichGO_CD4.csv')
View(ego_cd4)
ego_cd4$Group='Naive CD4 T'#增加分组标签


sig_dge.B_up <- subset(sig_dge.all, avg_log2FC>0.15&cluster=='B') #CD4T细胞的上调差异基因
#富集分析
ego_B <- enrichGO(gene          = row.names(sig_dge.B_up),
                  #universe     = row.names(dge.celltype),
                  OrgDb         = 'org.Hs.eg.db',
                  keyType       = 'SYMBOL',
                  ont           = "ALL",  #设置为ALL时BP, CC, MF都计算
                  pAdjustMethod = "BH",
                  pvalueCutoff  = 0.01,
                  qvalueCutoff  = 0.05)
ego_b <- data.frame(ego_B)
#write.csv(ego_B,'enrichGO_B.csv')
View(ego_b)
ego_b$Group='B'

sig_dge.MCD4T_up <- subset(sig_dge.all, avg_log2FC>0.15&cluster=='Memory CD4 T') #CD4T细胞的上调差异基因
#富集分析
ego_MCD4T <- enrichGO(gene          = row.names(sig_dge.MCD4T_up),
                      #universe     = row.names(dge.celltype),
                      OrgDb         = 'org.Hs.eg.db',
                      keyType       = 'SYMBOL',
                      ont           = "ALL",  #设置为ALL时BP, CC, MF都计算
                      pAdjustMethod = "BH",
                      pvalueCutoff  = 0.01,
                      qvalueCutoff  = 0.05)
ego_mcd4t <- data.frame(ego_MCD4T)
#write.csv(ego_MCD4T,'enrichGO_MCD4T.csv')
View(ego_mcd4t)
ego_mcd4t$Group='Memory CD4 T'
每个群选了前3个通路,这里的通路可以随便选

CD4T=ego_cd4[1:3,]#每个GO通路默认是按照P值由小到大排序的,取前三行
B=ego_b[1:3,]
MCD4T=ego_mcd4t[1:3,]
all <- rbind(CD4T,B,MCD4T)#前三行组合在一起
上面我们得到了9个pathway,下面这一步的目的是看9个pathway在不同细胞群的表达(否则每组只有三个通路有结果,其他的没有)

a=all$Description
mcd4t=ego_mcd4t[ego_mcd4t$Description%in%a,]
b=ego_b[ego_b$Description%in%a,]
cd4t=ego_cd4[ego_cd4$Description%in%a,]
all <- all <- rbind(cd4t,b,mcd4t)
all$GeneRatio <- c(0.3798883,0.3798883,0.3854749,0.2105263,0.1184211,0.09210526,0.1502591,0.1450777,0.0880829)
library(forcats)
all$Description <- as.factor(all$Description)
all$Description <- fct_inorder(all$Description)
最终的目的是得到这样的表格,挑选想展示的通路,标记清楚分组即可


ggplot(all, aes(Group, Description)) +
  geom_point(aes(color=p.adjust, size=GeneRatio))+theme_bw()+
  theme(panel.grid = element_blank(),
        axis.text.x=element_text(angle=90,hjust = 1,vjust=0.5))+
  scale_color_gradient(low='#6699CC',high='#CC3333')+
  labs(x=NULL,y=NULL)+guides(size=guide_legend(order=1))