数据可视化

GO/KEGG 富集分析

clusterProfiler 富集分析

1. 安装与加载

# Install the Bioconductor packages used in this walkthrough.
# BiocManager is a CRAN package in its own right, so bootstrap it first when
# it is missing; otherwise the BiocManager::install() call below cannot run.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install(c("clusterProfiler", "org.Hs.eg.db", "enrichplot"))

library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(ggplot2)

2. 基因 ID 转换

# Convert gene symbols to Entrez IDs with bitr(), using the human annotation
# database. The ENTREZID column of the result feeds the enrichment calls.
genes <- c(
  "TP53",
  "BRCA1",
  "EGFR",
  "MYC",
  "KRAS"
)

gene_ids <- bitr(
  geneID = genes,
  OrgDb = org.Hs.eg.db,
  fromType = "SYMBOL",
  toType = "ENTREZID"
)

3. GO 富集分析

# GO enrichment on the Entrez IDs, restricted to one ontology.
# Ontology codes:
#   BP - Biological Process
#   MF - Molecular Function
#   CC - Cellular Component
go_bp <- enrichGO(
  gene = gene_ids$ENTREZID,
  OrgDb = org.Hs.eg.db,
  keyType = "ENTREZID",  # spelled out; this is the identifier type of `gene`
  ont = "BP",
  pvalueCutoff = 0.05,
  qvalueCutoff = 0.2,
  pAdjustMethod = "BH",
  readable = TRUE        # report gene symbols instead of Entrez IDs
)

# Inspect the first rows of the result.
head(go_bp)

4. KEGG 富集分析

# KEGG pathway enrichment for the same Entrez IDs ("hsa" is the KEGG
# organism code for human).
kegg <- enrichKEGG(
  gene = gene_ids$ENTREZID,
  pvalueCutoff = 0.05,
  organism = "hsa"
)

# Map the Entrez IDs in the KEGG result back to gene symbols.
kegg_readable <- setReadable(
  x = kegg,
  OrgDb = org.Hs.eg.db,
  keyType = "ENTREZID"
)

可视化

条形图

# Bar chart of the top 15 enriched GO terms; the theme and the title are
# independent layers, so their order does not affect the figure.
barplot(go_bp, showCategory = 15) +
  theme_minimal() +
  ggtitle("GO Biological Process")

气泡图

# Dot (bubble) plot of the top 15 enriched GO terms, with the term labels on
# the y axis set to size 10.
dotplot(go_bp, showCategory = 15) +
  theme(axis.text.y = element_text(size = 10)) +
  ggtitle("GO Enrichment")

网络图

# Gene-concept network: links each enriched term to its member genes.
#
# `gene_fc` is optional. Define it beforehand as a named numeric vector of
# log2 fold changes to colour the gene nodes; when it has not been defined,
# fall back to NULL so this snippet still runs instead of failing with
# "object 'gene_fc' not found".
# NOTE(review): the names of `gene_fc` presumably have to match the gene
# identifiers stored in `go_bp` -- confirm against your enrichplot version.
if (!exists("gene_fc")) {
  gene_fc <- NULL
}

cnetplot(go_bp,
  categorySize = "pvalue",
  foldChange = gene_fc,
  colorEdge = TRUE
)

富集图 (emap)

# Enrichment map: compute the pairwise term similarities first, then draw
# the map for the top 20 terms from the augmented result.
go_bp2 <- pairwise_termsim(x = go_bp)
emapplot(x = go_bp2, showCategory = 20)

导出结果

# Export the enrichment tables as CSV files in the working directory.
write.csv(as.data.frame(go_bp), "GO_BP_results.csv")
write.csv(as.data.frame(kegg), "KEGG_results.csv")

# Save the dot plot as a PDF.
# ggsave() without `plot =` writes whichever plot was displayed last, which
# at this point is no longer the dot plot. Rebuild the dot plot and pass it
# explicitly so the file content matches the file name.
go_dotplot <- dotplot(go_bp, showCategory = 15) +
  ggtitle("GO Enrichment") +
  theme(axis.text.y = element_text(size = 10))
ggsave("GO_dotplot.pdf", plot = go_dotplot, width = 8, height = 10)

常用物种代码

| 物种 | OrgDb | KEGG org |
| --- | --- | --- |
| Human | org.Hs.eg.db | hsa |
| Mouse | org.Mm.eg.db | mmu |
| Rat | org.Rn.eg.db | rno |
| Zebrafish | org.Dr.eg.db | dre |