使用clusterProfiler分析其他软件注释的GO和KEGG

clusterProfiler出图比较漂亮。

#使用新的GO和KEGG进行富集分析
setwd("e:/GO/")
library(clusterProfiler)
library(ggplot2)

##获取spo基因的富集分析结果,需要输入基因列表和genetype(用于输出文件名的前缀)
  #修改输入文件和type即可运行
  gene <- read.csv("expansion_trd.csv",header = FALSE)
  genetype <- "trd"

  #修改GO和KEGG文件名
  golist <- read.table(file="GO.info",sep = "\t",header = FALSE)
  kegglist <- read.table(file="KEGG.info",sep = "\t",header = FALSE)

  #下面内容基本不需要修改
  spo2gene <- data.frame(golist$V1,golist$V3) #GO,Geneid
  spo2name <- data.frame(golist$V1,golist$V2) #GO,GO描述
  gene1 <- t(gene$V1)  #获取需要分析的基因列表
  #GO富集分析
  spo_GO <- enricher(gene1,TERM2GENE = spo2gene,TERM2NAME = spo2name)
  p1 <- dotplot(spo_GO)
  p1
  ggsave(file=paste(genetype,".GO.pdf",sep=""))
  write.csv(spo_GO,file=paste(genetype,"_GO.csv",sep = ""))

  #kegg富集
  kegg2gene <- data.frame(kegglist$V1,kegglist$V3) #keggid,Geneid
  kegg2name <- data.frame(kegglist$V1,kegglist$V2) #keggid,kegg描述
  gene1 <- t(gene$V1)  #获取需要分析的基因列表
  spo_KEGG <- enricher(gene1,TERM2GENE = kegg2gene,TERM2NAME = kegg2name)
  p2 <- dotplot(spo_KEGG)
  p2
  ggsave(file=paste(genetype,".KEGG.pdf",sep=""))
  write.csv(spo_KEGG,file=paste(genetype,"_kegg.csv",sep=""))

  cowplot::plot_grid(p1, p2, nrow=2, labels=c("A", "B"))  #ggplot2的函数,nrow指定2行,如果是ncol则是2列。
  ggsave(file=paste(genetype,"_go_kegg.pdf",sep=""))

head(golist)
#  V1                                       V2            V3                 V4
#  1 GO:0005452       inorganic anion exchanger activity Sp09G018780.1 Molecular Function
#  2 GO:0006820                          anion transport Sp09G018780.1 Biological Process
#  3 GO:0016021           integral component of membrane Sp09G018780.1 Cellular Component
head(kegglist)
#  V1                           V2            V3
#  1 ko00010 Glycolysis / Gluconeogenesis Sp12G016220.1
#  2 ko00010 Glycolysis / Gluconeogenesis Sp10G003910.1
#  3 ko00010 Glycolysis / Gluconeogenesis Sp16G007040.1
回到页面顶部