使用clusterProfiler分析其他软件注释的GO和KEGG
clusterProfiler出图比较漂亮。
#使用新的GO和KEGG进行富集分析
setwd("e:/GO/")
library(clusterProfiler)
library(ggplot2)
##获取spo基因的富集分析结果,需要输入基因列表和genetype(用于输出文件名的前缀)
#修改输入文件和type即可运行
gene <- read.csv("expansion_trd.csv",header = FALSE)
genetype <- "trd"
#修改GO和KEGG文件名
golist <- read.table(file="GO.info",sep = "\t",header = FALSE)
kegglist <- read.table(file="KEGG.info",sep = "\t",header = FALSE)
#下面内容基本不需要修改
spo2gene <- data.frame(golist$V1,golist$V3) #GO,Geneid
spo2name <- data.frame(golist$V1,golist$V2) #GO,GO描述
gene1 <- t(gene$V1) #获取需要分析的基因列表
#GO富集分析
spo_GO <- enricher(gene1,TERM2GENE = spo2gene,TERM2NAME = spo2name)
p1 <- dotplot(spo_GO)
p1
ggsave(file=paste(genetype,".GO.pdf",sep=""))
write.csv(spo_GO,file=paste(genetype,"_GO.csv",sep = ""))
#kegg富集
kegg2gene <- data.frame(kegglist$V1,kegglist$V3) #keggid,Geneid
kegg2name <- data.frame(kegglist$V1,kegglist$V2) #keggid,kegg描述
gene1 <- t(gene$V1) #获取需要分析的基因列表
spo_KEGG <- enricher(gene1,TERM2GENE = kegg2gene,TERM2NAME = kegg2name)
p2 <- dotplot(spo_KEGG)
p2
ggsave(file=paste(genetype,".KEGG.pdf",sep=""))
write.csv(spo_KEGG,file=paste(genetype,"_kegg.csv",sep=""))
cowplot::plot_grid(p1, p2, nrow=2, labels=c("A", "B")) #ggplot2的函数,nrow指定2行,如果是ncol则是2列。
ggsave(file=paste(genetype,"_go_kegg.pdf",sep=""))
head(golist)
# V1 V2 V3 V4
# 1 GO:0005452 inorganic anion exchanger activity Sp09G018780.1 Molecular Function
# 2 GO:0006820 anion transport Sp09G018780.1 Biological Process
# 3 GO:0016021 integral component of membrane Sp09G018780.1 Cellular Component
head(kegglist)
# V1 V2 V3
# 1 ko00010 Glycolysis / Gluconeogenesis Sp12G016220.1
# 2 ko00010 Glycolysis / Gluconeogenesis Sp10G003910.1
# 3 ko00010 Glycolysis / Gluconeogenesis Sp16G007040.1