跳转至

两组数值向量的相关性分析(Pearson、Kendall 与 Spearman)

# Switch to the data directory; every relative path used later in this script
# (input tables and saved figures) is resolved against it.
setwd("circos/test")
# install.packages("ggpubr")
library("ggpubr")
# Load the tab-separated table (first row is the header). The analysis below
# reads its Gene_coverge and LTR_percent columns.
data1 <- read.table("check.table", header = TRUE, sep = "\t")
# Quick look at the first rows to confirm the table parsed as expected.
head(data1)

用 Q-Q 图检查两个变量是否近似符合正态分布:若散点基本落在参考线周围的阴影区域内,则可认为近似正态分布;若明显偏离阴影区域,则不符合正态分布。

# Normal Q-Q plot of the gene coverage column.
ggqqplot(data1[["Gene_coverge"]], ylab = "gene coverge")
# Normal Q-Q plot of the LTR percentage column.
ggqqplot(data1[["LTR_percent"]], ylab = "LTR")

用 Shapiro-Wilk 检验检测数据是否符合正态分布:p < 0.05 时拒绝正态性假设,即不符合正态分布;p ≥ 0.05 时不能拒绝正态性假设,可视为符合正态分布。

# Shapiro-Wilk normality test on each column. The trailing notes are the
# author's recorded results ("p = 2.2e-16"); presumably this is R's
# "p-value < 2.2e-16" floor -- re-run to confirm on your own data.
shapiro.test(data1$Gene_coverge) # => recorded p = 2.2e-16
shapiro.test(data1$LTR_percent) # => recorded p = 2.2e-16

经过 Q-Q 图和 Shapiro-Wilk 检验两种方法检测,本数据不符合正态分布,因此不适合用 Pearson 方法检测相关性,改用 Kendall 和 Spearman 方法。

皮尔逊(Pearson)方法:本实验不适用,以下代码用 if (FALSE) 包裹,不会执行,仅作参考。

# Pearson correlation, kept for reference only: the FALSE guard means nothing
# inside this block is ever evaluated.
if (FALSE) {
  res <- cor.test(data1$Gene_coverge, data1$LTR_percent, method = "pearson")
  res
  # Author's recorded result: cor (the Pearson coefficient; 1 = positive,
  # -1 = negative correlation) = -0.9668788; df is the degrees of freedom;
  # p-value < 2.2e-16.
  ggscatter(
    data1,
    x = "Gene_coverge",
    y = "LTR_percent",
    add = "reg.line",
    conf.int = TRUE,
    cor.coef = TRUE,
    cor.method = "pearson",
    xlab = "gene coverge",
    ylab = "LTR percent"
  )
}

Kendall 方法

# Kendall rank correlation test between gene coverage and LTR percentage.
res2 <- cor.test(data1$Gene_coverge, data1$LTR_percent, method = "kendall")
print(res2)
# Author's recorded result: tau (Kendall coefficient) = -0.8825434,
# p-value < 2.2e-16.

# Scatter plot with a regression line, confidence band and the Kendall
# coefficient annotated. It is stored in p2 and not drawn at this point.
p2 <- ggscatter(
  data1,
  x = "Gene_coverge",
  y = "LTR_percent",
  add = "reg.line",
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "kendall",
  xlab = "gene coverge",
  ylab = "LTR percent"
)

Spearman 方法

# Spearman rank correlation test between gene coverage and LTR percentage.
res3 <- cor.test(data1$Gene_coverge, data1$LTR_percent,  method = "spearman")
res3
# Author's recorded result: rho (Spearman coefficient) = -0.9799782,
# p-value < 2.2e-16.

# Scatter plot with a regression line, confidence band, the Spearman
# coefficient annotated, and a title. Stored in p3 for saving below.
p3 <- ggscatter(data1, x = "Gene_coverge", y = "LTR_percent",
          add = "reg.line", conf.int = TRUE,
          cor.coef = TRUE, cor.method = "spearman",
          xlab = "gene coverge", ylab = "LTR percent") +
  labs(title = "Correlation analysis of LTR and gene density")

#cowplot::plot_grid(p2,p3,nrow=2,labels=c("a","b"))
# Pass the plot explicitly: without `plot =`, ggsave() writes whatever
# last_plot() currently holds, so an unrelated plot created or printed in
# between would be saved instead.
ggsave("LTR_gene_coverge.check.pdf", plot = p3, dpi = 300)
ggsave("LTR_gene_coverge.check.tiff", plot = p3)

# Second tab-separated table (header row present); the plot below uses its
# Gene_coverge and Gypsy columns.
data0 <- read.table("check3.table", header = TRUE, sep = "\t")

# Scatter plot of Gypsy against gene coverage with a regression line,
# confidence band and the Spearman coefficient annotated.
p_gypsy <- ggscatter(data0, x = "Gene_coverge", y = "Gypsy",
          add = "reg.line", conf.int = TRUE,
          cor.coef = TRUE, cor.method = "spearman",
          xlab = "gene coverge", ylab = "Gypsy") +
  labs(title = "Correlation analysis of Gypsy and gene density")
# Draw it on the active device, as the original top-level expression did.
print(p_gypsy)
# Save this exact plot instead of relying on the implicit last_plot() state.
ggsave("gypsy_gene_coverge.pdf", plot = p_gypsy)

(对应前面的 Q-Q 图)散点不在阴影区内,说明数据不符合正态分布。

对整个数据框中的所有变量两两进行相关性分析

# Read the full table and compute the pairwise Kendall correlation matrix
# across all of its columns.
K <- read.csv("test.csv")
cor_data <- cor(K, method = "kendall")
library(corrplot)
# Lower-triangle views of the matrix, first as circles and then as pies.
invisible(lapply(
  c("circle", "pie"),
  function(glyph) corrplot(cor_data, method = glyph, type = "lower")
))
# Full matrix with the coefficients printed as numbers.
corrplot(cor_data, method = "number")

corrplot 的 method 参数可取 "circle"、"square"、"ellipse"、"number"、"shade"、"color"、"pie";type 参数可取 "full"、"lower"、"upper"。

回到页面顶部