是不是看到這種圖心里癢癢的,三年了,終于有人把它重現出來了。
從原圖我們很容易發現,主要有三部分:右上角是類似于corrplot包中的上三角相關系數圖;下三角是一組點之間的連接線(作者用了弧線,直線也能達到同樣的效果);剩余部分主要是圖例等其它輔助繪圖元素。
R語言才是最好的拼圖軟件,只要你愿意花時間。時間來到這個歷史節點上,就是這個圖已經有人做出來了,而且,以你殘缺的R基礎也已經重新在自己電腦上重新繪制出來(盡管是在單身的學長的幫助下)。那么,這個圖里面的點線面及其顏色各代表什么實際的生物學或者社會學的意義,它在講訴一個怎樣的故事?花瓶型還是內涵型?
library(vegan)
library(dplyr)
library(corrplot)
par(omi = c(0.3, 0.3, 0.3, 0.3),
cex = 1.2,
family = 'Times New Roman') # windows系統可能需要安裝其他字體包
M <- cor(decostand(mtcars,method="hellinger",na.rm=T))#計算相關系數矩陣
corrplot(M, method = "circle", type = 'upper')
head(mtcars)
mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
# 準備數據
set.seed(20190420)
n <- ncol(mtcars)
grp <- c('Cluster_1', 'Cluster_2', 'Cluster_3') # 分組名稱
sp <- c(rep(0.0008, 6), rep(0.007, 2), rep(0.03, 3), rep(0.13, 22)) # P值
gx <- c(-4.5, -2.5, 1) # 分組的X坐標
gy <- c(n-1, n-5, 2.5) # 分組的Y坐標
df <- data.frame(
grp = rep(grp, each = n), # 分組名稱,每個重復n次
gx = rep(gx, each = n), # 組X坐標,每個重復n次
gy = rep(gy, each = n), # 組Y坐標,每個重復n次
x = rep(0:(n - 1) - 0.5, 3), # 變量連接點X坐標
y = rep(n:1, 3), # 變量連接點Y坐標
p = sample(sp), # 對人工生成p值進行隨機抽樣
r = sample(c(rep(0.8, 4), rep(0.31, 7), rep(0.12, 22)))
# 對人工生成r值進行隨機抽樣
)
length(rep(grp, each = n))
length(rep(gx, each = n))
length(rep(gy, each = n))
length(rep(0:(n - 1) - 0.5, 3))
length(rep(n:1, 3))
length(sample(sp))
length(sample(c(rep(0.8, 4), rep(0.31, 7), rep(0.12, 22))) )
# 這一部分代碼是按照原圖圖例說明處理線條寬度和顏色映射
df <- df %>%
mutate(
lcol = ifelse(p <= 0.001, '#1B9E77', NA),
# p值小于0.001時,顏色為綠色,下面依次類推
lcol = ifelse(p > 0.001 & p <= 0.01, '#88419D', lcol),
lcol = ifelse(p > 0.01 & p <= 0.05, '#A6D854', lcol),
lcol = ifelse(p > 0.05, '#B3B3B3', lcol),
lwd = ifelse(r >= 0.5, 14, NA),
# r >= 0.5 時,線性寬度為14,下面依次類推
lwd = ifelse(r >= 0.25 & r < 0.5, 7, lwd),
lwd = ifelse(r < 0.25, 1, lwd)
)
核心函數:segments。
segments(df$gx, df$gy, df$x, df$y, lty = 'solid', lwd = df$lwd,
col = df$lcol, xpd = TRUE) # 繪制連接線
points(gx, gy, pch = 24, col = 'blue', bg = 'blue', cex = 3, xpd = TRUE)
# 組標記點
text(gx - 0.5, gy, labels = grp, adj = c(1, 0.5), cex = 1.5, xpd = TRUE)
# 組名稱
labels01 <- c('<= 0.001','0.001 < x <= 0.01','0.01 < x <= 0.05','> 0.05')
labels02 <- c('>= 0.5', '0.25 - 0.5', '< 0.25')
labels_x <- rep(-6, 4)
labels_y <- seq(4.6, 2.6, length.out = 4)
text(-6.5, 5.2, 'P-value', adj = c(0, 0.5), cex = 1.2, font = 2, xpd = TRUE)
text(labels_x, labels_y, labels01, adj = c(0, 0.5), cex = 1.2, xpd = TRUE)
points(labels_x - 0.5, labels_y, pch = 20, col = c('#1B9E77', '#88419D','#A6D854', '#B3B3B3'),
cex = 3, xpd = TRUE)
lines_x <- c(-6.5, -3, 0.5)
lines_y <- rep(1.2, 3)
text(-6.5, 1.9, "Mantel's r", adj = c(0, 0.5), cex = 1.2, font = 2, xpd = TRUE)
text(lines_x + 1.5, lines_y, labels02, adj = c(0, 0.5), cex = 1.2, xpd = TRUE)
segments(lines_x, lines_y, lines_x + 1, lines_y, lwd = c(14, 7, 2.5), lty = 'solid',
col = '#B3B3B3', xpd = TRUE)
圖例框框
## 圖例框框
segments(-6.9, 5.6, -2.8, 5.6, lty = 'solid', lwd = 1.2,
col = 'grey50', xpd = TRUE)
segments(-2.8, 5.6, -2.8, 1.8, lty = 'solid', lwd = 1.2,
col = 'grey50', xpd = TRUE)
segments(-2.8, 1.8, 3.6, 1.8, lty = 'solid', lwd = 1.2,
col = 'grey50', xpd = TRUE)
segments(3.6, 1.8, 3.6, 0.7, lty = 'solid', lwd = 1.2,
col = 'grey50', xpd = TRUE)
segments(3.6, 0.7, -6.9, 0.7, lty = 'solid', lwd = 1.2,
col = 'grey50', xpd = TRUE)
segments(-6.9, 0.7, -6.9, 5.6, lty = 'solid', lwd = 1.2,
col = 'grey50', xpd = TRUE)
這張圖不過是相關系數的展現形式的一種創新,炫的地方在與下面的幾條線。那么,我們不禁要問,這種形式的圖和pheatmap按照p值標簽的圖有什么本質的區別嗎?
library(pheatmap)
library(psych)
?pheatmap
?psych
pr<-corr.test(mtcars,mtcars,method="spearman")
pheatmap(pr$r,display_numbers = matrix(ifelse(pr$p <= 0.01, "**", ifelse(pr$p<= 0.05 ,"*"," ")), nrow(pr$p)),fontsize=18)