R:pheatmap

導讀

pheatmap默認會對輸入矩陣數據的行和列同時進行聚類,但是也可以通過布爾型參數cluster_rows和cluster_cols設置是否對行或列進行聚類,具體看分析需求。利用display_numbers參數可以在熱圖中的每個cell中填入想要的信息,例如相對豐度信息。利用cutree_rows和cutree_cols參數可以根據聚類產生的tree信息對熱圖進行分割。利用annotation_col和annotation_row參數可以給列或行添加分組信息。本文將先模擬輸入矩陣數據,然后再展示這些參數的具體使用方法。

一、模擬輸入矩陣

# Fix the random seed so the simulated matrix is reproducible.
set.seed(1995)

# Simulated abundance table: 200 non-negative values rounded to 2 decimals,
# arranged as 20 rows (samples) x 10 columns (species).
data <- matrix(abs(round(rnorm(200, mean = 0.5, sd = 0.25), 2)), 20, 10)
# Column names: bacterial species.
colnames(data) <- paste("Species", 1:10, sep = ".")
# Row names: samples.
rownames(data) <- paste("Sample", 1:20, sep = ".")

# Normalize every sample (row) to relative abundance, so each row sums to 1.
# The row totals are computed once; dividing a matrix by a vector of length
# nrow(data) recycles down the columns, so element [i, j] is divided by
# sample_sum[i].
sample_sum <- rowSums(data)
data_norm <- data / sample_sum

data_norm
# The simulated, normalized matrix looks like this:

               Species.1   Species.2  Species.3  Species.4 ... Species.10
    Sample.1  0.14032835 0.076767862 0.12225993 0.08713198 
    Sample.2  0.08434712 0.116281427 0.14405921 0.12976480 
    Sample.3  0.09997205 0.026460449 0.11571788 0.10006522 
    Sample.4  0.10753751 0.102236996 0.03449825 0.12766149 
    ...
    Sample.20

二、聚類分析和熱圖

1. 基礎熱圖

# Attach the pheatmap package.
library(pheatmap)

# Heatmap with all default settings.
pheatmap(mat = data_norm)

# Same heatmap, with the cell border color set to NA (no borders).
pheatmap(mat = data_norm, border_color = NA)

2. colorRampPalette漸變色、cell尺寸調整

cellheight <- 15  # cell height
cellwidth <- 20  # cell width
# Gradient palette: 10 colors interpolated from blue through white to red.
color <- colorRampPalette(colors = c("blue", "white", "red"))(10)

# Draw the heatmap with the cell size and palette defined above.
pheatmap(
  data_norm,
  color = color,
  cellwidth = cellwidth,
  cellheight = cellheight
)

3. 在cell中添加豐度

display_numbers=TRUE:使用默認矩陣數據

# Print the values of the input matrix inside the cells, using a
# purple-white-green gradient of 10 colors.
pheatmap(
  mat = data_norm,
  color = colorRampPalette(colors = c("purple", "white", "green"))(10),
  cellwidth = 20,
  cellheight = 15,
  display_numbers = TRUE
)

4. 在cell中添加mark

display_numbers=matrix:使用自定義矩陣數據
fontsize_number=18:mark大小
filename="name.png/pdf": 保存

# Build the mark matrix: a character matrix with the same shape and dimnames
# as data_norm, holding "***", "**", "*" or "" for every cell.
#   "*"   : 0.05  >= value > 0.01
#   "**"  : 0.01  >= value > 0.001
#   "***" : 0.001 >= value
# NOTE(review): these cut-offs read like p-value thresholds, but they are
# applied to data_norm itself -- confirm that this is the intended input.
data_mark <- matrix(
  "",
  nrow = nrow(data_norm),
  ncol = ncol(data_norm),
  dimnames = dimnames(data_norm)
)

# Apply the thresholds from loosest to strictest so that a stricter mark
# overwrites a looser one; cells above 0.05 keep the empty string.
data_mark[data_norm <= 0.05] <- "*"
data_mark[data_norm <= 0.01] <- "**"
data_mark[data_norm <= 0.001] <- "***"

# Draw the heatmap with the custom mark matrix as cell text and write it to
# mark.pdf.
pheatmap(
  mat = data_norm,
  color = colorRampPalette(colors = c("purple", "white", "green"))(10),
  cellwidth = 25,
  cellheight = 20,
  display_numbers = data_mark,
  fontsize_number = 18,  # size of the marks
  filename = "mark.pdf"
)

5. 根據tree將熱圖分割成2行3列

cutree_rows=num:分割行
cutree_cols=num:分割列

# Same heatmap as before, additionally split into 2 row groups and 3 column
# groups according to the clustering trees; written to mark_cut.pdf.
pheatmap(
  mat = data_norm,
  color = colorRampPalette(colors = c("purple", "white", "green"))(10),
  cutree_rows = 2,
  cutree_cols = 3,
  cellwidth = 25,
  cellheight = 20,
  display_numbers = data_mark,
  fontsize_number = 18,
  filename = "mark_cut.pdf"
)

6. 添加樣品和物種的分組信息

annotation_col:列分組
annotation_row:行分組
annotation_colors:分組顏色

# Simulated sample grouping: the first 10 samples are group "A", the last 10
# are group "B". The data frame rows are keyed by the sample names.
Group <- rep(c("A", "B"), each = 10)
group_sample <- data.frame(Group)
rownames(group_sample) <- rownames(data_norm)

group_sample
# Printed result:

                 Group
    Sample.1      A
    Sample.2      A
    Sample.3      A
    Sample.4      A
    Sample.5      A
    Sample.6      A
    Sample.7      A
    Sample.8      A
    Sample.9      A
    Sample.10     A
    Sample.11     B
    Sample.12     B
    Sample.13     B
    Sample.14     B
    Sample.15     B
    Sample.16     B
    Sample.17     B
    Sample.18     B
    Sample.19     B
    Sample.20     B

# Simulated species grouping: the first 5 species are genus "G1", the last 5
# are genus "G2". The data frame rows are keyed by the species names.
Genus <- rep(c("G1", "G2"), each = 5)
group_genus <- data.frame(Genus)
rownames(group_genus) <- colnames(data_norm)

group_genus
# Printed result:

                   Genus
    Species.1     G1
    Species.2     G1
    Species.3     G1
    Species.4     G1
    Species.5     G1
    Species.6     G2
    Species.7     G2
    Species.8     G2
    Species.9     G2
    Species.10    G2

# Annotation colors: one named vector per annotation column, with custom
# colors for both the sample groups (Group) and the genus groups (Genus).
colors <- list(
  Group = c(A = "#1B9E77", B = "#D95F02"),
  Genus = c(G1 = "pink", G2 = "lightgreen")
)

# Heatmap with marks, tree cuts, and row/column annotation tracks; written to
# mark_group.pdf.
pheatmap(
  mat = data_norm,
  color = colorRampPalette(colors = c("purple", "white", "green"))(10),
  annotation_row = group_sample,  # sample groups annotate the rows
  annotation_col = group_genus,  # genus groups annotate the columns
  annotation_colors = colors,
  cutree_rows = 2,
  cutree_cols = 3,
  cellwidth = 25,
  cellheight = 20,
  display_numbers = data_mark,
  fontsize_number = 18,
  filename = "mark_group.pdf"
)

單方面斜體

library(pheatmap)

## Build one label per row: species name followed by the strain name (the
## row name of `rose`).
## NOTE(review): `rose` and `input` are created outside this snippet; this
## assumes their rows are in the same order -- confirm.
name <- paste(rose$Species, rownames(rose), sep = " ")

## Reorder the CAZyme columns.
input <- input[, c("GH29", "GH33", "GH95", "GH136", "GH112", "GH2", "GH42", "GH20", "CBM32", "CBM51")]

## Wrap every label in italic() as a plotmath expression, so that only the
## row labels are drawn in italics.
newnames <- lapply(
  name,
  function(x) bquote(italic(.(x))))

## Arguments are spelled out in full (cluster_rows / cluster_cols, color =)
## instead of relying on partial and positional argument matching.
pheatmap(input, filename = "rose_hmo_number_num_sp_2.pdf",
  cluster_rows = FALSE, cluster_cols = FALSE,
  cellheight = 20, cellwidth = 20,
  fontsize_col = 15, fontsize_row = 18, fontsize = 12,
  fontfamily = "serif",
  color = colorRampPalette(c("snow", "red"))(50),
  legend = TRUE, annotation_legend = FALSE,
  labels_row = as.expression(newnames))

標簽旋轉:

# Heatmap with the column labels rotated by 45 degrees; written to
# pan_pav.pdf. `cluster_cols` is spelled out in full instead of relying on
# partial matching of `cluster_col`.
pheatmap(input,
 cluster_cols = TRUE,
 # Three discrete colors only.
 color = colorRampPalette(colors = c("white", "deepskyblue1", "indianred1"))(3),
 # legend = FALSE,
 fontsize_col = 11,
 fontsize_row = 13,
 cellwidth = 16,
 cellheight = 16, angle_col = 45,
 filename = "pan_pav.pdf")

pheatmap常用參數匯總:

# Quick reference of pheatmap arguments used in this article. These lines are
# argument fragments meant to be copied into a pheatmap() call, not a
# runnable script.
display_numbers=TRUE  # show the values of the input matrix in the cells
display_numbers=matrix  # show the entries of a custom matrix in the cells
cutree_rows=num  # split the rows into num groups
cutree_cols=num  # split the columns into num groups
scale="column"  # scale by column
scale="row"  # scale by row
cellwidth=20  # cell width
cellheight=20  # cell height
fontsize_number=18  # size of the marks/numbers in the cells
filename="name.pdf/png"  # save to file; the page size is adjusted automatically
cluster_row = F  # do not cluster rows (the full argument name is cluster_rows)
cluster_col = F  # do not cluster columns (the full argument name is cluster_cols)
legend = F  # hide the color-scale legend
annotation_legend = F  # hide the annotation legend
border = F  # remove cell borders -- NOTE(review): not a documented pheatmap argument; border_color = NA is the documented form, confirm
border_color = "blue"  # cell border color
border_color = NA  # no cell border color
annotation_names_col = F  # do not show the names of the column annotation tracks
labels_row=""  # custom row labels -- presumably an empty string blanks them out; confirm
show_rownames = F  # hide the row labels
fontsize  = 10  # overall font size (the original note says: overall legend size)
fontsize_col = 13  # font size of the column labels
fontsize_row = 13  # font size of the row labels
fontsize_number=18  # size of the marks/numbers in the cells
fontfamily="serif"  # serif font family (the original note says: Times New Roman)
fontface="italic"  # italic text
# Wrap each label in italic() as a plotmath expression.
newnames <- lapply(
  current_name,
  function(x) bquote(italic(.(x))))
labels_row = as.expression(newnames)  # italicize the row labels only

color=colorRampPalette(colors = c("purple", "snow", "green"))(10)  # gradient of 10 colors
color=colorRampPalette(colors = c("snow", "green", "red"))(3)  # only three colors, matching the values in the matrix

## Advanced: colors for annotation groups
names(colors) <- c("strings")
colors = list(
  group = colors,  # the name "group" must be the same everywhere
)  # color configuration
annotation_row # row grouping
annotation_col =  data.frame(group = c()) # column grouping; the column name "group" must match the color list
annotation_colors = colors  # use the configured colors; keep the name "group" consistent

## 色庫
## Color library
# Read the color list from a tab-separated file without a header row.
# comment.char = "" keeps "#" from being treated as a comment marker.
# NOTE(review): the path is machine-specific, and `group` is created outside
# this snippet.
col <- read.table("C:/Users/hutongyuan/Desktop/group_color.list", header = FALSE, sep = "\t", check.names = FALSE, comment.char = "")

# Take one color per distinct CAZyme group and name it after that group.
# seq_along() yields an empty selection when there are no groups, whereas
# 1:length(...) would select rows 1 and 0.
cazyme_groups <- unique(group$CAZyme)
colors <- col[seq_along(cazyme_groups), ]
names(colors) <- cazyme_groups

## 獲取聚類后的矩陣
## Extract the matrix in clustered order
# Draw the column-scaled heatmap to heat_column.pdf and keep the returned
# object, which carries the row and column trees.
out <- pheatmap(
  mat = data,
  color = colorRampPalette(c("black", "yellow"))(30),
  scale = "column",
  fontsize_row = 3,
  fontsize_col = 3,
  filename = "heat_column.pdf"
)

# Inspect the top two levels of the returned object.
str(out, max.level = 2)

# Reorder the rows and columns of the input matrix by the tree orders and
# write the result as a tab-separated file without quotes.
cluster <- data[out[["tree_row"]][["order"]], out[["tree_col"]][["order"]]]
write.table(cluster, file = "data_cluster.txt", sep = "\t", quote = FALSE)

參考:
R語言繪制熱圖——pheatmap
用R包中heatmap畫熱圖
使用pheatmap包繪制熱圖

更多R語言分析和繪圖:
[1] R語言UPGMA聚類分析和樹狀圖
[2] R語言菌群組成分析和Stackplot堆疊圖
[3] R語言菌群Alpha多樣性分析和Boxplot箱形圖
[4] Is it possible to italicize row names with pheatmap()?

最后編輯于
©著作權歸作者所有,轉載或內容合作請聯系作者
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

推薦閱讀更多精彩內容