基因課FTP地址:http://gsx.genek.tv/2020-3-10%E7%9B%B4%E6%92%AD%E4%B8%80%E4%B8%AA%E5%AE%8C%E6%95%B4%E7%9A%84%E8%BD%AC%E5%BD%95%E7%BB%84%E9%A1%B9%E7%9B%AE/
聽張旭東老師的課
R Markdown
- 可生成html文件
- 完成后點擊Knit, 可生成Markdown文本文件
- R Markdown升級版Bookdown
導入數據
# Load the tab-separated differential-expression table and keep it as
# `de_result`, the data frame every plot below reads from.
# header = TRUE: the later code refers to columns by name (log2FoldChange,
# padj, direction, id), so the first row is assumed to hold the column
# names -- TODO confirm against the actual file.
de_result <- read.table(file = 'de_result.tab', sep = '\t', header = TRUE)
加載包
library(ggplot2)
library(ggsci) # ggplot擴展包,內含不同級別期刊的標度
準備框架
# Bare canvas: only the data and the x/y mappings, no layers yet.
ggplot(de_result, aes(log2FoldChange, -log10(padj)))

# Scatter plot: `+` chains each further layer or setting onto the plot.
# theme_bw() suits scientific figures; theme_classic() and theme_test()
# are alternatives worth trying.
ggplot(de_result, aes(log2FoldChange, -log10(padj))) +
  theme_bw() +
  geom_point()
將 direction 映射到點的顏色
- 讓火山圖中上調、下調、ns的基因對應的點有不同的顏色
- commands
# Volcano scatter with point colour mapped to the `direction` column, so
# the different direction groups are drawn in different colours.
ggplot(data = de_result, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(aes(color = direction)) +
  scale_color_npg() + # journal palette from the ggsci package
  theme_bw()
- 標注
- aes 為表示映射的函數
- scale_color_xxx() 利用ggsci包配色,npg為Nature的標度,另有jco等,還有很多
標度
- 對于圖表顏色、樣式等,不同的期刊有不同的喜好
- ggsci是ggplot的擴展包,記載了SCI不同級別期刊的不同喜好
- scale_color_xxx() # 標度調色板。xxx處為對應期刊,詳見幫助文檔
自定義修改顏色
- 需要指定顏色對應項的順序,需要自定義調色板
- commands
library(tidyverse)
# Give `direction` an explicit level order; scale_color_manual() hands out
# the palette entries to the levels in this order.
de_result <- mutate(
  de_result,
  direction = factor(direction, levels = c('up', 'ns', 'down'))
)
# One colour per level, in level order: up, ns, down.
my_palette <- c('green', 'grey', 'red')
# Volcano scatter coloured by direction with the hand-picked palette.
ggplot(data = de_result, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(aes(color = direction)) +
  scale_color_manual(values = my_palette) +
  theme_bw()
- 標注
- direction一列只有up/ns/down三個值,為離散型變量——無序,將離散型變量排序后為因子型變量——有序;
- 使用mutate需要加載tidyverse;
- mutate可以修改列的數據類型,修改為因子型變量——有序;
- my_palette參數中傳入的是自定義的顏色,可以試試顏色my_palette <- c('#E64B35FF', '#999999', '#4DBBD5FF'),數據映射的順序與因子的順序一致,若沒有定義因子順序,默認離散變量的排序為字母順序
- scale_color_manual() 傳入自己的參數
- 顏色調整
- 深沉的顏色相對好看
- 網上查找“16進制顏色”
修改點的大小
- log2FC越大的點越大
- commands
library(ggplot2)
library(tidyverse)
# Give `direction` an explicit level order so the palette below lines up
# with up / ns / down.
de_result <- mutate(
  de_result,
  direction = factor(direction, levels = c('up', 'ns', 'down'))
)
# One colour per level, in level order: up, ns, down.
my_palette <- c('green', 'grey', 'red')
# Point size follows |log2FoldChange|, so stronger changes are drawn larger.
ggplot(data = de_result, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(aes(color = direction, size = abs(log2FoldChange))) +
  scale_color_manual(values = my_palette) +
  scale_size(range = c(0.1, 2)) + # smallest and largest point size
  theme_bw()
- 標注
- geom_point(aes(size = ?)) 調節點大小的映射值
- abs求絕對值
- scale_size(range = ?) 定制點的大小,一般為0.1-2或0.1-3
增加透明度(選加)
library(tidyverse)
# Palette entries follow the factor level order set below: up, ns, down.
my_palette <- c('green', 'grey', 'red')
# Turn `direction` into a factor with an explicit level order.
de_result <- de_result %>%
  mutate(direction = factor(direction, levels = c('up', 'ns', 'down')))
# Colour by direction; both point size and opacity follow |log2FoldChange|.
ggplot(de_result, aes(log2FoldChange, -log10(padj))) +
  geom_point(
    aes(
      color = direction,
      size = abs(log2FoldChange),
      alpha = abs(log2FoldChange)
    )
  ) +
  scale_color_manual(values = my_palette) +
  scale_size(range = c(0.1, 2)) +
  theme_bw()
點的形狀
- 有邊框的點和無邊框的點有差別
- ggplot中的點形狀
- commands
library(ggplot2)
library(tidyverse)
# Give `direction` an explicit level order so the palette below lines up
# with up / ns / down.
de_result <- mutate(
  de_result,
  direction = factor(direction, levels = c('up', 'ns', 'down'))
)
# One fill colour per level, in level order: up, ns, down.
my_palette <- c('green', 'grey', 'red')
# Bordered points: shape 21 has a separate border (`color`) and interior
# (`fill`). The border is a fixed black; the interior is mapped to direction.
ggplot(data = de_result, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(
    aes(fill = direction, size = abs(log2FoldChange)),
    shape = 21,
    alpha = 1 / 2,
    color = 'black'
  ) +
  # The mapped aesthetic is `fill`, so the palette must go through the fill
  # scale; a colour scale would leave the default fill colours in place.
  scale_fill_manual(values = my_palette) +
  scale_size(range = c(0.1, 2)) +
  theme_bw()
- 標注
- shape指定有邊框的點
- alpha = 1/2指定全部顏色透明度
- color指定邊框顏色
- fill指定填充顏色映射值
添加閾值線
- R語言線的類型
- 線的類型
- commands
library(ggplot2)
library(tidyverse)
# Give `direction` an explicit level order so the palette below lines up
# with up / ns / down.
de_result <- mutate(
  de_result,
  direction = factor(direction, levels = c('up', 'ns', 'down'))
)
# One colour per level, in level order: up, ns, down.
my_palette <- c('green', 'grey', 'red')
# Volcano plot with dashed threshold lines. The y axis is -log10(padj), so
# the padj = 0.05 cut-off has to be transformed the same way.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
# thickness; `size` is kept so the snippet still runs on older versions.
ggplot(data = de_result, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(aes(color = direction, size = abs(log2FoldChange))) +
  geom_hline(yintercept = -log10(0.05), linetype = 'dashed', size = 0.2) +
  geom_vline(xintercept = c(-1, 1), linetype = 'dashed') +
  scale_color_manual(values = my_palette) +
  scale_size(range = c(0.1, 2)) +
  theme_bw()
- 標注
- geom_hline()繪制水平線,geom_vline()繪制垂直線;
- yintercept指定線的y軸值,xintercept指定線的x軸值;
- 坐標軸的值是通過計算得到的,不能用數值直接指定
- 虛線——dashed
- size調整線的粗細,一般用默認的即可
- color調整線的顏色,默認黑色
添加標簽
- commands
library(ggplot2)
library(tidyverse)
library(ggrepel)
# Give `direction` an explicit level order so the palette below lines up
# with up / ns / down.
de_result <- mutate(
  de_result,
  direction = factor(direction, levels = c('up', 'ns', 'down'))
)
# Rows to label: |log2FoldChange| above 2 and padj below 1e-50.
top_de <- filter(
  de_result,
  abs(log2FoldChange) > 2 & padj < 1e-50
)
# One colour per level, in level order: up, ns, down.
my_palette <- c('green', 'grey', 'red')
# Volcano plot with threshold lines and non-overlapping labels (ggrepel)
# drawn only for the rows in `top_de`.
ggplot(data = de_result, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(aes(color = direction, size = abs(log2FoldChange))) +
  geom_hline(yintercept = -log10(0.05), linetype = 'dashed', size = 0.2) +
  geom_vline(xintercept = c(-1, 1), linetype = 'dashed') +
  geom_label_repel(data = top_de, aes(label = id)) +
  ylim(c(0, 200)) + # restrict the y axis to the 0-200 range
  scale_color_manual(values = my_palette) +
  scale_size(range = c(0.1, 2)) +
  theme_bw()
- 標注
- ggrepel包用于加標簽(自動避免標簽重疊)
- 給關鍵點添加標簽
- 使用filter需要加載tidyverse包
- 注意padj指定值的表示方式
- 如果想要指定加標簽的點,則top_de賦值改為如下操作
top_de <- filter(de_result, id == 'HF01786')
- geom_label_repel()指定標簽及映射的值
- ylim篩掉太大的已經被研究過的點,防止值太大的點影響其他基因在圖中的顯示
設置坐標軸名字及標題
- commands
library(ggplot2)
library(tidyverse)
library(ggrepel)
# Give `direction` an explicit level order so the palette below lines up
# with up / ns / down.
de_result <- mutate(
  de_result,
  direction = factor(direction, levels = c('up', 'ns', 'down'))
)
# Rows to label: |log2FoldChange| above 2 and padj below 1e-50.
top_de <- filter(
  de_result,
  abs(log2FoldChange) > 2 & padj < 1e-50
)
# One colour per level, in level order: up, ns, down.
my_palette <- c('green', 'grey', 'red')
# Volcano plot with axis names, a size-legend title and a centred plot title.
ggplot(data = de_result, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(aes(color = direction, size = abs(log2FoldChange))) +
  geom_hline(yintercept = -log10(0.05), linetype = 'dashed', size = 0.2) +
  geom_vline(xintercept = c(-1, 1), linetype = 'dashed') +
  geom_label_repel(data = top_de, aes(label = id)) +
  scale_color_manual(values = my_palette) +
  scale_size(range = c(0.1, 2)) +
  labs(
    x = 'log2 fold change',
    y = '-log10(P value)',
    title = 'Volcano Plot',
    size = 'log2 fold change'
  ) +
  theme_bw() +
  # theme() must come after theme_bw(): a complete theme added later would
  # overwrite the title settings.
  theme(plot.title = element_text(size = 18, hjust = 0.5))
- 標注
- labs設置x軸、y軸、圖例(size)及全圖的標題
- theme_bw主題中標題靠左,想要居中要自己設置,不過要注意順序:先指定theme_bw(),再用theme()修改,否則theme_bw()會覆蓋前面的自定義設置
- theme()更改主題中設置,plot.title = element_text()給標題設置,size → 字號,hjust = 0/0.5/1 → 靠左/居中/靠右,vjust可設置縱向居中
修改圖例
- commands
library(ggplot2)
library(tidyverse)
library(ggrepel)
# Give `direction` an explicit level order so the palette below lines up
# with up / ns / down.
de_result <- mutate(
  de_result,
  direction = factor(direction, levels = c('up', 'ns', 'down'))
)
# Rows to label: |log2FoldChange| above 2 and padj below 1e-50.
top_de <- filter(
  de_result,
  abs(log2FoldChange) > 2 & padj < 1e-50
)
# One colour per level, in level order: up, ns, down.
my_palette <- c('green', 'grey', 'red')
# Final volcano plot: size legend removed, colour legend drawn inside the
# panel with transparent backgrounds so the grid lines stay visible.
ggplot(data = de_result, aes(x = log2FoldChange, y = -log10(padj))) +
  geom_point(aes(color = direction, size = abs(log2FoldChange))) +
  geom_hline(yintercept = -log10(0.05), linetype = 'dashed', size = 0.2) +
  geom_vline(xintercept = c(-1, 1), linetype = 'dashed') +
  geom_label_repel(data = top_de, aes(label = id)) +
  scale_color_manual(values = my_palette) +
  scale_size(range = c(0.1, 2)) +
  labs(
    x = 'log2 fold change',
    y = '-log10(P value)',
    title = 'Volcano Plot',
    size = 'log2 fold change'
  ) +
  # "none" drops the size legend; passing FALSE here is deprecated.
  guides(size = "none") +
  theme_bw() +
  # NOTE(review): ggplot2 >= 3.5.0 prefers legend.position = 'inside' plus
  # legend.position.inside for coordinates; the numeric form is kept so the
  # snippet still runs on older versions.
  theme(
    plot.title = element_text(size = 18, hjust = 0.5),
    legend.background = element_blank(),
    legend.key = element_blank(),
    legend.position = c(0.93, 0.85)
  )
- 標注
- guides()去除一個圖例
- theme中legend.background 設置圖例文字背景,legend.key設置圖例圖像背景,使背景變透明,不要遮蓋圖中的網格線
- legend.position限定圖例位置,默認為legend.position = 'right', 想畫在圖中需要手動調整
- legend.position = c(x,y), 0→1為坐標軸最小到最大,手動調整,多次換值
圖片導出
- 方法一
Rstudio導出pdf格式,不要用PNG
- 方法二
畫圖前聲明pdf文件,打開一個pdf文件
# Open a PDF graphics device writing to 'p1.pdf'; plots drawn from here on
# go to that file.
pdf(file = 'p1.pdf')
ggplot(... ...) # placeholder: the plotting code is omitted here
# Close the device so the PDF file is finished and written out.
dev.off()
總結
- 1.圖層:顏色、透明度、形狀等的調整為全局調整;
- 2.映射:對應的點顏色、透明度、形狀等屬性有不同分類;
- 3.標度:控制映射的規律
- 4.主題:顏色是否好看