原始數據主題:Tour de France Winners
重新繪制主題:口袋妖怪 Pokemons
跟著劉博學畫圖,語雀指路:
TidyTuesday 可視化學習之 ggplot2 一筆一畫繪制表格
原始數據:
https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-07/tdf_winners.csv
重繪數據(口袋妖怪):
https://pan.baidu.com/s/1sMHPLKL_OsEpsrwJ6uQGPQ
提取碼:oxm0
轉載請注明:陳熹 chenx6542@foxmail.com (簡書號:半為花間酒)
前置知識
- rle函數
計算向量中連續相同字符(廣義游程)的個數
x <- c(1,1,1,2,3,3,3,1,1)
rle(x)
# Run Length Encoding
# lengths: int [1:4] 3 1 3 2
# values : num [1:4] 1 2 3 1
x <- c(1,2,3,4,4,1,2)
rle(x)
# Run Length Encoding
# lengths: int [1:6] 1 1 1 2 1 1
# values : num [1:6] 1 2 3 4 1 2
x <- c(1,1,2,3,4,4,1,3)
rle(x)
# Run Length Encoding
# lengths: int [1:6] 2 1 1 2 1 1
# values : num [1:6] 1 2 3 4 1 3
圖源《R編程藝術》
- glue函數
類似python3的字符串格式化
# > python
name = 'Fred'
age = '50'
print(f'My name is {name}, my age next year is {age + 1}')
# My name is Fred, my age next year is 51.
#> R
name <- "Fred"
age <- 50
anniversary <- as.Date("1991-10-12")
library(glue)
glue('My name is {name},
my age next year is {age + 1},
my anniversary is {anniversary}')
# 會識別\n換行
# My name is Fred,
# my age next year is 51,
# my anniversary is 1991-10-12
glue('My name is {name},',
'my age next year is {age + 1},',
'my anniversary is {format(anniversary, "%A, %B %d, %Y")}.')
# 以,相連直接拼接不換行
# My name is Fred,my age next year is 51,my anniversary is 星期六, 十月 12, 1991.
- coord_cartesian函數
放大鏡效果不改變圖形形狀
library(patchwork)
library(ggplot2)
p1 <- ggplot(mtcars, aes(disp, wt)) +
geom_point() +
geom_smooth()
p2 <- p1 + scale_x_continuous(limits = c(325, 500))
p3 <- p1 + coord_cartesian(xlim = c(325, 500))
p1 + p2 + p3
- with函數
# 使用with函數將dat添加到環境
dat <- matrix(rnorm(20),nrow = 4,ncol=5)
colnames(dat)<-paste("a" ,1:5,sep ="")
rownames(dat)<-paste("b",1:4,sep = "")
dat <- as.data.frame(dat)
dat$a1 + dat$a2
# 等價于
with(dat, a1+a2)
# 另一個例子
dat <- read.csv("femaleMiceWeights.csv")
X <- filter(dat,Diet == "chow") %>%
select(Bodyweight) %>% unlist
Y <- filter(dat,Diet == "hf") %>%
select(Bodyweight) %>% unlist
t.test(X,Y)$p.value
# 等價于
with(t.test(X,Y), p.value)
- here包
How can you avoid setwd()
at the top of every script?
Use the here()
function from the here package to build the path when you read or write a file. Create paths relative to the top-level directory.
getwd()
# 當前目錄,也可以直接不指定
ggsave(here::here('.',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")),
dpi = 320, width = 11, height = 17)
# 上一級目錄
ggsave(here::here('..',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")),
dpi = 320, width = 11, height = 17)
# 下級目錄
ggsave(here::here('dat','test',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")),
dpi = 320, width = 11, height = 17)
原圖復現
- 數據預處理部分
library(tidyverse)
library(lubridate)
library(countrycode)
# remotes::install_github("wilkelab/ggtext")
library(ggtext)
library(glue)
library(here)
#library(skimr)
library(dplyr)
tdf_winners <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-07/tdf_winners.csv')
tdf_table <- tdf_winners %>%
mutate(
wins_consecutive = with(rle(winner_name), rep(lengths, times = lengths)),
year = year(start_date), # 提取年數據
year_labels = ifelse(year %% 10 == 0, glue("**{year}**"), year),
year_group = case_when(
year < 1915 ~ 1,
year > 1915 & year < 1940 ~ 2,
TRUE ~ 3),
avg_speed = distance / time_overall,
country_code = countrycode(nationality, origin = "country.name", destination = "iso3c"),
winner_annot = ifelse(wins_consecutive > 2, glue("**{winner_name} ({country_code})**"), glue("{winner_name} ({country_code})"))
) %>%
# 分組很妙,添加行號
group_by(year_group) %>%
mutate(
n_annot = row_number(),
annot = ifelse((n_annot - 2) %% 3 == 0, TRUE, FALSE)
) %>%
ungroup() %>%
add_row(year = c(1915, 1916, 1917, 1918, 1940, 1941, 1942, 1943)) %>%
arrange(year) %>%
mutate(n = row_number())
- 畫圖部分
# 把字體安排上
windowsFonts(HEL=windowsFont("Helvetica CE 55 Roman"),
RMN=windowsFont("Times New Roman"),
ARL=windowsFont("Arial"),
ARLB=windowsFont("Arial Bold"),
JBM=windowsFont("JetBrains Mono"))
# step1: geom_segment() 標虛線
ggplot(tdf_table) +
# dotted gridlines ---------------------------------------------------
# 使用 geom_segment() 函數添加虛線
geom_segment(data = subset(tdf_table, !is.na(year_labels)),
aes(x = 0, xend = 24000, y = n, yend = n),
linetype = "dotted", size = 0.2) +
# step2:加上左右兩側的年份
geom_richtext(aes(x = -1000, y = n, label = year_labels),
fill = "red", label.color = NA,
label.padding = unit(0.1, "lines"),
family = "JBM", size = 2.5) +
geom_richtext(aes(x = 25000, y = n, label = year_labels),
fill = "blue", label.color = NA,
label.padding = unit(0.1, "lines"),
family = "JBM", size = 2.5) +
# step3:geom_area() 加上填充面積
geom_area(aes(x = distance * 0.625, y = n, group = year_group),
fill = "#7DDDB6", alpha = 0.6,
orientation = "y", position = "identity") +
# step4:選擇性加上每一個上面對應的點
geom_point(data = subset(tdf_table, annot),
aes(x = distance * 0.625, y = n), size = 0.5) +
# step5:給 step4 中的點加上數值
geom_label(data = subset(tdf_table, annot),
aes(x = distance * 0.625 + 100, y = n, label = distance),
fill = "#F3F2EE", label.size = 0,
label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
# step6:給每一行加上注釋,對應 WINNER
geom_richtext(aes(x = 5300, y = n, label = winner_annot, .na = NULL),
fill = "#F3F2EE", label.size = 0,
label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
geom_label(aes(x = 10300, y = n, label = glue("{winner_team}", .na = NULL)),
fill = "#F3F2EE", label.size = 0, label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
# step7:geom_segment 函數添加 AVERAGE SPEED 數據
geom_segment(aes(x = 16000, xend = 16000 + avg_speed * 66.67, y = n, yend = n),
size = 2, colour = "#7DDDB6", alpha = 0.6) +
# step8:選擇性添加 AVERAGE SPEED 對應的數值
geom_label(data = subset(tdf_table, annot),
aes(x = 16000 + avg_speed * 66.67 + 100, y = n,
label = round(avg_speed, 1)), fill = "#F3F2EE",
label.size = 0, label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
# step9:添加 TOTAL TIME 時間填充(geom_ribbon)、點、標簽
geom_ribbon(aes(xmin = 20000, xmax = 20000 + time_overall * 10, y = n, group = year_group),
fill = "#FCDF33", alpha = 0.6, orientation = "y", position = "identity") +
geom_point(data = subset(tdf_table, annot),
aes(x = 20000 + time_overall * 10, y = n), size = 0.5) +
geom_label(data = subset(tdf_table, annot),
aes(x = 20000 + time_overall * 10 + 100, y = n,
label = round(time_overall, 1)),
fill = "#F3F2EE", label.size = 0,
label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
# step10:annotate 函數添加豎直線
annotate("segment",
x = c(-2000, 0, 5000, 10000, 16000, 20000, 24000, 26000),
xend = c(-2000, 0, 5000, 10000, 16000, 20000, 24000, 26000),
y = -4, yend = 115, size = 0.3) +
# step11:annotate 函數添加三條橫線
annotate("segment",
x = -2000, xend = 26000,
y = c(-4, -1, 115), yend = c(-4, -1, 115), size = 0.3) +
# step12:annotate 添加表頭
annotate("text",
x = c(-1000, 2500, 7500, 13000, 18000, 22000, 25000),
y = -2.5,
label = toupper(c("year", "distance", "winner", "team", "average speed", "total time", "year")),
hjust = 0.5, family = "ARLB", size = 3.5) +
# step13:annotate 函數加上空白
annotate("rect",
xmin = -2000, ymin = c(13, 38),
xmax = 26000, ymax = c(16, 41),
fill = "#F3F2EE", colour = "black", size = 0.3) +
# step14:annotate 函數參數 richtext 添加中間小表頭
annotate("richtext", x = 13000, y = c(14.5, 39.5),
label = c("**1915-1918** Tour suspended because of Word War I",
"**1940-1946** Tour suspended because of Word War II"),
label.color = NA, fill = "#F3F2EE", hjust = 0.5,
family = "ARL", size = 3.5) +
# step15:annotate 函數參數 text 給 DISTANCE 欄加上單位刻度
annotate("text", x = c(100, 4900), y = 0,
label = c("0", "8000 km"), hjust = c(0, 1),
family = "ARL", size = 3) +
# step16:annotate 函數參數 text 給其他的添加刻度尺和注釋
annotate("text", x = c(16100, 19900), y = 0,
label = c("0", "60 km/h"), hjust = c(0, 1),
family = "ARL", size = 3) +
annotate("text", x = c(20100, 23900), y = 0,
label = c("0", "300 h"), hjust = c(0, 1),
family = "ARL", size = 3) +
annotate("text", x = 26000, y = -6,
label = "Source: alastairrushworth/tdf & kaggle.com/jaminliu | Graphic: Georgios Karamanis",
hjust = 1, family = "ARL", size = 3) +
# step17:coord_cartesian 函數取消畫板限制范圍
coord_cartesian(clip = 'off') +
# step18:scale_x_continuous 函數通過 limits 和 expand 函數控制貼 y 軸距離
scale_x_continuous(limits = c(-2300, 26300), expand = expansion(add = 1)) +
# step19:scale_y_reverse 函數翻轉 y 軸左邊起始順序,上下顛倒,并通過 expand = expansion(add = 0) 控制 y 軸頂端和低端間隙為 0
scale_y_reverse(expand = expansion(add = 0)) +
# step20:labs 加標題以及 theme_void 去除主題線條背景以及坐標軸
labs(
title = "Tour de France Winners") +
theme_void(base_family = "JBM") +
# step21:設置灰色背景,畫板大小,以及標題大小
theme(
plot.background = element_rect(fill = "#F3F2EE", colour = NA),
plot.margin = margin(20, 20, 20, 20),
plot.title = element_text(hjust = 0.01, size = 28,
family = "JBM", margin = margin(0, 0, -8, 0))
)
# ggsave(here::here('dat','test',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")),
# dpi = 320, width = 11, height = 17)
# # step22:用here方法保存圖片
ggsave(here::here('.',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")),
dpi = 320, width = 11, height = 17)
- 成品
拿數據重新繪圖
library(tidyverse)
library(lubridate)
library(countrycode)
# remotes::install_github("wilkelab/ggtext")
library(ggtext)
library(glue)
library(here)
#library(skimr)
library(dplyr)
library(RColorBrewer)
library(scales)
# 設置漸變顏色
Blues <- brewer.pal(9, "Blues")[4:7]
pal <- colorRampPalette(Blues)
new_Blues <- pal(10)
show_col(new_Blues)
# 注冊字體
windowsFonts(HEL=windowsFont("Helvetica CE 55 Roman"),
RMN=windowsFont("Times New Roman"),
ARL=windowsFont("Arial"),
ARLB=windowsFont("Arial Bold"),
JBM=windowsFont("JetBrains Mono"),
MY=windowsFont("Microsoft YaHei"),
IPML=windowsFont("IBM Plex Mono Light"),
IPS=windowsFont("IBM Plex Sans"),
IPSB=windowsFont("IBM Plex Sans Bold"),
JBMB=windowsFont("JetBrains Mono Bold"))
pokemons <- readr::read_csv('C:/Users/chenx/Desktop/pokemons.csv')
pokemons_dat <- pokemons %>%
filter((100 <= id & id <= 126 )| (131 <= id & id <= 156 ) | (161 <= id & id <= 200 )) %>%
select(1:5,'book_color','HP':'total_value') %>%
mutate(
id_group = case_when(
id < 127 ~ 1,
id > 130 & id < 127 ~ 2,
TRUE ~ 3),
# 實際上不轉換也行
color = case_when(
book_color == '綠色' ~ 'green',
book_color == '灰色' ~ 'grey',
book_color == '褐色' ~ 'brown',
book_color == '藍色' ~ 'blue',
book_color == '粉紅色' ~ 'pink',
book_color == '紅色' ~ 'red',
book_color == '黃色' ~ 'yellow',
book_color == '紫色' ~ 'purple',
book_color == '白色' ~ 'white',
book_color == '黑色' ~ 'black'),
name_type = glue('{Chinese_name}/{Japanese_name}({poketype})')
) %>%
group_by(id_group) %>%
mutate(
n_annot = row_number(),
# 設置可顯示出的數據點
annot = ifelse((n_annot - 2) %% 3 == 0, TRUE, FALSE)
) %>%
ungroup() %>%
add_row(id = c(127, 128, 129, 130, 157, 158, 159, 160)) %>%
arrange(id) %>%
mutate(n = row_number())
ggplot(pokemons_dat) +
geom_segment(data = subset(pokemons_dat, !is.na(color)),
aes(x = 0, xend = 24000, y = n, yend = n),
linetype = "dotted", size = 0.2) +
geom_richtext(aes(x = -1000, y = n, label = id),
fill = "red", label.color = NA,
label.padding = unit(0.1, "lines"),
family = "JBM", size = 2.5) +
geom_richtext(aes(x = 25000, y = n, label = id, fill = color),
label.color = NA,
label.padding = unit(0.1, "lines"),
family = "JBM", size = 2.5) +
scale_fill_manual(values =new_Blues) +
guides(fill=FALSE) +
geom_area(aes(x = total_value * 7, y = n, group = id_group),
fill = "#7DDDB6", alpha = 0.6,
orientation = "y", position = "identity") +
geom_point(data = subset(pokemons_dat, annot),
aes(x = total_value * 7, y = n), size = 0.5) +
geom_label(data = subset(pokemons_dat, annot),
aes(x = total_value * 7 + 100, y = n, label = total_value),
fill = "#F3F2EE", label.size = 0,
label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
# 很無奈,中文和日文打帶邊框標簽都會留出右端空格,所以只能把英文名單拎出來加邊框
geom_label(aes(x = 6000, y = n, label = name_type,.na = NULL),
fill = "#F3F2EE", label.size = 0,
label.padding = unit(0.1, "lines"),
hjust = 0, family = "ARL", size = 2.5) +
geom_richtext(aes(x = 13000, y = n, label = English_name, .na = NULL),
fill = "#F3F2EE", label.size = 0, label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
geom_segment(aes(x = 16000, xend = 16000 + Attack * 21, y = n, yend = n),
size = 2, colour = "#7DDDB6", alpha = 0.6) +
geom_label(data = subset(pokemons_dat, annot),
aes(x = 16000 + Attack * 21 + 100, y = n,
label = round(Attack, 1)), fill = "#F3F2EE",
label.size = 0, label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
geom_ribbon(aes(xmin = 21000, xmax = 21000 + Speed * 15, y = n, group = id_group),
fill = "#FCDF33", alpha = 0.6, orientation = "y", position = "identity") +
geom_point(data = subset(pokemons_dat, annot),
aes(x = 21000 + Speed * 15, y = n), size = 0.5) +
geom_label(data = subset(pokemons_dat, annot),
aes(x = 21000 + Speed * 15 + 100, y = n,
label = round(Speed, 1)),
fill = "#F3F2EE", label.size = 0,
label.padding = unit(0.1, "lines"),
hjust = 0, family = "JBM", size = 2.5) +
annotate("segment",
x = c(-2000, 0, 5700, 12700, 16000, 21000, 24000, 26000),
xend = c(-2000, 0, 5700, 12700, 16000, 21000, 24000, 26000),
y = -4, yend = 103, size = 0.3) +
annotate("segment",
x = -2000, xend = 26000,
y = c(-4, -1, 103), yend = c(-4, -1, 103), size = 0.3) +
annotate("text",
x = c(-1000, 2750, 9200, 14350, 18500, 22500, 25000),
y = -2.5,
label = toupper(c("ID", "ABILITY SCORES", "NAME/TYPE", "ENAME", "ATTACK", "SPEED", "ID")),
hjust = 0.5, family = "ARLB", size = 3.5) +
annotate("rect",
xmin = -2000, ymin = c(27.5, 57.5),
xmax = 26000, ymax = c(31.5, 61.5),
fill = "#F3F2EE", colour = "black", size = 0.3) +
annotate("richtext", x = 13000, y = c(29.5, 59.5),
label = c("**127-130** Which Have Been Deleted :) I",
"**157-160** Which Have Been Deleted :) II"),
label.color = NA, fill = "#F3F2EE", hjust = 0.5,
family = "JBM", size = 5) +
annotate("text", x = c(100, 5600), y = 0,
label = c("0", "800"), hjust = c(0, 1),
family = "JBM", size = 3) +
annotate("text", x = c(16100, 20900), y = 0,
label = c("0", "250"), hjust = c(0, 1),
family = "JBM", size = 3) +
annotate("text", x = c(21100, 23900), y = 0,
label = c("0", "200"), hjust = c(0, 1),
family = "JBM", size = 3) +
annotate("text", x = 26000, y = -6,
label = "Source: wiki.52poke.com | Graphic: Xi Chen",
hjust = 1, family = "JBM", size = 3) +
coord_cartesian(clip = 'off') +
scale_x_continuous(limits = c(-2300, 26300), expand = expansion(add = 1)) +
scale_y_reverse(expand = expansion(add = 0)) +
labs(
title = "Pokemons"
) +
theme_void(base_family = "JBM") +
theme(
plot.background = element_rect(fill = "#F3F2EE", colour = NA),
plot.margin = margin(20, 20, 20, 20),
plot.title = element_text(hjust = 0.01, size = 28,
family = "JBMB", margin = margin(0, 0, -8, 0))
)
ggsave(here::here('.',paste0("Pokemons ", format(Sys.time(), "%Y%m%d"), ".png")),
dpi = 640, width = 10, height = 14)