[R語言] TidyTuesday ggplot2可視化學習 1 (Tour de France Winners)

原始數據主題:Tour de France Winners

重新繪制主題:口袋妖怪 Pokemons

跟著劉博學畫圖,語雀指路
TidyTuesday 可視化學習之 ggplot2 一筆一畫繪制表格
原始數據
https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-07/tdf_winners.csv
重繪數據(口袋妖怪)
https://pan.baidu.com/s/1sMHPLKL_OsEpsrwJ6uQGPQ
提取碼:oxm0
轉載請注明:陳熹 chenx6542@foxmail.com (簡書號:半為花間酒)

前置知識

- rle函數

計算向量中連續相同字符(廣義游程)的個數

x <- c(1,1,1,2,3,3,3,1,1)
rle(x)
# Run Length Encoding
# lengths: int [1:4] 3 1 3 2
# values : num [1:4] 1 2 3 1

x <- c(1,2,3,4,4,1,2)
rle(x)
# Run Length Encoding
# lengths: int [1:6] 1 1 1 2 1 1
# values : num [1:6] 1 2 3 4 1 2

x <- c(1,1,2,3,4,4,1,3)
rle(x)
# Run Length Encoding
# lengths: int [1:6] 2 1 1 2 1 1
# values : num [1:6] 1 2 3 4 1 3

圖源《R編程藝術》

- glue函數

類似python3的字符串格式化

# > python
name = 'Fred'
age = '50'
print(f'My name is {name}, my age next year is {age + 1}')
# My name is Fred, my age next year is 51.
#> R
name <- "Fred"
age <- 50
anniversary <- as.Date("1991-10-12")

library(glue)
glue('My name is {name},
      my age next year is {age + 1},
      my anniversary is {anniversary}')
# 會識別\n換行
# My name is Fred,
# my age next year is 51,
# my anniversary is 1991-10-12

glue('My name is {name},',
     'my age next year is {age + 1},',
     'my anniversary is {format(anniversary, "%A, %B %d, %Y")}.')
# 以,相連直接拼接不換行
# My name is Fred,my age next year is 51,my anniversary is 星期六, 十月 12, 1991.

- coord_cartesian函數

放大鏡效果不改變圖形形狀

library(patchwork)
library(ggplot2)

p1 <- ggplot(mtcars, aes(disp, wt)) +
  geom_point() +
  geom_smooth()
p2 <- p1 + scale_x_continuous(limits = c(325, 500))
p3 <- p1 + coord_cartesian(xlim = c(325, 500))

p1 + p2 + p3

- with函數

部分參考:R語言with/within函數添加數據框到環境變量

# 使用with函數將dat添加到環境
dat <- matrix(rnorm(20),nrow = 4,ncol=5)
colnames(dat)<-paste("a" ,1:5,sep ="")
rownames(dat)<-paste("b",1:4,sep = "")
dat <- as.data.frame(dat)

dat$a1 + dat$a2
# 等價于
with(dat, a1+a2)

# 另一個例子
dat <- read.csv("femaleMiceWeights.csv") 
X <- filter(dat,Diet == "chow") %>%  
  select(Bodyweight) %>% unlist
Y <- filter(dat,Diet == "hf") %>%  
  select(Bodyweight) %>% unlist

t.test(X,Y)$p.value
# 等價于
with(t.test(X,Y), p.value)

- here包

部分參考:Project-oriented workflow 面向項目的工作流程

How can you avoid setwd() at the top of every script?
Use the here() function from the here package to build the path when you read or write a file. Create paths relative to the top-level directory.

getwd()

# 當前目錄,也可以直接不指定
ggsave(here::here('.',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")), 
       dpi = 320, width = 11, height = 17)

# 上一級目錄
ggsave(here::here('..',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")), 
       dpi = 320, width = 11, height = 17)

# 下級目錄
ggsave(here::here('dat','test',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")), 
       dpi = 320, width = 11, height = 17)

原圖復現

- 數據預處理部分

library(tidyverse)
library(lubridate)
library(countrycode)
# remotes::install_github("wilkelab/ggtext")
library(ggtext)
library(glue)
library(here)
#library(skimr)
library(dplyr)

tdf_winners <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-07/tdf_winners.csv')

tdf_table  <- tdf_winners %>% 
  mutate(
    wins_consecutive = with(rle(winner_name), rep(lengths, times = lengths)),
    year = year(start_date), # 提取年數據
    year_labels = ifelse(year %% 10 == 0, glue("**{year}**"), year),
    year_group = case_when(
      year < 1915 ~ 1,
      year > 1915 & year < 1940 ~ 2,
      TRUE ~ 3),
    avg_speed = distance / time_overall,
    country_code = countrycode(nationality, origin = "country.name", destination = "iso3c"),
    winner_annot = ifelse(wins_consecutive > 2, glue("**{winner_name} ({country_code})**"), glue("{winner_name} ({country_code})"))
  ) %>%
  # 分組很妙,添加行號
  group_by(year_group) %>% 
  mutate(
    n_annot = row_number(),
    annot = ifelse((n_annot - 2) %% 3 == 0, TRUE, FALSE)
  ) %>% 
  ungroup() %>% 
  add_row(year = c(1915, 1916, 1917, 1918, 1940, 1941, 1942, 1943)) %>%
  arrange(year) %>% 
  mutate(n = row_number())

- 畫圖部分

# 把字體安排上
windowsFonts(HEL=windowsFont("Helvetica CE 55 Roman"),
             RMN=windowsFont("Times New Roman"),
             ARL=windowsFont("Arial"),
             ARLB=windowsFont("Arial Bold"),
             JBM=windowsFont("JetBrains Mono"))

# step1: geom_segment() 標虛線
ggplot(tdf_table) +
  # dotted gridlines ---------------------------------------------------
  # 使用 geom_segment() 函數添加虛線
  geom_segment(data = subset(tdf_table, !is.na(year_labels)),
             aes(x = 0, xend = 24000, y = n, yend = n), 
             linetype = "dotted", size = 0.2) +
  # step2:加上左右兩側的年份
  geom_richtext(aes(x = -1000, y = n, label = year_labels), 
              fill = "red", label.color = NA, 
              label.padding = unit(0.1, "lines"), 
              family = "JBM", size = 2.5) +
  geom_richtext(aes(x = 25000, y = n, label = year_labels),
                fill = "blue", label.color = NA, 
                label.padding = unit(0.1, "lines"),
                family = "JBM", size = 2.5) +
  # step3:geom_area() 加上填充面積
  geom_area(aes(x = distance * 0.625, y = n, group = year_group), 
            fill = "#7DDDB6", alpha = 0.6, 
            orientation = "y", position = "identity") +
  # step4:選擇性加上每一個上面對應的點
  geom_point(data = subset(tdf_table, annot), 
             aes(x = distance * 0.625, y = n), size = 0.5) +
  # step5:給 step4 中的點加上數值
  geom_label(data = subset(tdf_table, annot), 
             aes(x = distance * 0.625 + 100, y = n, label = distance), 
             fill = "#F3F2EE", label.size = 0, 
             label.padding = unit(0.1, "lines"), 
             hjust = 0, family = "JBM", size = 2.5) +
  # step6:給每一行加上注釋,對應 WINNER
  geom_richtext(aes(x = 5300, y = n, label = winner_annot, .na = NULL), 
              fill = "#F3F2EE", label.size = 0, 
              label.padding = unit(0.1, "lines"), 
              hjust = 0, family = "JBM", size = 2.5) +
  geom_label(aes(x = 10300, y = n, label = glue("{winner_team}", .na = NULL)), 
             fill = "#F3F2EE", label.size = 0, label.padding = unit(0.1, "lines"),
             hjust = 0, family = "JBM", size = 2.5) +
  # step7:geom_segment 函數添加 AVERAGE  SPEED 數據
  geom_segment(aes(x = 16000, xend = 16000 + avg_speed * 66.67, y = n, yend = n), 
               size = 2, colour = "#7DDDB6", alpha = 0.6) +
  # step8:選擇性添加 AVERAGE SPEED 對應的數值
  geom_label(data = subset(tdf_table, annot), 
             aes(x = 16000 + avg_speed * 66.67 + 100, y = n, 
                 label = round(avg_speed, 1)), fill = "#F3F2EE", 
             label.size = 0, label.padding = unit(0.1, "lines"), 
             hjust = 0, family = "JBM", size = 2.5) +
  # step9:添加 TOTAL TIME 時間填充(geom_ribbon)、點、標簽
  geom_ribbon(aes(xmin = 20000, xmax = 20000 + time_overall * 10, y = n, group = year_group),
              fill = "#FCDF33", alpha = 0.6, orientation = "y", position = "identity") +
  geom_point(data = subset(tdf_table, annot), 
             aes(x = 20000 + time_overall * 10, y = n), size = 0.5) +
  geom_label(data = subset(tdf_table, annot), 
             aes(x = 20000 + time_overall * 10 + 100, y = n, 
                 label = round(time_overall, 1)), 
             fill = "#F3F2EE", label.size = 0, 
             label.padding = unit(0.1, "lines"),
             hjust = 0, family = "JBM", size = 2.5) +
  # step10:annotate 函數添加豎直線
  annotate("segment", 
           x = c(-2000, 0, 5000, 10000, 16000, 20000, 24000, 26000),
           xend = c(-2000, 0, 5000, 10000, 16000, 20000, 24000, 26000),
           y = -4, yend = 115, size = 0.3) +
  # step11:annotate 函數添加三條橫線
  annotate("segment",
           x = -2000, xend = 26000, 
           y = c(-4, -1, 115), yend = c(-4, -1, 115), size = 0.3) +
  # step12:annotate 添加表頭
  annotate("text", 
           x = c(-1000, 2500, 7500, 13000, 18000, 22000, 25000), 
           y = -2.5, 
           label = toupper(c("year", "distance", "winner", "team", "average speed", "total time", "year")), 
           hjust = 0.5, family = "ARLB", size = 3.5) +
  # step13:annotate 函數加上空白
  annotate("rect",
           xmin = -2000, ymin = c(13, 38), 
           xmax = 26000, ymax = c(16, 41), 
           fill = "#F3F2EE", colour = "black", size = 0.3) +
  # step14:annotate 函數參數 richtext 添加中間小表頭
  annotate("richtext", x = 13000, y = c(14.5, 39.5), 
           label = c("**1915-1918** Tour suspended because of Word War I",
                     "**1940-1946** Tour suspended because of Word War II"), 
           label.color = NA, fill = "#F3F2EE", hjust = 0.5, 
           family = "ARL", size = 3.5) +
  # step15:annotate 函數參數 text 給 DISTANCE 欄加上單位刻度
  annotate("text", x = c(100, 4900), y = 0, 
           label = c("0", "8000 km"), hjust = c(0, 1), 
           family = "ARL", size = 3) +
  # step16:annotate 函數參數 text 給其他的添加刻度尺和注釋
  annotate("text", x = c(16100, 19900), y = 0, 
           label = c("0", "60 km/h"), hjust = c(0, 1), 
           family = "ARL", size = 3) +
  annotate("text", x = c(20100, 23900), y = 0, 
           label = c("0", "300 h"), hjust = c(0, 1), 
           family = "ARL", size = 3) +
  annotate("text", x = 26000, y = -6, 
           label = "Source: alastairrushworth/tdf & kaggle.com/jaminliu | Graphic: Georgios Karamanis", 
           hjust = 1, family = "ARL", size = 3) +
  # step17:coord_cartesian 函數取消畫板限制范圍
  coord_cartesian(clip = 'off') +
  # step18:scale_x_continuous 函數通過 limits 和 expand 函數控制貼 y 軸距離
  scale_x_continuous(limits = c(-2300, 26300), expand = expansion(add = 1)) +
  # step19:scale_y_reverse 函數翻轉 y 軸左邊起始順序,上下顛倒,并通過 expand = expansion(add = 0) 控制 y 軸頂端和低端間隙為 0
  scale_y_reverse(expand = expansion(add = 0)) +
  # step20:labs 加標題以及 theme_void 去除主題線條背景以及坐標軸
  labs(
    title = "Tour de France Winners") +
  theme_void(base_family = "JBM") +
  # step21:設置灰色背景,畫板大小,以及標題大小
  theme(
    plot.background = element_rect(fill = "#F3F2EE", colour = NA),
    plot.margin = margin(20, 20, 20, 20),
    plot.title = element_text(hjust = 0.01, size = 28, 
                              family = "JBM", margin = margin(0, 0, -8, 0))
  )


# ggsave(here::here('dat','test',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")), 
#      dpi = 320, width = 11, height = 17)

# # step22:用here方法保存圖片
ggsave(here::here('.',paste0("tour-de-france ", format(Sys.time(), "%Y%m%d"), ".png")), 
       dpi = 320, width = 11, height = 17)

- 成品

拿數據重新繪圖

library(tidyverse)
library(lubridate)
library(countrycode)
# remotes::install_github("wilkelab/ggtext")
library(ggtext)
library(glue)
library(here)
#library(skimr)
library(dplyr)
library(RColorBrewer)
library(scales)

# 設置漸變顏色
Blues <- brewer.pal(9, "Blues")[4:7]
pal <- colorRampPalette(Blues)
new_Blues <- pal(10)
show_col(new_Blues)

# 注冊字體
windowsFonts(HEL=windowsFont("Helvetica CE 55 Roman"),
             RMN=windowsFont("Times New Roman"),
             ARL=windowsFont("Arial"),
             ARLB=windowsFont("Arial Bold"),
             JBM=windowsFont("JetBrains Mono"),
             MY=windowsFont("Microsoft YaHei"),
             IPML=windowsFont("IBM Plex Mono Light"),
             IPS=windowsFont("IBM Plex Sans"),
             IPSB=windowsFont("IBM Plex Sans Bold"),
             JBMB=windowsFont("JetBrains Mono Bold"))

pokemons <- readr::read_csv('C:/Users/chenx/Desktop/pokemons.csv')

pokemons_dat <- pokemons %>% 
  filter((100 <= id & id <= 126 )| (131 <= id & id <= 156 ) |  (161 <= id & id <= 200 )) %>% 
  select(1:5,'book_color','HP':'total_value') %>% 
  mutate(
    id_group = case_when(
      id < 127 ~ 1,
      id > 130 & id < 127 ~ 2,
      TRUE ~ 3),
    # 實際上不轉換也行
    color = case_when(
    book_color == '綠色' ~ 'green',
    book_color == '灰色' ~ 'grey',
    book_color == '褐色' ~ 'brown',
    book_color == '藍色' ~ 'blue',
    book_color == '粉紅色' ~ 'pink',
    book_color == '紅色' ~ 'red',
    book_color == '黃色' ~ 'yellow',
    book_color == '紫色' ~ 'purple',
    book_color == '白色' ~ 'white',
    book_color == '黑色' ~ 'black'),
    name_type = glue('{Chinese_name}/{Japanese_name}({poketype})')
  ) %>% 
  group_by(id_group) %>% 
  mutate(
    n_annot = row_number(),
    # 設置可顯示出的數據點
    annot = ifelse((n_annot - 2) %% 3 == 0, TRUE, FALSE)
  ) %>% 
  ungroup() %>% 
  add_row(id = c(127, 128, 129, 130, 157, 158, 159, 160)) %>% 
  arrange(id) %>% 
  mutate(n = row_number())
  
ggplot(pokemons_dat) +
  geom_segment(data = subset(pokemons_dat, !is.na(color)),
             aes(x = 0, xend = 24000, y = n, yend = n), 
             linetype = "dotted", size = 0.2) +
  geom_richtext(aes(x = -1000, y = n, label = id), 
                fill = "red", label.color = NA, 
                label.padding = unit(0.1, "lines"), 
                family = "JBM", size = 2.5) +
  geom_richtext(aes(x = 25000, y = n, label = id, fill = color),
                label.color = NA, 
                label.padding = unit(0.1, "lines"),
                family = "JBM", size = 2.5) + 
  scale_fill_manual(values =new_Blues) +
  guides(fill=FALSE) +
  geom_area(aes(x = total_value * 7, y = n, group = id_group), 
            fill = "#7DDDB6", alpha = 0.6, 
            orientation = "y", position = "identity") +
  geom_point(data = subset(pokemons_dat, annot), 
             aes(x = total_value * 7, y = n), size = 0.5) +
  geom_label(data = subset(pokemons_dat, annot), 
             aes(x = total_value * 7 + 100, y = n, label = total_value), 
             fill = "#F3F2EE", label.size = 0, 
             label.padding = unit(0.1, "lines"), 
             hjust = 0, family = "JBM", size = 2.5) +
  # 很無奈,中文和日文打帶邊框標簽都會留出右端空格,所以只能把英文名單拎出來加邊框
  geom_label(aes(x = 6000, y = n, label = name_type,.na = NULL), 
                fill = "#F3F2EE", label.size = 0, 
                label.padding = unit(0.1, "lines"), 
                hjust = 0, family = "ARL", size = 2.5) +
  geom_richtext(aes(x = 13000, y = n, label = English_name, .na = NULL), 
             fill = "#F3F2EE", label.size = 0, label.padding = unit(0.1, "lines"),
             hjust = 0, family = "JBM", size = 2.5) +
  geom_segment(aes(x = 16000, xend = 16000 + Attack * 21, y = n, yend = n), 
               size = 2, colour = "#7DDDB6", alpha = 0.6) +
  geom_label(data = subset(pokemons_dat, annot), 
             aes(x = 16000 + Attack * 21 + 100, y = n, 
                 label = round(Attack, 1)), fill = "#F3F2EE", 
             label.size = 0, label.padding = unit(0.1, "lines"), 
             hjust = 0, family = "JBM", size = 2.5) +
  geom_ribbon(aes(xmin = 21000, xmax = 21000 + Speed * 15, y = n, group = id_group),
              fill = "#FCDF33", alpha = 0.6, orientation = "y", position = "identity") +
  geom_point(data = subset(pokemons_dat, annot), 
             aes(x = 21000 + Speed * 15, y = n), size = 0.5) +
  geom_label(data = subset(pokemons_dat, annot), 
             aes(x = 21000 + Speed * 15 + 100, y = n, 
                 label = round(Speed, 1)), 
             fill = "#F3F2EE", label.size = 0, 
             label.padding = unit(0.1, "lines"),
             hjust = 0, family = "JBM", size = 2.5) +
  annotate("segment", 
           x = c(-2000, 0, 5700, 12700, 16000, 21000, 24000, 26000),
           xend = c(-2000, 0, 5700, 12700, 16000, 21000, 24000, 26000),
           y = -4, yend = 103, size = 0.3) +
  annotate("segment",
           x = -2000, xend = 26000, 
           y = c(-4, -1, 103), yend = c(-4, -1, 103), size = 0.3) +
  annotate("text", 
           x = c(-1000, 2750, 9200, 14350, 18500, 22500, 25000), 
           y = -2.5, 
           label = toupper(c("ID", "ABILITY SCORES", "NAME/TYPE", "ENAME", "ATTACK", "SPEED", "ID")), 
           hjust = 0.5, family = "ARLB", size = 3.5) +
  annotate("rect",
           xmin = -2000, ymin = c(27.5, 57.5), 
           xmax = 26000, ymax = c(31.5, 61.5), 
           fill = "#F3F2EE", colour = "black", size = 0.3) +
  annotate("richtext", x = 13000, y = c(29.5, 59.5), 
           label = c("**127-130**  Which Have Been Deleted :) I",
                     "**157-160**  Which Have Been Deleted :) II"), 
           label.color = NA, fill = "#F3F2EE", hjust = 0.5, 
           family = "JBM", size = 5) +
  annotate("text", x = c(100, 5600), y = 0, 
           label = c("0", "800"), hjust = c(0, 1), 
           family = "JBM", size = 3) +
  annotate("text", x = c(16100, 20900), y = 0, 
           label = c("0", "250"), hjust = c(0, 1), 
           family = "JBM", size = 3) +
  annotate("text", x = c(21100, 23900), y = 0, 
           label = c("0", "200"), hjust = c(0, 1), 
           family = "JBM", size = 3) +
  annotate("text", x = 26000, y = -6, 
           label = "Source: wiki.52poke.com | Graphic: Xi Chen", 
           hjust = 1, family = "JBM", size = 3) +
  coord_cartesian(clip = 'off') +
  scale_x_continuous(limits = c(-2300, 26300), expand = expansion(add = 1)) +
  scale_y_reverse(expand = expansion(add = 0)) +
  labs(
    title = "Pokemons"
  ) +
  theme_void(base_family = "JBM") +
  theme(
    plot.background = element_rect(fill = "#F3F2EE", colour = NA),
    plot.margin = margin(20, 20, 20, 20),
    plot.title = element_text(hjust = 0.01, size = 28, 
                              family = "JBMB", margin = margin(0, 0, -8, 0))
  )

ggsave(here::here('.',paste0("Pokemons ", format(Sys.time(), "%Y%m%d"), ".png")), 
       dpi = 640, width = 10, height = 14)

- 成品

最后編輯于
?著作權歸作者所有,轉載或內容合作請聯系作者
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳并發布,文章內容僅代表作者本人觀點,簡書系信息發布平臺,僅提供信息存儲服務。

推薦閱讀更多精彩內容