作業目的: Data Visualization Layers

這份作業希望你可以利用視覺化的文法(grammar of graphics) 呈現結果。

這次的作業使用我自己抓的 PTT 棒球板資料,有興趣的話可以點進完整的文章:〈誰是中職人氣王?哪隊球迷愛出征?PTT資料全解析〉。

因為重點是練習畫圖,所以我都先畫好基本的圖,請你改成範例的樣子。

作業: Data Visualization Layers

### 這邊不要動
library(tidyverse)
library(lubridate)
library(ggrepel)

# Load the data frames used by the four exercises below.
# Each commented-out read_csv() line is the CSV alternative for the same data frame.
df_hot_agg <- read_rds("data/Lab07/df_hot_agg.rds")
# df_hot_agg <- read_csv("data/Lab07/df_hot_agg.csv")

df_main_type_n_big <- read_rds("data/Lab07/df_main_type_n_big.rds")
# df_main_type_n_big <- read_csv("data/Lab07/df_main_type_n_big.csv")

df_main_date_agg_four <- read_rds("data/Lab07/df_main_date_agg_four.rds")
# df_main_date_agg_four <- read_csv("data/Lab07/df_main_date_agg_four.csv")

df_push_agg_join_agg <- read_rds("data/Lab07/df_push_agg_join_agg.rds")
# df_push_agg_join_agg <- read_csv("data/Lab07/df_push_agg_join_agg.csv")

# Lookup vectors keyed by the English board name.
# Full team names, used as legend labels.
value_team_pair <- c(
  "Elephants" = "中信兄弟",
  "Guardians" = "富邦悍將",
  "Lions" = "統一獅",
  "Monkeys" = "樂天桃猿",
  "Dragons" = "味全龍"
)

# Team colours, used for the manual fill/colour scales.
value_team_color_english <- c(
  "Elephants" = "#FAB419",
  "Guardians" = "#004D9A",
  "Monkeys" = "#990036",
  "Lions" = "#FF8000",
  "Dragons" = "#D71010"
)

# Short team nicknames, used as axis labels (no entry for "Dragons").
value_team_pair_short <- c(
  "Elephants" = "爪爪",
  "Guardians" = "邦邦",
  "Lions" = "喵喵",
  "Monkeys" = "吱吱"
)

1. facet + coord

### Baseline chart (starting point): `value` per board, one panel per `type`
ggplot(df_hot_agg, aes(x = board, y = value)) +
  geom_col() +
  facet_wrap(~ type)

### Popularity statistics: number of articles and number of comments
# Horizontal bar chart of `value` per board, one panel per metric
# (only the "n_article" and "n_comment" rows are kept).
df_hot_agg %>%
  filter(type %in% c("n_article", "n_comment")) %>%
  ggplot() +
  # fct_rev() reverses the board order so that, after coord_flip(), the first
  # level is drawn at the top of the axis.
  # NOTE(review): the caption says teams are sorted by article count; that only
  # holds if `board` already arrives as a factor in that order -- confirm.
  geom_col(aes(x = fct_rev(board), y = value, fill = board)) +
  # scales = "free" gives each metric panel its own axis ranges.
  facet_wrap(~type, scales = "free", nrow = 2) +
  coord_flip() +
  scale_fill_manual(values = value_team_color_english, labels = value_team_pair) +
  scale_x_discrete(labels = value_team_pair_short) +
  scale_y_continuous(labels = scales::comma) +
  # The axis already names every team, so the fill legend is suppressed.
  # "none" replaces the deprecated logical form `guides(fill = F)`.
  guides(fill = "none") +
  labs(
    title = "爪爪人氣高:中信兄弟文章數量和推文數量都居於首位",
    subtitle = "中華職棒四隊 PTT 隊板上文章與推文數,2019/11/16 - 2020/12/31",
    caption = "*隊伍排序依照文章總數由大至小"
  ) +
  xlab(NULL) +
  ylab(NULL) +
  ggthemes::theme_clean(
    base_family = "Noto Sans CJK TC Medium",
    base_size = 14
  ) +
  # Set the CJK font family explicitly on every text element.
  theme(
    legend.text = element_text(family = "Noto Sans CJK TC Medium"),
    legend.title = element_text(family = "Noto Sans CJK TC Medium"),
    axis.text = element_text(family = "Noto Sans CJK TC Medium", size = 16),
    strip.text = element_text(family = "Noto Sans CJK TC Bold", size = 18),
    plot.title = element_text(family = "Noto Sans CJK TC Medium", size = 26),
    plot.subtitle = element_text(family = "Noto Sans CJK TC Medium", size = 22),
    plot.caption = element_text(family = "Noto Sans CJK TC Medium", size = 16)
  ) +
  theme(plot.margin = unit(c(8, 8, 8, 8), "mm"))

2. alpha + position

### Baseline chart (starting point): `per` per board, alpha mapped to `type`
ggplot(df_main_type_n_big, aes(x = board, y = per, alpha = type)) +
  geom_col()

### Do the boards differ in the types of articles posted?
# Stacked horizontal bars of `per` per board; fill encodes the team and alpha
# encodes the article type.
df_main_type_n_big %>%
  # Make "[非轉播]" the first level of `type`.
  mutate(type = fct_relevel(as.factor(type), "[非轉播]")) %>%
  # Order the boards by `n` (ascending, so the largest ends up on top after
  # coord_flip()).
  mutate(board = fct_reorder(board, n)) %>%
  ggplot(aes(x = board, y = per, fill = board, alpha = type)) +
  geom_col(position = "stack") +
  coord_flip() +
  scale_fill_manual(values = value_team_color_english, labels = value_team_pair) +
  scale_alpha_discrete(range = c(0.75, 1)) +
  # Spell out `labels` in full; the original `label =` only worked through
  # partial argument matching.
  scale_x_discrete(labels = value_team_pair_short) +
  scale_y_continuous(
    labels = scales::percent,
    limits = c(0, 1.05),
    breaks = seq(0, 1, 0.25),
    expand = c(0, 0)
  ) +
  # Hide the fill legend (the axis already names the teams) and title the alpha
  # legend. "none" replaces the deprecated logical form `guides(fill = F)`.
  guides(
    fill = "none",
    alpha = guide_legend(title = "文章類型")
  ) +
  labs(
    title = "桃猿的轉播文佔比相對較高,其餘三隊比例相似",
    subtitle = "中華職棒四隊 PTT 隊板上轉播類型文章佔比,2019/11/16 - 2020/12/31",
    caption = "*隊伍排序依照文章總數由大至小"
  ) +
  xlab(NULL) +
  ylab(NULL) +
  ggthemes::theme_clean(
    base_family = "Noto Sans CJK TC Medium",
    base_size = 14
  ) +
  # Effectively hides the major grid lines (white, zero width).
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
  # elements; `size` is kept so that older ggplot2 versions still work.
  theme(panel.grid.major = element_line(colour = "white", size = 0)) +
  # Set the CJK font family explicitly on every text element.
  theme(
    legend.text = element_text(family = "Noto Sans CJK TC Medium"),
    legend.title = element_text(family = "Noto Sans CJK TC Medium"),
    axis.text = element_text(family = "Noto Sans CJK TC Medium", size = 16),
    strip.text = element_text(family = "Noto Sans CJK TC Bold", size = 18),
    plot.title = element_text(family = "Noto Sans CJK TC Medium", size = 26),
    plot.subtitle = element_text(family = "Noto Sans CJK TC Medium", size = 22),
    plot.caption = element_text(family = "Noto Sans CJK TC Medium", size = 14),
    legend.position = "bottom"
  ) +
  theme(plot.margin = unit(c(8, 8, 8, 8), "mm"))

3. facet + scale_x_date

### Baseline chart (starting point): `n_article` over `date`, one panel per board
ggplot(df_main_date_agg_four, aes(x = date, y = n_article)) +
  geom_line() +
  facet_wrap(~ board)

### When major events happened
# Line chart of `n_article` over `date`, one panel per board, with text
# annotations taken from the `text` column.
df_main_date_agg_four %>%
  ggplot(aes(
    x = date, y = n_article,
    group = board, color = board, label = text
  )) +
  geom_line() +
  # Zero-size points at every observation.
  # NOTE(review): purpose not evident from this file -- confirm before removing.
  geom_point(size = 0) +
  # Labels come from the `label = text` mapping above, nudged away from the line.
  geom_text_repel(
    color = "black", size = 6,
    nudge_x = -10,
    nudge_y = 20,
    family = "Noto Sans CJK TC Medium"
  ) +
  # Visible markers positioned by `n_article_label`
  # (presumably non-missing only on annotated dates -- confirm against the data).
  geom_point(aes(x = date, y = n_article_label), size = 1.7) +
  facet_wrap(~board, scales = "free") +
  scale_color_manual(values = value_team_color_english, labels = value_team_pair) +
  scale_x_date(
    date_breaks = "3 months",
    date_minor_breaks = "1 months",
    date_labels = "%b %y",
    # Same x range in every panel: the full date span of the data.
    limits = range(df_main_date_agg_four$date, na.rm = TRUE)
  ) +
  # Explicit limits override the free y scales, so every panel shows 0-1000.
  scale_y_continuous(labels = scales::comma, limits = c(0, 1000)) +
  labs(
    title = "疫情下的開幕戰與總冠軍戰是討論尖峰,但冷熱起伏有隊伍差異",
    subtitle = "中華職棒四隊 PTT 隊板上每日文章數量,2019/11/16 - 2020/12/31",
    caption = "*隊伍排序依照文章總數由大至小"
  ) +
  xlab(NULL) +
  ylab(NULL) +
  # The facet strips already name the teams, so the colour legend is dropped.
  # "none" replaces the deprecated logical form `guides(color = F)`; the former
  # `guides(fill = F)` was removed because no fill aesthetic is mapped.
  guides(color = "none") +
  ggthemes::theme_clean(
    base_family = "Noto Sans CJK TC Medium",
    base_size = 14
  ) +
  # Set the CJK font family explicitly on every text element.
  theme(
    legend.text = element_text(family = "Noto Sans CJK TC Medium"),
    legend.title = element_text(family = "Noto Sans CJK TC Medium"),
    axis.text = element_text(family = "Noto Sans CJK TC Medium", size = 16),
    strip.text = element_text(family = "Noto Sans CJK TC Bold", size = 18),
    plot.title = element_text(family = "Noto Sans CJK TC Medium", size = 26),
    plot.subtitle = element_text(family = "Noto Sans CJK TC Medium", size = 22),
    plot.caption = element_text(family = "Noto Sans CJK TC Medium", size = 16)
  ) +
  theme(plot.margin = unit(c(8, 8, 8, 8), "mm"))

4. fct_reorder + facet

### Baseline chart (starting point): `n` per `name`, one free-scaled panel per board
ggplot(df_push_agg_join_agg, aes(x = name, y = n)) +
  geom_col() +
  facet_wrap(~ board, scales = "free") +
  coord_flip()

### Who is everyone talking about?
# Horizontal bar chart of `n` per `name`, one panel per board.
df_push_agg_join_agg %>%
  # Order names by descending `rank`, so that after coord_flip() the smallest
  # rank value is drawn at the top of each panel.
  mutate(name = fct_reorder(as.factor(name), desc(rank))) %>%
  ggplot(aes(x = name, y = n, fill = board)) +
  geom_col() +
  # Free scales let each board panel show only its own names and value range.
  facet_wrap(board ~ ., scales = "free") +
  coord_flip() +
  # The facet strips already name the teams, so the fill legend is dropped.
  # "none" replaces the deprecated logical form `guides(fill = F)`; the former
  # `guides(color = F)` was removed because no colour aesthetic is mapped.
  guides(fill = "none") +
  scale_fill_manual(values = value_team_color_english, labels = value_team_pair) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "各隊人氣王:泡麵昌、羅力、四爺、龍貓",
    subtitle = "中華職棒四隊 PTT 上各隊球迷最常提及本隊人物前十,2019/11/16 - 2020/12/31",
    caption = "*舉例:林岳平包含大餅/餅總/餅瓜/岳平/火球餅\n*排除轉播文後利用正規表達式計算"
  ) +
  xlab(NULL) +
  ylab(NULL) +
  ggthemes::theme_clean(
    base_family = "Noto Sans CJK TC Medium",
    base_size = 14
  ) +
  # Set the CJK font family explicitly on every text element.
  theme(
    legend.text = element_text(family = "Noto Sans CJK TC Medium"),
    legend.title = element_text(family = "Noto Sans CJK TC Medium"),
    axis.text = element_text(family = "Noto Sans CJK TC Medium", size = 16),
    strip.text = element_text(family = "Noto Sans CJK TC Bold", size = 18),
    plot.title = element_text(family = "Noto Sans CJK TC Medium", size = 26),
    plot.subtitle = element_text(family = "Noto Sans CJK TC Medium", size = 22),
    plot.caption = element_text(family = "Noto Sans CJK TC Medium", size = 14),
    legend.position = "bottom"
  ) +
  theme(plot.margin = unit(c(8, 8, 8, 8), "mm"))