這份作業希望你可以利用視覺化的文法(grammar of graphics) 呈現結果。
這次的作業使用我自己抓的 PTT 棒球版資料,有興趣的話可以點進完整的文章〈誰是中職人氣王?哪隊球迷愛出征?PTT資料全解析〉。
因為重點是練習畫圖,所以我都先畫好基本的圖,請你改成範例的樣子。
### 這邊不要動
library(tidyverse)
library(lubridate)
library(ggrepel)
# Load the four datasets used by the plots below from their RDS files.
# Each commented-out read_csv() line is the CSV alternative for the same data.
df_hot_agg <- read_rds("data/Lab07/df_hot_agg.rds")
# df_hot_agg <- read_csv("data/Lab07/df_hot_agg.csv")
df_main_type_n_big <- read_rds("data/Lab07/df_main_type_n_big.rds")
# df_main_type_n_big <- read_csv("data/Lab07/df_main_type_n_big.csv")
df_main_date_agg_four <- read_rds("data/Lab07/df_main_date_agg_four.rds")
# df_main_date_agg_four <- read_csv("data/Lab07/df_main_date_agg_four.csv")
df_push_agg_join_agg <- read_rds("data/Lab07/df_push_agg_join_agg.rds")
# df_push_agg_join_agg <- read_csv("data/Lab07/df_push_agg_join_agg.csv")
# Lookup vectors keyed by the English board name.
# value_team_pair: full team name, used as legend labels.
value_team_pair <- c(
  "Elephants" = "中信兄弟",
  "Guardians" = "富邦悍將",
  "Lions" = "統一獅",
  "Monkeys" = "樂天桃猿",
  "Dragons" = "味全龍"
)
# value_team_color_english: hex colour per team, used as manual scale values.
value_team_color_english <- c(
  "Elephants" = "#FAB419",
  "Guardians" = "#004D9A",
  "Monkeys" = "#990036",
  "Lions" = "#FF8000",
  "Dragons" = "#D71010"
)
# value_team_pair_short: short nickname per team, used as axis labels.
value_team_pair_short <- c(
  "Elephants" = "爪爪",
  "Guardians" = "邦邦",
  "Lions" = "喵喵",
  "Monkeys" = "吱吱"
)
### raw
# Baseline plot: `value` per `board` as bars, one facet per `type`.
df_hot_agg %>%
  ggplot(aes(x = board, y = value)) +
  geom_col() +
  facet_wrap(type ~ .)
### 熱門度統計: 文章數、推文數
# Styled plot: keeps only the "n_article" and "n_comment" rows and draws
# horizontal bars per board, with one free-scaled facet row per metric.
df_hot_agg %>%
  filter(type %in% c("n_article", "n_comment")) %>%
  ggplot() +
  # fct_rev() reverses the board order so it reads top-down after coord_flip().
  geom_col(aes(x = fct_rev(board), y = value, fill = board)) +
  facet_wrap(~type, scales = "free", nrow = 2) +
  coord_flip() +
  scale_fill_manual(
    values = value_team_color_english,
    labels = value_team_pair
  ) +
  scale_x_discrete(labels = value_team_pair_short) +
  scale_y_continuous(labels = scales::comma) +
  # "none" is the supported way to drop a legend; FALSE/F is deprecated.
  guides(fill = "none") +
  labs(
    title = "爪爪人氣高:中信兄弟文章數量和推文數量都居於首位",
    subtitle = "中華職棒四隊 PTT 隊板上文章與推文數,2019/11/16 - 2020/12/31",
    caption = "*隊伍排序依照文章總數由大至小"
  ) +
  xlab(NULL) +
  ylab(NULL) +
  ggthemes::theme_clean(
    base_family = "Noto Sans CJK TC Medium",
    base_size = 14
  ) +
  theme(
    legend.text = element_text(family = "Noto Sans CJK TC Medium"),
    legend.title = element_text(family = "Noto Sans CJK TC Medium"),
    axis.text = element_text(family = "Noto Sans CJK TC Medium", size = 16),
    strip.text = element_text(family = "Noto Sans CJK TC Bold", size = 18),
    plot.title = element_text(family = "Noto Sans CJK TC Medium", size = 26),
    plot.subtitle = element_text(family = "Noto Sans CJK TC Medium", size = 22),
    plot.caption = element_text(family = "Noto Sans CJK TC Medium", size = 16)
  ) +
  theme(plot.margin = unit(c(8, 8, 8, 8), "mm"))
### raw
# Baseline plot: `per` per `board` as bars, with `type` mapped to alpha.
df_main_type_n_big %>%
  ggplot(aes(x = board, y = per, alpha = type)) +
  geom_col()
### 各板的發文類型有差嗎
# Styled plot: stacked horizontal bars of `per` per board, with the post
# `type` mapped to alpha and the board mapped to the team fill colour.
df_main_type_n_big %>%
  # Move the "[非轉播]" level to the front of the factor.
  mutate(type = fct_relevel(as.factor(type), "[非轉播]")) %>%
  # Order boards by `n` so the bars are sorted.
  mutate(board = fct_reorder(board, n)) %>%
  ggplot(aes(x = board, y = per, fill = board, alpha = type)) +
  geom_col(position = "stack") +
  coord_flip() +
  scale_fill_manual(
    values = value_team_color_english,
    labels = value_team_pair
  ) +
  scale_alpha_discrete(range = c(0.75, 1)) +
  # Spelled out as `labels`; the original relied on partial matching of `label`.
  scale_x_discrete(labels = value_team_pair_short) +
  scale_y_continuous(
    labels = scales::percent,
    limits = c(0, 1.05),
    breaks = seq(0, 1, 0.25),
    expand = c(0, 0)
  ) +
  # "none" is the supported way to drop a legend; FALSE/F is deprecated.
  guides(fill = "none") +
  guides(alpha = guide_legend(title = "文章類型")) +
  labs(
    title = "桃猿的轉播文佔比相對較高,其餘三隊比例相似",
    subtitle = "中華職棒四隊 PTT 隊板上轉播類型文章佔比,2019/11/16 - 2020/12/31",
    caption = "*隊伍排序依照文章總數由大至小"
  ) +
  xlab(NULL) +
  ylab(NULL) +
  ggthemes::theme_clean(
    base_family = "Noto Sans CJK TC Medium",
    base_size = 14
  ) +
  # NOTE(review): `size` for line elements is deprecated in ggplot2 >= 3.4.0 in
  # favour of `linewidth`; kept as-is so older installs keep working.
  theme(panel.grid.major = element_line(colour = "white", size = 0)) +
  theme(
    legend.text = element_text(family = "Noto Sans CJK TC Medium"),
    legend.title = element_text(family = "Noto Sans CJK TC Medium"),
    axis.text = element_text(family = "Noto Sans CJK TC Medium", size = 16),
    strip.text = element_text(family = "Noto Sans CJK TC Bold", size = 18),
    plot.title = element_text(family = "Noto Sans CJK TC Medium", size = 26),
    plot.subtitle = element_text(family = "Noto Sans CJK TC Medium", size = 22),
    plot.caption = element_text(family = "Noto Sans CJK TC Medium", size = 14),
    legend.position = "bottom"
  ) +
  theme(plot.margin = unit(c(8, 8, 8, 8), "mm"))
### raw
# Baseline plot: `n_article` over `date` as a line, one facet per board.
df_main_date_agg_four %>%
  ggplot(aes(x = date, y = n_article)) +
  geom_line() +
  facet_wrap(board ~ .)
### 發生大事件
# Styled plot: `n_article` over `date` as one coloured line per board, faceted
# by board, with repelled text labels taken from the `text` column.
df_main_date_agg_four %>%
  ggplot(aes(
    x = date, y = n_article, group = board, color = board, label = text
  )) +
  geom_line() +
  geom_point(size = 0) +
  geom_text_repel(
    color = "black",
    size = 6,
    nudge_x = -10,
    nudge_y = 20,
    family = "Noto Sans CJK TC Medium"
  ) +
  # Second point layer plots `n_article_label` instead of `n_article`.
  geom_point(aes(x = date, y = n_article_label), size = 1.7) +
  # "none" is the supported way to drop a legend; FALSE/F is deprecated.
  guides(fill = "none") +
  facet_wrap(~board, scales = "free") +
  scale_color_manual(
    values = value_team_color_english,
    labels = value_team_pair
  ) +
  scale_x_date(
    date_breaks = "3 months",
    date_minor_breaks = "1 months",
    date_labels = "%b %y",
    # Pin every facet to the full date range of the data.
    limits = c(
      min(df_main_date_agg_four$date, na.rm = TRUE),
      max(df_main_date_agg_four$date, na.rm = TRUE)
    )
  ) +
  scale_y_continuous(labels = scales::comma, limits = c(0, 1000)) +
  labs(
    title = "疫情下的開幕戰與總冠軍戰是討論尖峰,但冷熱起伏有隊伍差異",
    subtitle = "中華職棒四隊 PTT 隊板上每日文章數量,2019/11/16 - 2020/12/31",
    caption = "*隊伍排序依照文章總數由大至小"
  ) +
  xlab(NULL) +
  ylab(NULL) +
  guides(color = "none") +
  ggthemes::theme_clean(
    base_family = "Noto Sans CJK TC Medium",
    base_size = 14
  ) +
  theme(
    legend.text = element_text(family = "Noto Sans CJK TC Medium"),
    legend.title = element_text(family = "Noto Sans CJK TC Medium"),
    axis.text = element_text(family = "Noto Sans CJK TC Medium", size = 16),
    strip.text = element_text(family = "Noto Sans CJK TC Bold", size = 18),
    plot.title = element_text(family = "Noto Sans CJK TC Medium", size = 26),
    plot.subtitle = element_text(family = "Noto Sans CJK TC Medium", size = 22),
    plot.caption = element_text(family = "Noto Sans CJK TC Medium", size = 16)
  ) +
  theme(plot.margin = unit(c(8, 8, 8, 8), "mm"))
### raw
# Baseline plot: `n` per `name` as horizontal bars, one free-scaled facet per
# board.
df_push_agg_join_agg %>%
  ggplot(aes(x = name, y = n)) +
  geom_col() +
  facet_wrap(board ~ ., scales = "free") +
  coord_flip()
### 都在討論誰
# Styled plot: `n` per `name` as horizontal bars coloured by board, one
# free-scaled facet per board.
df_push_agg_join_agg %>%
  # Order names by descending `rank` so bars are sorted after coord_flip().
  mutate(name = fct_reorder(as.factor(name), desc(rank))) %>%
  ggplot(aes(x = name, y = n, fill = board)) +
  geom_col() +
  facet_wrap(board ~ ., scales = "free") +
  coord_flip() +
  # "none" is the supported way to drop a legend; FALSE/F is deprecated.
  guides(fill = "none") +
  scale_fill_manual(
    values = value_team_color_english,
    labels = value_team_pair
  ) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "各隊人氣王:泡麵昌、羅力、四爺、龍貓",
    subtitle = "中華職棒四隊 PTT 上各隊球迷最常提及本隊人物前十,2019/11/16 - 2020/12/31",
    caption = "*舉例:林岳平包含大餅/餅總/餅瓜/岳平/火球餅\n*排除轉播文後利用正規表達式計算"
  ) +
  xlab(NULL) +
  ylab(NULL) +
  guides(color = "none") +
  ggthemes::theme_clean(
    base_family = "Noto Sans CJK TC Medium",
    base_size = 14
  ) +
  theme(
    legend.text = element_text(family = "Noto Sans CJK TC Medium"),
    legend.title = element_text(family = "Noto Sans CJK TC Medium"),
    axis.text = element_text(family = "Noto Sans CJK TC Medium", size = 16),
    strip.text = element_text(family = "Noto Sans CJK TC Bold", size = 18),
    plot.title = element_text(family = "Noto Sans CJK TC Medium", size = 26),
    plot.subtitle = element_text(family = "Noto Sans CJK TC Medium", size = 22),
    plot.caption = element_text(family = "Noto Sans CJK TC Medium", size = 14),
    legend.position = "bottom"
  ) +
  theme(plot.margin = unit(c(8, 8, 8, 8), "mm"))