dplyr & ggplot2
安裝、載入一些基本的套件
# Install any missing package, then attach every package the script uses.
# The previous `if (!require(pkg)) install.packages(pkg)` form installed a
# missing package but never attached it, so the first run on a fresh machine
# failed as soon as `%>%` or `ggplot()` was called.
# Attach order is kept as before: dplyr, tidyr, ggplot2, plotly, babynames.
for (pkg in c("dplyr", "tidyr", "ggplot2", "plotly", "babynames")) {
  # requireNamespace() only checks availability; it does not attach.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  # library() errors loudly if the install did not succeed.
  library(pkg, character.only = TRUE)
}
檢視資料
Classes 'tbl_df', 'tbl' and 'data.frame': 1924665 obs. of 5 variables:
$ year: num 1880 1880 1880 1880 1880 1880 1880 1880 1880 1880 ...
$ sex : chr "F" "F" "F" "F" ...
$ name: chr "Mary" "Anna" "Emma" "Elizabeth" ...
$ n : int 7065 2604 2003 1939 1746 1578 1472 1414 1320 1288 ...
$ prop: num 0.0724 0.0267 0.0205 0.0199 0.0179 ...
篩選出男生資料
某些男生名字的『數量』
# Yearly `number` for three selected names, drawn as one line per name.
# NOTE(review): `mbaby` is not defined in the visible part of this file;
# presumably it is built like `fbaby` but for sex == "M" — confirm.
mbaby %>%
  filter(name %in% c("Steven", "Thomas", "Matthew")) %>%
  arrange(name, year) %>%
  data.frame() %>%
  ggplot(mapping = aes(x = year, y = number, colour = name)) +
  geom_line()
某些男生名字的『比例』
# Share of each selected name among all rows of `mbaby` for the same year.
mbaby %>%
  group_by(year) %>%
  # Per-year total must be computed before the name filter drops other rows.
  mutate(year_total = sum(number)) %>%
  ungroup() %>%
  filter(name %in% c("Steven", "Thomas", "Matthew")) %>%
  mutate(fraction = number / year_total) %>%
  ggplot(mapping = aes(x = year, y = fraction, colour = name)) +
  geom_line()
簡化程式
# Same plot as the previous section, with the per-year total folded into a
# single grouped mutate() instead of a separate `year_total` column.
mbaby %>%
  group_by(year) %>%
  # sum(number) is evaluated within each year group, before filtering.
  mutate(fraction = number / sum(number)) %>%
  filter(name %in% c("Steven", "Thomas", "Matthew")) %>%
  ggplot(mapping = aes(x = year, y = fraction, colour = name)) +
  geom_line()
熱門男生名字的『比例』
# Distinct names that have the largest `number` in at least one year of
# `mbaby` (top_n() keeps ties).
mtop <- mbaby %>%
  group_by(year) %>%
  top_n(1, number) %>%
  pull(name) %>%
  unique()

# Per-year share of each of those names; the static ggplot is stored in `g`
# so that it can be handed to plotly below.
g <- mbaby %>%
  group_by(year) %>%
  mutate(fraction = number / sum(number)) %>%
  filter(name %in% mtop) %>%
  ggplot(mapping = aes(x = year, y = fraction, colour = name)) +
  geom_line(alpha = 0.5) +
  geom_point(size = 0.5)

# Convert the ggplot object into an interactive plotly widget.
ggplotly(g)
熱門女生名字的『比例』
# Female rows of `babynames`, sampled every fifth year from 1880 up to 2017,
# with the count column `n` renamed to `number`.
fbaby <- babynames %>%
  rename(number = n) %>%
  filter(year %in% seq(1880, 2017, 5), sex == "F")

# Distinct names that have the largest `number` in at least one sampled year
# (top_n() keeps ties).
ftop <- fbaby %>%
  group_by(year) %>%
  top_n(1, number) %>%
  pull(name) %>%
  unique()

# Per-year share of each of those names; the static ggplot is stored in `g`
# so that it can be handed to plotly below.
g <- fbaby %>%
  group_by(year) %>%
  mutate(fraction = number / sum(number)) %>%
  filter(name %in% ftop) %>%
  ggplot(mapping = aes(x = year, y = fraction, colour = name)) +
  geom_line(alpha = 0.5) +
  geom_point(size = 0.5)

# Convert the ggplot object into an interactive plotly widget.
ggplotly(g)
💡 學習重點:
■ 每一份資料都是一個
■ 每一行程式都是一個
■ 所謂寫
■ dplyr
§ 物件:tibble
§ 運算符號:%>%
§ 功能:
。select:依名稱選擇欄位
。filter:依條件選取紀錄
。mutate:運算新欄位
。summarise:欄位統計
。group_by:資料分群
。…