(4D)單元摘要: 使用台灣各級行政區的資料來練習
summary()
mean()
, median()
, min()
, max()
, …hist()
, density()
, boxplot()
GGally::ggpairs()
資料來源: 社會經濟資訊服務平台
載入套件和資料
# Session setup: locale, packages, a clean workspace, then the course data.
# NOTE(review): "cht" looks like a Windows-style locale name; confirm it is
# accepted on the platform where this is run.
Sys.setlocale(category = "LC_ALL", locale = "cht")
# Attach the packages used below (pacman installs any that are missing).
pacman::p_load(dplyr, tibble, GGally, plotly, ggpubr)
# Remove every object from the workspace, hidden (dot-prefixed) ones included.
# `all.names = TRUE` is spelled out instead of the partially matched `all = T`.
rm(list = ls(all.names = TRUE))
# Load the saved objects; the exercise text in this document names V, Vill and
# Town as the contents of this file.
load("data/villages.rdata")
# Rebuild the town-level table from the "108Y" rows of V: compute a
# population-weighted 性比例 for every (COUNTY, TOWN) pair, then attach the
# columns of the existing Town table (presumably the one loaded from the
# .rdata file).
Town <- V %>%
  filter(year == "108Y") %>%
  group_by(COUNTY, TOWN) %>%
  summarise(
    `男性比` = sum(`性比例` * `人口數`) / sum(`人口數`),
    .groups = "drop"
  ) %>%
  # No `by` is given, so the join uses every column the two tables share
  # and prints a "Joining, by = ..." message.
  left_join(Town)
Joining, by = c("COUNTY", "TOWN")
COUNTY TOWN 男性比 人口總數
Length:368 Length:368 Min. : 86.9 Min. : 685
Class :character Class :character 1st Qu.:100.0 1st Qu.: 14548
Mode :character Mode :character Median :105.5 Median : 30782
Mean :106.2 Mean : 64052
3rd Qu.:111.9 3rd Qu.: 77849
Max. :157.2 Max. :551480
老化指數 人均應稅所得 METRO REGION
Min. : 36.5 Min. : 20.1 六都:158 北: 95
1st Qu.:100.6 1st Qu.: 123.6 縣市:210 中: 88
Median :144.6 Median : 189.6 南:128
Mean :161.8 Mean : 263.1 東: 41
3rd Qu.:205.1 3rd Qu.: 291.8 離: 16
Max. :592.4 Max. :3267.1
# Show the distribution of the 老化指數 column three ways, stacked vertically.
# The vector is kept under the name `x` because the default plot titles are
# derived from the argument name.
x <- Town[["老化指數"]]
par(mfrow = c(3, 1), cex = 0.8)  # 3 rows x 1 column of panels, smaller text
boxplot(x, horizontal = TRUE)    # box plot
hist(x)                          # histogram
plot(density(x))                 # density estimate
rug(x, col = "blue")             # tick marks for the observations
北 中 南 東 離
130.70 129.12 165.53 160.64 148.02
$北
Min. 1st Qu. Median Mean 3rd Qu. Max.
43.3 83.5 130.7 146.5 183.9 496.9
$中
Min. 1st Qu. Median Mean 3rd Qu. Max.
56.0 94.5 129.1 146.3 183.3 296.4
$南
Min. 1st Qu. Median Mean 3rd Qu. Max.
36.5 120.0 165.5 186.1 241.6 592.4
$東
Min. 1st Qu. Median Mean 3rd Qu. Max.
44.7 105.8 160.6 157.7 219.0 311.1
$離
Min. 1st Qu. Median Mean 3rd Qu. Max.
56.8 113.7 148.0 153.1 185.7 336.4
# A tibble: 5 x 3
REGION old pop
* <fct> <dbl> <dbl>
1 北 131. 77444
2 中 129. 39076.
3 南 166. 26725
4 東 161. 11653
5 離 148. 9112.
# Compare the 老化指數 distribution across REGION values: density curves and
# box plots built from the same base plot, sharing a single legend.
g <- ggplot(data = Town, mapping = aes(x = `老化指數`, fill = REGION))
ggarrange(
  g + geom_density(alpha = 0.5),  # semi-transparent fill for overlapping curves
  g + geom_boxplot(),
  common.legend = TRUE
)
[1] -0.39509
# Scatter plot of `y` against `x` with a fitted straight line and no
# confidence band.
# NOTE(review): `x` and `y` are not defined inside this call; presumably they
# are vectors created earlier in the session -- confirm where `y` is set.
ggplot(data = Town, mapping = aes(x = x, y = y)) +
  geom_point() +
  # geom_smooth(se = FALSE, color = "blue") +
  geom_smooth(method = "lm", se = FALSE, color = "green")
# A tibble: 5 x 2
REGION cor
* <fct> <dbl>
1 北 -0.628
2 中 -0.669
3 南 -0.315
4 東 0.0758
5 離 0.0861
# Interactive pairs plot restricted to rows whose COUNTY is one of the six
# municipalities listed in `metro`.
metro <- c("臺北市", "新北市", "桃園市", "臺中市", "臺南市", "高雄市")
df <- Town %>%
  filter(COUNTY %in% metro) %>%
  # log10-transform every column from 人口總數 through 人均應稅所得
  # (across() replaces the superseded mutate_at()).
  mutate(across(`人口總數`:`人均應稅所得`, log10)) %>%
  # Order COUNTY as in `metro` and label each level with the first two
  # characters of its name.
  mutate(COUNTY = factor(COUNTY, levels = metro, labels = substr(metro, 1, 2)))

# highlight_key() wraps the data so that a selection made in one panel of the
# plotly output is highlighted in the others.
# NOTE(review): columns are picked by position; judging from the summary
# output earlier in this document they are 男性比, 老化指數, 人均應稅所得 and
# COUNTY -- confirm if the column order of Town ever changes.
highlight_key(df) %>%
  ggpairs(
    columns = c(3, 5, 6, 1),
    mapping = aes(color = COUNTY, label = TOWN),
    # lower = list(continuous = wrap("smooth", size = 0.5, se = FALSE)),
    diag = list(continuous = wrap("densityDiag", alpha = 0.7, col = "gray"))
  ) %>%
  ggplotly() %>%
  highlight("plotly_selected")
# Interactive pairs plot coloured by REGION, leaving out the rows whose
# REGION is "離".
df <- Town %>%
  filter(REGION != "離") %>%
  # log10-transform every column from 人口總數 through 人均應稅所得
  # (across() replaces the superseded mutate_at()).
  mutate(across(`人口總數`:`人均應稅所得`, log10)) %>%
  # The level set still lists "離", so the factor keeps that now-empty level.
  mutate(REGION = factor(REGION, levels = c("北", "中", "南", "東", "離"))) %>%
  # Convert to a plain data frame, presumably so row names can be assigned.
  as.data.frame()
# Name each row by COUNTY followed by TOWN.
rownames(df) <- paste0(df$COUNTY, df$TOWN)

# NOTE(review): columns are picked by position; judging from the summary
# output earlier in this document they are 男性比, 老化指數, 人均應稅所得 and
# REGION -- confirm if the column order of Town ever changes.
highlight_key(df) %>%
  ggpairs(
    columns = c(3, 5, 6, 8),
    mapping = aes(color = REGION),
    lower = list(continuous = wrap("points", alpha = 0.85, size = 1)),
    diag = list(continuous = wrap("densityDiag", alpha = 0.7, col = "gray"))
  ) %>%
  ggplotly() %>%
  highlight("plotly_selected")
請各組利用放在data/villages.rdata
(V
,Vill
,Town
)裡面的資料:
dplyr
X1
,X2
,X3
)C
)summary
, median
, mean
)hist
, boxplot
, plot(density())
)cor()
geom_point
+ geom_smooth