載入套件與資料
FM = count(A, from, sort=T)$from
TO = count(A, to, sort=T)$to
top11 = union(FM[1:10], TO[1:11])
df = filter(A, from%in%top11, to%in%top11) %>% mutate(
from = factor(from, rev(TO[1:11])),
to = factor(to, TO[1:11])
) %>%
group_by(from, to, .drop=T) %>% summarise(
t3 = mean(t3, na.rm=T),
r.delay = 100*mean(t.delay > 0, na.rm=T),
.groups='drop')
平均物流時間與送貨延遲比率
cols = c('seagreen', 'green', 'white',
'gold','orange', 'red', 'red', 'red', 'red')
p1 = ggplot(df, aes(to, from, fill=t3)) +
geom_tile() + scale_fill_gradientn(colors=cols[1:6])
p2 = ggplot(df, aes(to, from, fill=r.delay)) + geom_tile() +
ggtitle("平均物流時間(days) 與 送貨延遲比率(%)") +
scale_fill_gradientn(colors=cols)
lapply(list(p1,p2),ggplotly) %>% lapply(hide_colorbar) %>% subplot(margin=0.03)
🌻 sf
這個圖資套件可以很方便的讀進地圖,並將圖資與其他資料欄位整合,一起放在同一個資料框裡面。
# pacman::p_load(sf,tmap,readxl)
# B = read_excel("data/BR.xlsx")
# B$GDP = B$GDP %>%
# str_remove(",") %>% str_extract("^[0-9\\.]+") %>% as.numeric
# st_geometry(B) = st_geometry(
# st_read("data/BRUFE250GC_SIR.shp",crs=4674)[B$sfid,])
Loading required package: brazilmaps
pacman::p_load(sf,tmap,readxl,brazilmaps)
B = read_excel("data/BR.xlsx") %>%
arrange(sfid) %>% mutate(
GDP = str_remove(GDP,",") %>% str_extract("^[0-9\\.]+") %>% as.numeric)
st_geometry(B) = st_geometry(get_brmap("State"))
save(B, file="data/B.rdata")
🌻 各州中心點與距離矩陣
🌻 直接用plot()
指令,就可以把資料欄位畫在地圖上面
🌻 濾掉極端值(DF
),讓色階可以呈現較多資訊
🌻 用ggplot
和plotly
可以畫得更好,但是所有的欄位都要用同一個色階
filter(B, stCode != "DF") %>%
transmute(Population=Population/1000, GDPpCap, stCode) %>%
gather("key","value", 1:2) %>%
ggplot(aes(label=stCode,fill=value)) + geom_sf() +
scale_fill_gradientn(colors=cols[1:6]) +
theme(axis.text=element_blank(),axis.ticks=element_blank()) +
facet_wrap(~key, nrow=1) -> g
ggplotly(g)
🌻 用tmap
可以結合互動地圖,請參考tmap: get started!
將供應鏈相關資料匯整到stCode
:df1
,df2
df1 = A %>% group_by(to) %>% summarise(
t_freight = sum(freight_value, na.rm=T),
a_freight = mean(freight_value, na.rm=T),
t_price = sum(price, na.rm=T),
a_price = mean(price, na.rm=T),
r_freight = round(100*t_freight/(t_freight+t_price),2)
) %>% rename(stCode=to); df1
# A tibble: 27 x 6
stCode t_freight a_freight t_price a_price r_freight
* <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AC 3687. 40.1 15983. 174. 18.7
2 AL 15915. 35.8 80315. 181. 16.5
3 AM 5479. 33.2 22357. 135. 19.7
4 AP 2788. 34.0 13474. 164. 17.2
5 BA 100157. 26.4 511350. 135. 16.4
6 CE 48352. 32.7 227255. 154. 17.5
7 DF 50626. 21.0 302604. 126. 14.3
8 ES 49765. 22.1 275037. 122. 15.3
9 GO 53115. 22.8 294592. 126. 15.3
10 MA 31524. 38.3 119648. 145. 20.8
# ... with 17 more rows
df2 = A %>% mutate(r.delay = t.delay > 0) %>% group_by(to) %>%
summarise_at(vars(t3:r.delay), mean, na.rm=T) %>%
mutate_at(vars(t3:t.limit), round, 1) %>%
mutate(r.delay = round(100*r.delay,2)) %>%
rename(stCode=to); df2
# A tibble: 27 x 7
stCode t3 t.total t.estimate t.delay t.limit r.delay
* <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 AC 17.3 20.7 41.1 -20.3 6.9 3.3
2 AL 20.8 24.5 32.5 -8.1 6.8 24.1
3 AM 23.5 26.4 45.6 -19.2 6.7 4.29
4 AP 24.6 28.2 45.8 -17.8 7.3 4.94
5 BA 15.9 19.2 29.5 -10.3 6.7 13.7
6 CE 17.6 21 31.3 -10.4 6.6 15.3
7 DF 9.7 13 24.5 -11.5 7 7.43
8 ES 12.2 15.6 25.6 -9.9 6.7 12.2
9 GO 12.3 15.4 27 -11.6 6.7 7.86
10 MA 17.8 21.7 30.9 -9.2 7.3 20.4
# ... with 17 more rows
將資料併入圖資資料框:B2
Joining, by = "stCode"
Joining, by = "stCode"
🌻 在地圖上比較供應鏈相關欄位
🌻 使用ggplot
和plotly
加強互動性和可比較性
df = filter(B2, stCode!="DF") %>% select(
stCode, `1.人口`=Population, `2.人均所得`=GDPpCap, `3.總銷售金額`=t_price,
`4.平均預計到貨天數`=t.estimate, `5.平均實際到貨天數`=t.total, `6.平均物流天數`=t3,
`7.平均延遲天數`=t.delay, `8.物流費用比例(%)`=r_freight,
`9.延遲比例(%)`=r.delay # 中文欄位名稱,方便閱讀
) %>%
gather('key','value',`1.人口`:`9.延遲比例(%)`) %>%
group_by(key) %>% mutate(
`實際值` = paste0(stCode,"(",value,")"), # 用各州的實際值做標題
`標準化` = scale(value) %>% round(2) # 使用統一的標準化色階
) %>% ungroup
ggplot(df, aes(label=`實際值`,fill=`標準化`)) + geom_sf() +
scale_fill_gradientn(colors=cols[1:6]) +
facet_wrap(~key, ncol=3) +
theme(axis.text=element_blank(),axis.ticks=element_blank()) -> g
ggplotly(g)
資料分析的結果並不能直接變成策略,要做出實用的商業企劃,通常需要加上一些「假設」和「模擬」: