載入套件與資料
匯入顧客與廠商的位置
# Assemble the analysis table `A`.
# Start from the product -> English category lookup (first two columns of `P`
# joined with `TPC`), attach it to `I` (presumably the order-items table --
# TODO confirm), then add the selected columns of `S`, all of `O`, and the
# selected columns of `C`. Every join relies on dplyr's default natural-join
# keys, i.e. whatever columns the two tables have in common.
A <- P[, 1:2] %>%
  left_join(TPC) %>%
  select(product_id, category = product_category_name_english) %>%
  right_join(I) %>%
  left_join(S[, c(1, 4)]) %>%
  left_join(O) %>%
  left_join(C[, c(1, 5)]) %>%
  rename(from = seller_state, to = customer_state)

# Attach one review score per order: the first `review_score` listed for each
# `order_id` in `R`. The right join keeps every row of `A`, so rows without a
# matching review get an NA score.
A <- R %>%
  group_by(order_id) %>%
  summarise(score = review_score[1]) %>%
  right_join(A)
計算各階段作業績效指標
# Per-row timing metrics, all stored as plain numbers of days:
#   t1         purchase          -> approval
#   t2         approval          -> handed to carrier
#   t3         handed to carrier -> delivered to customer
#   t.total    t1 + t2 + t3
#   t.estimate purchase          -> estimated delivery date
#   t.delay    t.total - t.estimate (positive means later than estimated)
#   t.limit    purchase          -> shipping limit date
A <- A %>%
  mutate(
    t1 = as.numeric(
      difftime(order_approved_at, order_purchase_timestamp, units = "days")
    ),
    t2 = as.numeric(
      difftime(order_delivered_carrier_date, order_approved_at, units = "days")
    ),
    t3 = as.numeric(
      difftime(
        order_delivered_customer_date, order_delivered_carrier_date,
        units = "days"
      )
    ),
    t.total = t1 + t2 + t3,
    t.estimate = as.numeric(
      difftime(
        order_estimated_delivery_date, order_purchase_timestamp,
        units = "days"
      )
    ),
    t.delay = t.total - t.estimate,
    t.limit = as.numeric(
      difftime(shipping_limit_date, order_purchase_timestamp, units = "days")
    )
  )
各品類、各階段平均作業時間比較
# Names of the 20 categories with the most rows in `A`
# (sorted by ascending count, so the most frequent category comes last).
top20 <- names(tail(sort(table(A$category)), 20))

# Stacked horizontal bars: mean number of days spent in each stage (t1, t2, t3)
# for every top-20 category. The factor levels are listed as t3, t2, t1 to
# control the stacking / legend order.
A %>%
  filter(category %in% top20) %>%
  group_by(category) %>%
  summarise_at(vars(t1:t3), mean, na.rm = TRUE) %>%
  gather("stage", "days", t1:t3) %>%
  mutate(stage = factor(stage, levels = c('t3', 't2', 't1'))) %>%
  ggplot(aes(x = category, y = days, fill = stage)) +
  geom_bar(stat = "Identity") +
  coord_flip()
各品類總作業時間分佈
# Shrink the text and widen the left margin (12 lines) so the category
# labels fit next to the horizontal boxes.
par(cex = 0.7, mar = c(5, 12, 4, 2))

# Distribution of total delivery time (days) for each top-20 category.
boxplot(
  t.total ~ category,
  data = A[A$category %in% top20, ],
  horizontal = TRUE,
  las = 2,
  main = "Delivery Timespan (days) per Category",
  xlab = "days"
)
濾掉特殊品類office_furniture
# Working subset `a`: top-20 categories except "office_furniture", tagged with
# a month key `ym` (the 15th of the month in which the order reached the
# customer), restricted to months from 2017-03 through 2018-08 inclusive.
# Rows with no customer delivery date get an NA `ym` and are dropped by the
# final filter.
a <- A %>%
  filter(category %in% setdiff(top20, "office_furniture")) %>%
  mutate(
    ym = as.Date(format(order_delivered_customer_date, "%Y-%m-15"))
  ) %>%
  filter(ym >= as.Date("2017-03-15") & ym <= as.Date("2018-08-15"))
作業績效與滿意度指標的關係
# Monthly trend of delivery performance versus customer satisfaction.
# For each delivery month `ym` this computes:
#   estimate_timespan   mean estimated delivery time (days)
#   actual_timespan     mean actual delivery time (days)
#   percentage_of_delay share of rows delivered later than estimated (0-100)
#   std2_score          mean review score, rescaled below
# The metric names are shown verbatim in the plot legend, so they are spelled
# out correctly here (previously `estmate_timespan` / `percetage_of_delay`).
p <- a %>%
  group_by(ym) %>%
  summarise(
    estimate_timespan = mean(t.estimate, na.rm = TRUE),
    actual_timespan = mean(t.total, na.rm = TRUE),
    percentage_of_delay = 100 * mean(t.delay > 0, na.rm = TRUE),
    std2_score = mean(score, na.rm = TRUE)
  ) %>%
  # Centre the monthly mean score and scale it to a standard deviation of 2
  # across months -- presumably so its movement is visible on the axis shared
  # with the other metrics.
  mutate_at(vars(std2_score), ~ 2 * (. - mean(.)) / sd(.)) %>%
  # Long format: one row per (month, metric) so each metric is its own line.
  gather("metrics", "value", -ym) %>%
  ggplot(aes(x = ym, y = value, col = metrics)) +
  geom_point() +
  geom_line() +
  ggtitle("作業績效與滿意度指標") +
  xlab("date (yr'mn)") +
  scale_x_date(date_breaks = "2 months", date_labels = "%y'%m")

# Render the static ggplot as an interactive plotly widget.
ggplotly(p)
⏰ 滿意度(`std2_score`)與送貨延遲比率(`percentage_of_delay`)有明顯的負相關