ggplot
# Static scatter plot of `segment`: log(avgItemsSold) on x, avgPrice on y,
# colour mapped to avgScore, point size mapped to sqrt(noSellers), and each
# point annotated with its business_segment value.
ggplot(
  data = segment,
  mapping = aes(x = log(avgItemsSold), y = avgPrice, colour = avgScore)
) +
  geom_point(mapping = aes(size = sqrt(noSellers))) +
  geom_text(mapping = aes(label = business_segment), size = 3)
ggplotly
# Same scatter plot without the text layer. `label` stays in the global
# aesthetics (presumably so ggplotly can surface it on hover — confirm), and
# the finished ggplot object is converted to an interactive plotly widget.
g <- ggplot(
  segment,
  aes(
    x = log(avgItemsSold),
    y = avgPrice,
    colour = avgScore,
    label = business_segment
  )
) +
  geom_point(aes(size = sqrt(noSellers)))
ggplotly(g)
googleVis
# op = options(gvis.plot.tag='chart')
# The motion chart is given "year" as its time variable, so a constant year
# column is added to `segment` before the chart is built and plotted.
segment$year <- 2018
plot(gvisMotionChart(segment, idvar = "business_segment", timevar = "year"))
Merge `TPC$product_category_name_english` into `P` as `P$category`
Joining, by = "product_category_name"
Merge `P$category` into `I`
Joining, by = "product_id"
Summarise by category
# Per-category totals over the rows of `I` that have a non-missing category,
# ordered from the highest to the lowest total revenue. `dummy` is a constant
# 2018 column carried on every row.
category <- I %>%
  filter(!is.na(category)) %>%
  group_by(category) %>%
  summarise(
    itemsSold = n(),                    # number of item rows
    totalRev = sum(price),
    avgPrice = mean(price),
    noProduct = n_distinct(product_id),
    noSeller = n_distinct(seller_id),
    dummy = 2018
  ) %>%
  arrange(desc(totalRev))
`summarise()` ungrouping output (override with `.groups` argument)
Top-20 categories
# A tibble: 20 x 7
category itemsSold totalRev avgPrice noProduct noSeller dummy
<chr> <int> <dbl> <dbl> <int> <int> <dbl>
1 health_beauty 9670 1258681. 130. 2444 492 2018
2 watches_gifts 5991 1205006. 201. 1329 101 2018
3 bed_bath_table 11115 1036989. 93.3 3029 196 2018
4 sports_leisure 8641 988049. 114. 2867 481 2018
5 computers_accessories 7827 911954. 117. 1639 287 2018
6 furniture_decor 8334 729762. 87.6 2657 370 2018
7 cool_stuff 3796 635291. 167. 789 267 2018
8 housewares 6964 632249. 90.8 2335 468 2018
9 auto 4235 592720. 140. 1900 383 2018
10 garden_tools 4347 485256. 112. 753 237 2018
11 toys 4117 483947. 118. 1411 252 2018
12 baby 3065 411765. 134. 919 244 2018
13 perfumery 3419 399125. 117. 868 175 2018
14 telephony 4545 323668. 71.2 1134 149 2018
15 office_furniture 1691 273961. 162. 309 34 2018
16 stationery 2517 230943. 91.8 849 173 2018
17 computers 203 222963. 1098. 30 9 2018
18 pet_shop 1947 214315. 110. 719 137 2018
19 musical_instruments 680 191499. 282. 289 70 2018
20 small_appliances 679 190649. 281. 231 105 2018
靜態多軸互動
併入時間資料
# Build per-category, per-quarter summaries for the categories in `top20`.
#
# Steps: join the score column of `R` onto the timestamp column of `O` by
# order_id, bucket the purchase timestamp into calendar quarters, attach both
# to every row of `I`, then aggregate by category and quarter.
#
# NOTE(review): `top20` is not defined in the visible part of this file —
#   confirm it exists before this chunk runs.
# NOTE(review): columns are picked by position (O[, c(1, 4)], R[, 2:3]); the
#   rename() below relies on them being order_id / order_purchase_timestamp /
#   review_score — confirm against the input tables.
# NOTE(review): if `R` can hold more than one row per order_id, the joins
#   duplicate item rows and inflate itemsSold / totalRev — verify.
X <- left_join(O[, c(1, 4)], R[, 2:3], by = "order_id") %>%
  rename(
    time = order_purchase_timestamp,
    score = review_score
  ) %>%
  mutate(
    # first day of the quarter containing `time`
    quarter = as.Date(cut(time, "quarter"))
  ) %>%
  # keep every row of `I`; rows without a match get NA score / quarter
  right_join(I, by = "order_id") %>%
  filter(category %in% top20$category) %>%
  group_by(category, quarter) %>%
  summarise(
    itemsSold = n(),
    totalRev = sum(price),
    avgPrice = mean(price),
    # Orders with no review carry an NA score after the joins above; without
    # na.rm a single such row would turn the whole cell's average into NA.
    avgScore = mean(score, na.rm = TRUE),
    noProduct = n_distinct(product_id),
    noSeller = n_distinct(seller_id),
    # same grouping as the default (result stays grouped by category),
    # stated explicitly so no regrouping message is emitted
    .groups = "drop_last"
  ) %>%
  arrange(category, quarter)
Joining, by = "order_id"
Joining, by = "order_id"
`summarise()` regrouping output by 'category' (override with `.groups` argument)
調整資料範圍、去除離群值
# Prepare the data for plotting: keep quarters from 2017-04-01 onward, drop
# the "computers" and "office_furniture" categories, raise avgPrice to a
# minimum of 3, and convert the result to a plain data.frame.
X2 <- as.data.frame(
  X %>%
    filter(
      quarter >= as.Date("2017-04-01"),
      !(category %in% c("computers", "office_furniture"))
    ) %>%
    mutate(avgPrice = pmax(avgPrice, 3))
)
動態多軸互動