# Session setup: number formatting, a clean workspace, packages, and data.
options(
  digits = 3,  # print about three significant digits
  scipen = 10  # bias printing towards fixed rather than scientific notation
)
# Remove every object from the workspace, including dot-prefixed names.
rm(list = ls(all.names = TRUE))
pacman::p_load(dplyr, ggplot2, plotly, arules, arulesViz)
# Restore the objects saved in the .rdata file. Z0, A0 and prod10, which are
# used further down, are presumably among them -- TODO confirm.
load("data/tf0.rdata")
# Linear trend of total quantity against average unit price.
# Rows are limited to age groups a39/a49 and to the products listed in prod10,
# then aggregated per product, age group and date. Each product gets its own
# facet with free axis scales; each age group gets its own fitted line.
Z0 %>%
  filter(prod %in% prod10, age %in% c("a39", "a49")) %>%
  group_by(prod, age, date) %>%
  summarise(
    t.qty = sum(qty),
    # quantity-weighted unit price for the group: total price / total quantity
    u.price = sum(price) / t.qty
  ) %>%
  ggplot(aes(x = u.price, y = t.qty, colour = age)) +
  geom_smooth(method = "lm", se = FALSE) +
  facet_wrap(~prod, scales = "free") +
  theme_bw()
## `summarise()` has grouped output by 'prod', 'age'. You can override using the `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
## cust cat prod
## 1 32256 2007 23789
# Sparse customer-by-product matrix: rows are `cust`, columns are `prod`.
mx <- xtabs(~ cust + prod, data = Z0, sparse = TRUE)
# Overwrite every stored (non-zero) count with 1, turning the counts into a
# bought / not-bought indicator, so column sums count distinct customers.
mx@x[] <- 1
# Reorder columns so the products with the most customers come first.
mx <- mx[, order(-colSums(mx))]
# Number of products bought by more than 200 customers.
sum(colSums(mx) > 200)
## [1] 634
## [1] TRUE
# Per-product profile of the N products with the most customers (the columns of
# `mx` are already sorted by descending customer count).
#
# Columns of `px`:
#   pid    product id, i.e. the column name in `mx`
#   size   number of customers who bought the product
#   rev    mean of A0$rev over those customers
#   raw    mean of A0$raw over those customers
#   margin sum(A0$raw) / sum(A0$rev) over those customers
#
# NOTE(review): A0 is indexed with a logical vector that runs over the rows of
# `mx`, so it presumably has one row per customer in the same order as
# rownames(mx) -- TODO confirm.
N <- 100
px <- data.frame(
  # Products are the COLUMNS of `mx`; rownames(mx) would return customer ids
  # and mislabel every row of this table.
  pid = colnames(mx)[seq_len(N)],
  size = colSums(mx)[seq_len(N)],
  rev = apply(mx[, seq_len(N)], 2, function(v) mean(A0$rev[v > 0])),
  raw = apply(mx[, seq_len(N)], 2, function(v) mean(A0$raw[v > 0])),
  margin = apply(
    mx[, seq_len(N)], 2,
    function(v) sum(A0$raw[v > 0]) / sum(A0$rev[v > 0])
  )
)
summary(px)
## pid size rev raw
## Length:100 Min. : 592 Min. :4992 Min. : 677
## Class :character 1st Qu.: 708 1st Qu.:6260 1st Qu.: 895
## Mode :character Median : 819 Median :6858 Median :1023
## Mean : 999 Mean :6903 Mean :1027
## 3rd Qu.:1026 3rd Qu.:7502 3rd Qu.:1141
## Max. :6025 Max. :9159 Max. :1486
## margin
## Min. :0.127
## 1st Qu.:0.141
## Median :0.149
## Mean :0.148
## 3rd Qu.:0.155
## Max. :0.171
# Bubble chart of the product profile: x is `rev`, y is `margin`, and point
# area follows `size`. `pid` is mapped to the `text` aesthetic so that it is
# available to the interactive plotly tooltip.
p <- ggplot(px, aes(x = rev, y = margin, text = pid)) +
  geom_point(aes(size = size), colour = "brown", alpha = 0.4) +
  theme_bw()
ggplotly(p)
# Price statistics for the first 500 product columns of `mx`, one row per
# product, sorted by descending number of rows and opened in the data viewer.
Z0 %>%
  filter(prod %in% colnames(mx)[seq_len(500)]) %>%
  mutate(uprice = price / qty) %>%  # unit price of each row
  group_by(prod) %>%
  summarise(
    noPrice = n_distinct(uprice),         # number of distinct unit prices
    maxPrice = max(uprice),
    minPrice = min(uprice),
    avgPrice = sum(price) / sum(qty),     # quantity-weighted average price
    totalQty = sum(qty),
    noOrders = n()                        # number of rows for the product
  ) %>%
  arrange(desc(noOrders)) %>%
  View()
# Bar plot of how many rows of product 4714981010038 occur at each distinct
# unit price; bars are labelled with the unit price rounded to 2 decimals.
# Shrink base-graphics text so the perpendicular labels fit. The setting is not
# restored, so it also applies to base plots drawn later on this device.
par(cex = 0.6)
Z0 %>%
  filter(prod == "4714981010038") %>%
  mutate(uprice = price / qty) %>%
  count(uprice) %>%
  {
    # `names.arg` is spelled out in full; the previous `name =` only worked
    # through partial argument matching.
    barplot(.$n, names.arg = round(.$uprice, 2), las = 2)
  }
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.083 0.097 0.155 0.130 0.199 0.585
## large
## loss FALSE TRUE
## FALSE 60.3 31.5
## TRUE 7.8 0.3
# Density of `rev` on a log10 x axis, drawn separately for rows of A0 with a
# negative margin (Loss = TRUE) and for the rest (Loss = FALSE).
A0 %>%
  mutate(Loss = margin < 0) %>%
  ggplot(aes(x = rev, fill = Loss)) +
  geom_density(alpha = 0.3) +
  scale_x_log10() +
  theme_bw()