Association Rules for Market Basket Analysis (R) cited & modified from: http://www.informit.com/promotions/code-files-modeling-techniques-in-predictive-analytics-141177
library(arules) # association rules
library(arulesViz) # data visualization of association rules
library(RColorBrewer) # color palettes for plots
data(Groceries) # grocery transactions object from arules package
dim(Groceries) # 9835 baskets x 169 items
## [1] 9835 169
inspect(Groceries[1:5])
## items
## [1] {citrus fruit,
## semi-finished bread,
## margarine,
## ready soups}
## [2] {tropical fruit,
## yogurt,
## coffee}
## [3] {whole milk}
## [4] {pip fruit,
## yogurt,
## cream cheese ,
## meat spreads}
## [5] {other vegetables,
## whole milk,
## condensed milk,
## long life bakery product}
# as(Groceries, "data.frame")
par(cex=0.8)
itemFrequencyPlot(Groceries, support = 0.025, xlim = c(0,0.3),
type = "relative", horiz = TRUE, col = "dark red", las = 1,
xlab = paste0(
"Proportion of Market Baskets Containing Item\n",
"(Item Relative Frequency or Support)"))
df = itemInfo(Groceries)
str(df) # levels 10, 55
## 'data.frame': 169 obs. of 3 variables:
## $ labels: chr "frankfurter" "sausage" "liver loaf" "ham" ...
## $ level2: Factor w/ 55 levels "baby food","bags",..: 44 44 44 44 44 44 44 42 42 41 ...
## $ level1: Factor w/ 10 levels "canned food",..: 6 6 6 6 6 6 6 6 6 6 ...
groceries <- aggregate(Groceries, itemInfo(Groceries)[["level2"]])
# dim(groceries) # 9835 baskets x 55 items
itemFrequencyPlot(groceries, support = 0.025, xlim = c(0,0.5),
type = "relative", horiz = TRUE, col = "blue", las = 1,
xlab = paste0("Proportion of Market Baskets Containing Item\n",
"(Item Relative Frequency or Support)"))
This is done by setting very low criteria for support and confidence:
first.rules <- apriori(groceries,
parameter = list(support = 0.001, confidence = 0.05))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.05 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 9
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[55 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [54 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 7 8 done [0.02s].
## writing ... [69921 rule(s)] done [0.01s].
## creating S4 object ... done [0.03s].
summary(first.rules) # yields 69,921 rules... too many
## set of 69921 rules
##
## rule length distribution (lhs + rhs):sizes
## 1 2 3 4 5 6 7 8
## 21 1205 10467 23895 22560 9888 1813 72
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 4.000 4.000 4.502 5.000 8.000
##
## summary of quality measures:
## support confidence lift count
## Min. :0.001017 Min. :0.0500 Min. : 0.4475 Min. : 10.00
## 1st Qu.:0.001118 1st Qu.:0.2110 1st Qu.: 1.8315 1st Qu.: 11.00
## Median :0.001525 Median :0.4231 Median : 2.2573 Median : 15.00
## Mean :0.002488 Mean :0.4364 Mean : 2.5382 Mean : 24.47
## 3rd Qu.:0.002339 3rd Qu.:0.6269 3rd Qu.: 2.9662 3rd Qu.: 23.00
## Max. :0.443010 Max. :1.0000 Max. :16.1760 Max. :4357.00
##
## mining info:
## data ntransactions support confidence
## groceries 9835 0.001 0.05
second.rules <- apriori(groceries,
parameter = list(support = 0.025, confidence = 0.05))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.05 0.1 1 none FALSE TRUE 5 0.025 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 245
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[55 item(s), 9835 transaction(s)] done [0.02s].
## sorting and recoding items ... [32 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [344 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
summary(second.rules) # yields 344 rules
## set of 344 rules
##
## rule length distribution (lhs + rhs):sizes
## 1 2 3 4
## 21 162 129 32
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 2.0 2.0 2.5 3.0 4.0
##
## summary of quality measures:
## support confidence lift count
## Min. :0.02542 Min. :0.05043 Min. :0.6669 Min. : 250.0
## 1st Qu.:0.03030 1st Qu.:0.18202 1st Qu.:1.2498 1st Qu.: 298.0
## Median :0.03854 Median :0.39522 Median :1.4770 Median : 379.0
## Mean :0.05276 Mean :0.37658 Mean :1.4831 Mean : 518.9
## 3rd Qu.:0.05236 3rd Qu.:0.51271 3rd Qu.:1.7094 3rd Qu.: 515.0
## Max. :0.44301 Max. :0.79841 Max. :2.4073 Max. :4357.0
##
## mining info:
## data ntransactions support confidence
## groceries 9835 0.025 0.05
plot(second.rules, cex=0.8,
control=list(jitter=2, col = rev(brewer.pal(9, "RdYlGn"))),
shading = "lift")
plot(second.rules, method="grouped",
control=list(col = rev(brewer.pal(9, "RdYlGn"))))
vegie.rules <- subset(second.rules, subset = rhs %pin% "vegetables")
inspect(vegie.rules) # 41 rules
## lhs rhs support confidence lift count
## [1] {} => {vegetables} 0.27300458 0.2730046 1.0000000 2685
## [2] {poultry} => {vegetables} 0.02897814 0.5745968 2.1047148 285
## [3] {pork} => {vegetables} 0.03009659 0.5220459 1.9122238 296
## [4] {staple foods} => {vegetables} 0.02613116 0.5160643 1.8903136 257
## [5] {eggs} => {vegetables} 0.03141840 0.4951923 1.8138608 309
## [6] {games/books/hobby} => {vegetables} 0.02785968 0.3145809 1.1522918 274
## [7] {long-life bakery products} => {vegetables} 0.02907982 0.3492063 1.2791227 286
## [8] {perfumery} => {vegetables} 0.03213015 0.4056483 1.4858662 316
## [9] {beef} => {vegetables} 0.04585663 0.5595533 2.0496116 451
## [10] {bags} => {vegetables} 0.03141840 0.3175745 1.1632571 309
## [11] {vinegar/oils} => {vegetables} 0.04199288 0.4666667 1.7093731 413
## [12] {chocolate} => {vegetables} 0.03192679 0.2934579 1.0749195 314
## [13] {beer} => {vegetables} 0.03406202 0.2189542 0.8020168 335
## [14] {frozen foods} => {vegetables} 0.04738180 0.4052174 1.4842879 466
## [15] {cheese} => {vegetables} 0.05531266 0.4365971 1.5992300 544
## [16] {sausage} => {vegetables} 0.07625826 0.4032258 1.4769929 750
## [17] {fruit} => {vegetables} 0.10706660 0.4297959 1.5743176 1053
## [18] {non-alc. drinks} => {vegetables} 0.09456024 0.2974097 1.0893944 930
## [19] {bread and backed goods} => {vegetables} 0.11621759 0.3363743 1.2321198 1143
## [20] {dairy produce} => {vegetables} 0.17041179 0.3846683 1.4090180 1676
## [21] {beef,
## dairy produce} => {vegetables} 0.02989324 0.6074380 2.2250104 294
## [22] {dairy produce,
## vinegar/oils} => {vegetables} 0.03141840 0.5355286 1.9616103 309
## [23] {dairy produce,
## frozen foods} => {vegetables} 0.03436706 0.5121212 1.8758704 338
## [24] {cheese,
## fruit} => {vegetables} 0.02674123 0.5197628 1.9038613 263
## [25] {bread and backed goods,
## cheese} => {vegetables} 0.02887646 0.4536741 1.6617821 284
## [26] {cheese,
## dairy produce} => {vegetables} 0.04219624 0.4987981 1.8270686 415
## [27] {fruit,
## sausage} => {vegetables} 0.03426538 0.5290424 1.9378517 337
## [28] {non-alc. drinks,
## sausage} => {vegetables} 0.03029995 0.4156206 1.5223944 298
## [29] {bread and backed goods,
## sausage} => {vegetables} 0.04382308 0.4229637 1.5492916 431
## [30] {dairy produce,
## sausage} => {vegetables} 0.05266904 0.4905303 1.7967842 518
## [31] {fruit,
## non-alc. drinks} => {vegetables} 0.04361973 0.4657980 1.7061914 429
## [32] {bread and backed goods,
## fruit} => {vegetables} 0.05124555 0.4763705 1.7449177 504
## [33] {dairy produce,
## fruit} => {vegetables} 0.07869853 0.5032510 1.8433793 774
## [34] {bread and backed goods,
## non-alc. drinks} => {vegetables} 0.04636502 0.3731588 1.3668590 456
## [35] {dairy produce,
## non-alc. drinks} => {vegetables} 0.06446365 0.4243641 1.5544213 634
## [36] {bread and backed goods,
## dairy produce} => {vegetables} 0.08195221 0.4366197 1.5993128 806
## [37] {dairy produce,
## fruit,
## sausage} => {vegetables} 0.02714794 0.5741935 2.1032378 267
## [38] {bread and backed goods,
## dairy produce,
## sausage} => {vegetables} 0.03284189 0.5135135 1.8809704 323
## [39] {dairy produce,
## fruit,
## non-alc. drinks} => {vegetables} 0.03304525 0.5183413 1.8986543 325
## [40] {bread and backed goods,
## dairy produce,
## fruit} => {vegetables} 0.04077275 0.5276316 1.9326840 401
## [41] {bread and backed goods,
## dairy produce,
## non-alc. drinks} => {vegetables} 0.03345196 0.4627286 1.6949480 329
top.vegie.rules <- head(sort(vegie.rules, decreasing = TRUE, by = "lift"), 10)
inspect(top.vegie.rules)
## lhs rhs support confidence lift count
## [1] {beef,
## dairy produce} => {vegetables} 0.02989324 0.6074380 2.225010 294
## [2] {poultry} => {vegetables} 0.02897814 0.5745968 2.104715 285
## [3] {dairy produce,
## fruit,
## sausage} => {vegetables} 0.02714794 0.5741935 2.103238 267
## [4] {beef} => {vegetables} 0.04585663 0.5595533 2.049612 451
## [5] {dairy produce,
## vinegar/oils} => {vegetables} 0.03141840 0.5355286 1.961610 309
## [6] {fruit,
## sausage} => {vegetables} 0.03426538 0.5290424 1.937852 337
## [7] {bread and backed goods,
## dairy produce,
## fruit} => {vegetables} 0.04077275 0.5276316 1.932684 401
## [8] {pork} => {vegetables} 0.03009659 0.5220459 1.912224 296
## [9] {cheese,
## fruit} => {vegetables} 0.02674123 0.5197628 1.903861 263
## [10] {dairy produce,
## fruit,
## non-alc. drinks} => {vegetables} 0.03304525 0.5183413 1.898654 325
plot(top.vegie.rules, method="graph",
control=list(type="items", alpha=1, labelCol="blue"),
shading = "lift")
## Warning: Unknown control parameters: type
## Available control parameters (with default values):
## main = Graph for 10 rules
## nodeColors = c("#66CC66FF", "#9999CCFF")
## nodeCol = c("#EE0000FF", "#EE0303FF", "#EE0606FF", "#EE0909FF", "#EE0C0CFF", "#EE0F0FFF", "#EE1212FF", "#EE1515FF", "#EE1818FF", "#EE1B1BFF", "#EE1E1EFF", "#EE2222FF", "#EE2525FF", "#EE2828FF", "#EE2B2BFF", "#EE2E2EFF", "#EE3131FF", "#EE3434FF", "#EE3737FF", "#EE3A3AFF", "#EE3D3DFF", "#EE4040FF", "#EE4444FF", "#EE4747FF", "#EE4A4AFF", "#EE4D4DFF", "#EE5050FF", "#EE5353FF", "#EE5656FF", "#EE5959FF", "#EE5C5CFF", "#EE5F5FFF", "#EE6262FF", "#EE6666FF", "#EE6969FF", "#EE6C6CFF", "#EE6F6FFF", "#EE7272FF", "#EE7575FF", "#EE7878FF", "#EE7B7BFF", "#EE7E7EFF", "#EE8181FF", "#EE8484FF", "#EE8888FF", "#EE8B8BFF", "#EE8E8EFF", "#EE9191FF", "#EE9494FF", "#EE9797FF", "#EE9999FF", "#EE9B9BFF", "#EE9D9DFF", "#EE9F9FFF", "#EEA0A0FF", "#EEA2A2FF", "#EEA4A4FF", "#EEA5A5FF", "#EEA7A7FF", "#EEA9A9FF", "#EEABABFF", "#EEACACFF", "#EEAEAEFF", "#EEB0B0FF", "#EEB1B1FF", "#EEB3B3FF", "#EEB5B5FF", "#EEB7B7FF", "#EEB8B8FF", "#EEBABAFF", "#EEBCBCFF", "#EEBDBDFF", "#EEBFBFFF", "#EEC1C1FF", "#EEC3C3FF", "#EEC4C4FF", "#EEC6C6FF", "#EEC8C8FF", "#EEC9C9FF", "#EECBCBFF", "#EECDCDFF", "#EECFCFFF", "#EED0D0FF", "#EED2D2FF", "#EED4D4FF", "#EED5D5FF", "#EED7D7FF", "#EED9D9FF", "#EEDBDBFF", "#EEDCDCFF", "#EEDEDEFF", "#EEE0E0FF", "#EEE1E1FF", "#EEE3E3FF", "#EEE5E5FF", "#EEE7E7FF", "#EEE8E8FF", "#EEEAEAFF", "#EEECECFF", "#EEEEEEFF")
## edgeCol = c("#474747FF", "#494949FF", "#4B4B4BFF", "#4D4D4DFF", "#4F4F4FFF", "#515151FF", "#535353FF", "#555555FF", "#575757FF", "#595959FF", "#5B5B5BFF", "#5E5E5EFF", "#606060FF", "#626262FF", "#646464FF", "#666666FF", "#686868FF", "#6A6A6AFF", "#6C6C6CFF", "#6E6E6EFF", "#707070FF", "#727272FF", "#747474FF", "#767676FF", "#787878FF", "#7A7A7AFF", "#7C7C7CFF", "#7E7E7EFF", "#808080FF", "#828282FF", "#848484FF", "#868686FF", "#888888FF", "#8A8A8AFF", "#8C8C8CFF", "#8D8D8DFF", "#8F8F8FFF", "#919191FF", "#939393FF", "#959595FF", "#979797FF", "#999999FF", "#9A9A9AFF", "#9C9C9CFF", "#9E9E9EFF", "#A0A0A0FF", "#A2A2A2FF", "#A3A3A3FF", "#A5A5A5FF", "#A7A7A7FF", "#A9A9A9FF", "#AAAAAAFF", "#ACACACFF", "#AEAEAEFF", "#AFAFAFFF", "#B1B1B1FF", "#B3B3B3FF", "#B4B4B4FF", "#B6B6B6FF", "#B7B7B7FF", "#B9B9B9FF", "#BBBBBBFF", "#BCBCBCFF", "#BEBEBEFF", "#BFBFBFFF", "#C1C1C1FF", "#C2C2C2FF", "#C3C3C4FF", "#C5C5C5FF", "#C6C6C6FF", "#C8C8C8FF", "#C9C9C9FF", "#CACACAFF", "#CCCCCCFF", "#CDCDCDFF", "#CECECEFF", "#CFCFCFFF", "#D1D1D1FF", "#D2D2D2FF", "#D3D3D3FF", "#D4D4D4FF", "#D5D5D5FF", "#D6D6D6FF", "#D7D7D7FF", "#D8D8D8FF", "#D9D9D9FF", "#DADADAFF", "#DBDBDBFF", "#DCDCDCFF", "#DDDDDDFF", "#DEDEDEFF", "#DEDEDEFF", "#DFDFDFFF", "#E0E0E0FF", "#E0E0E0FF", "#E1E1E1FF", "#E1E1E1FF", "#E2E2E2FF", "#E2E2E2FF", "#E2E2E2FF")
## alpha = 0.5
## cex = 1
## itemLabels = TRUE
## labelCol = #000000B3
## measureLabels = FALSE
## precision = 3
## layout = NULL
## layoutParams = list()
## arrowSize = 0.5
## engine = igraph
## plot = TRUE
## plot_options = list()
## max = 100
## verbose = FALSE
Suppose your client is someone other than the local farmer,