Association Rules for Market Basket Analysis (R) cited & modified from: http://www.informit.com/promotions/code-files-modeling-techniques-in-predictive-analytics-141177

library(arules)  # association rules
library(arulesViz)  # data visualization of association rules
library(RColorBrewer)  # color palettes for plots

data(Groceries)  # grocery transactions object from arules package
dim(Groceries)   # 9835 baskets x 169 items  
## [1] 9835  169
inspect(Groceries[1:5])
##     items                     
## [1] {citrus fruit,            
##      semi-finished bread,     
##      margarine,               
##      ready soups}             
## [2] {tropical fruit,          
##      yogurt,                  
##      coffee}                  
## [3] {whole milk}              
## [4] {pip fruit,               
##      yogurt,                  
##      cream cheese ,           
##      meat spreads}            
## [5] {other vegetables,        
##      whole milk,              
##      condensed milk,          
##      long life bakery product}
# as(Groceries, "data.frame") 


Examine frequency for each item with support greater than 0.025

par(cex=0.8)
itemFrequencyPlot(Groceries, support = 0.025, xlim = c(0,0.3),
  type = "relative", horiz = TRUE, col = "dark red", las = 1,
  xlab = paste0(
    "Proportion of Market Baskets Containing Item\n",
    "(Item Relative Frequency or Support)"))

Explore possibilities for combining similar items

df = itemInfo(Groceries)
str(df) # levels 10, 55
## 'data.frame':    169 obs. of  3 variables:
##  $ labels: chr  "frankfurter" "sausage" "liver loaf" "ham" ...
##  $ level2: Factor w/ 55 levels "baby food","bags",..: 44 44 44 44 44 44 44 42 42 41 ...
##  $ level1: Factor w/ 10 levels "canned food",..: 6 6 6 6 6 6 6 6 6 6 ...

Aggregate items using the 55 level2 levels for food categories

groceries <- aggregate(Groceries, itemInfo(Groceries)[["level2"]])  
# dim(groceries) # 9835 baskets x 55 items  
itemFrequencyPlot(groceries, support = 0.025, xlim = c(0,0.5),
  type = "relative", horiz = TRUE, col = "blue", las = 1,
  xlab = paste0("Proportion of Market Baskets Containing Item\n",
    "(Item Relative Frequency or Support)"))

Obtain large set of association rules for items by category and all shoppers

This is done by setting very low criteria for support and confidence:

first.rules <- apriori(groceries, 
  parameter = list(support = 0.001, confidence = 0.05))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##        0.05    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 9 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[55 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [54 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 7 8 done [0.02s].
## writing ... [69921 rule(s)] done [0.01s].
## creating S4 object  ... done [0.03s].
summary(first.rules)  # yields 69,921 rules... too many
## set of 69921 rules
## 
## rule length distribution (lhs + rhs):sizes
##     1     2     3     4     5     6     7     8 
##    21  1205 10467 23895 22560  9888  1813    72 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   4.000   4.000   4.502   5.000   8.000 
## 
## summary of quality measures:
##     support           confidence          lift             count        
##  Min.   :0.001017   Min.   :0.0500   Min.   : 0.4475   Min.   :  10.00  
##  1st Qu.:0.001118   1st Qu.:0.2110   1st Qu.: 1.8315   1st Qu.:  11.00  
##  Median :0.001525   Median :0.4231   Median : 2.2573   Median :  15.00  
##  Mean   :0.002488   Mean   :0.4364   Mean   : 2.5382   Mean   :  24.47  
##  3rd Qu.:0.002339   3rd Qu.:0.6269   3rd Qu.: 2.9662   3rd Qu.:  23.00  
##  Max.   :0.443010   Max.   :1.0000   Max.   :16.1760   Max.   :4357.00  
## 
## mining info:
##       data ntransactions support confidence
##  groceries          9835   0.001       0.05

Select association rules using thresholds for support and confidence

second.rules <- apriori(groceries, 
  parameter = list(support = 0.025, confidence = 0.05))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##        0.05    0.1    1 none FALSE            TRUE       5   0.025      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 245 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[55 item(s), 9835 transaction(s)] done [0.02s].
## sorting and recoding items ... [32 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [344 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
summary(second.rules)  # yields 344 rules
## set of 344 rules
## 
## rule length distribution (lhs + rhs):sizes
##   1   2   3   4 
##  21 162 129  32 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0     2.0     2.0     2.5     3.0     4.0 
## 
## summary of quality measures:
##     support          confidence           lift            count       
##  Min.   :0.02542   Min.   :0.05043   Min.   :0.6669   Min.   : 250.0  
##  1st Qu.:0.03030   1st Qu.:0.18202   1st Qu.:1.2498   1st Qu.: 298.0  
##  Median :0.03854   Median :0.39522   Median :1.4770   Median : 379.0  
##  Mean   :0.05276   Mean   :0.37658   Mean   :1.4831   Mean   : 518.9  
##  3rd Qu.:0.05236   3rd Qu.:0.51271   3rd Qu.:1.7094   3rd Qu.: 515.0  
##  Max.   :0.44301   Max.   :0.79841   Max.   :2.4073   Max.   :4357.0  
## 
## mining info:
##       data ntransactions support confidence
##  groceries          9835   0.025       0.05

Data visualization of association rules in scatter plot

plot(second.rules, cex=0.8,
  control=list(jitter=2, col = rev(brewer.pal(9, "RdYlGn"))),
  shading = "lift")   

Grouped matrix of rules

plot(second.rules, method="grouped",    
  control=list(col = rev(brewer.pal(9, "RdYlGn"))))

Select rules with vegetables in consequent (right-hand-side) item subsets

vegie.rules <- subset(second.rules, subset = rhs %pin% "vegetables")
inspect(vegie.rules)  # 41 rules
##      lhs                            rhs             support confidence      lift count
## [1]  {}                          => {vegetables} 0.27300458  0.2730046 1.0000000  2685
## [2]  {poultry}                   => {vegetables} 0.02897814  0.5745968 2.1047148   285
## [3]  {pork}                      => {vegetables} 0.03009659  0.5220459 1.9122238   296
## [4]  {staple foods}              => {vegetables} 0.02613116  0.5160643 1.8903136   257
## [5]  {eggs}                      => {vegetables} 0.03141840  0.4951923 1.8138608   309
## [6]  {games/books/hobby}         => {vegetables} 0.02785968  0.3145809 1.1522918   274
## [7]  {long-life bakery products} => {vegetables} 0.02907982  0.3492063 1.2791227   286
## [8]  {perfumery}                 => {vegetables} 0.03213015  0.4056483 1.4858662   316
## [9]  {beef}                      => {vegetables} 0.04585663  0.5595533 2.0496116   451
## [10] {bags}                      => {vegetables} 0.03141840  0.3175745 1.1632571   309
## [11] {vinegar/oils}              => {vegetables} 0.04199288  0.4666667 1.7093731   413
## [12] {chocolate}                 => {vegetables} 0.03192679  0.2934579 1.0749195   314
## [13] {beer}                      => {vegetables} 0.03406202  0.2189542 0.8020168   335
## [14] {frozen foods}              => {vegetables} 0.04738180  0.4052174 1.4842879   466
## [15] {cheese}                    => {vegetables} 0.05531266  0.4365971 1.5992300   544
## [16] {sausage}                   => {vegetables} 0.07625826  0.4032258 1.4769929   750
## [17] {fruit}                     => {vegetables} 0.10706660  0.4297959 1.5743176  1053
## [18] {non-alc. drinks}           => {vegetables} 0.09456024  0.2974097 1.0893944   930
## [19] {bread and backed goods}    => {vegetables} 0.11621759  0.3363743 1.2321198  1143
## [20] {dairy produce}             => {vegetables} 0.17041179  0.3846683 1.4090180  1676
## [21] {beef,                                                                           
##       dairy produce}             => {vegetables} 0.02989324  0.6074380 2.2250104   294
## [22] {dairy produce,                                                                  
##       vinegar/oils}              => {vegetables} 0.03141840  0.5355286 1.9616103   309
## [23] {dairy produce,                                                                  
##       frozen foods}              => {vegetables} 0.03436706  0.5121212 1.8758704   338
## [24] {cheese,                                                                         
##       fruit}                     => {vegetables} 0.02674123  0.5197628 1.9038613   263
## [25] {bread and backed goods,                                                         
##       cheese}                    => {vegetables} 0.02887646  0.4536741 1.6617821   284
## [26] {cheese,                                                                         
##       dairy produce}             => {vegetables} 0.04219624  0.4987981 1.8270686   415
## [27] {fruit,                                                                          
##       sausage}                   => {vegetables} 0.03426538  0.5290424 1.9378517   337
## [28] {non-alc. drinks,                                                                
##       sausage}                   => {vegetables} 0.03029995  0.4156206 1.5223944   298
## [29] {bread and backed goods,                                                         
##       sausage}                   => {vegetables} 0.04382308  0.4229637 1.5492916   431
## [30] {dairy produce,                                                                  
##       sausage}                   => {vegetables} 0.05266904  0.4905303 1.7967842   518
## [31] {fruit,                                                                          
##       non-alc. drinks}           => {vegetables} 0.04361973  0.4657980 1.7061914   429
## [32] {bread and backed goods,                                                         
##       fruit}                     => {vegetables} 0.05124555  0.4763705 1.7449177   504
## [33] {dairy produce,                                                                  
##       fruit}                     => {vegetables} 0.07869853  0.5032510 1.8433793   774
## [34] {bread and backed goods,                                                         
##       non-alc. drinks}           => {vegetables} 0.04636502  0.3731588 1.3668590   456
## [35] {dairy produce,                                                                  
##       non-alc. drinks}           => {vegetables} 0.06446365  0.4243641 1.5544213   634
## [36] {bread and backed goods,                                                         
##       dairy produce}             => {vegetables} 0.08195221  0.4366197 1.5993128   806
## [37] {dairy produce,                                                                  
##       fruit,                                                                          
##       sausage}                   => {vegetables} 0.02714794  0.5741935 2.1032378   267
## [38] {bread and backed goods,                                                         
##       dairy produce,                                                                  
##       sausage}                   => {vegetables} 0.03284189  0.5135135 1.8809704   323
## [39] {dairy produce,                                                                  
##       fruit,                                                                          
##       non-alc. drinks}           => {vegetables} 0.03304525  0.5183413 1.8986543   325
## [40] {bread and backed goods,                                                         
##       dairy produce,                                                                  
##       fruit}                     => {vegetables} 0.04077275  0.5276316 1.9326840   401
## [41] {bread and backed goods,                                                         
##       dairy produce,                                                                  
##       non-alc. drinks}           => {vegetables} 0.03345196  0.4627286 1.6949480   329

Sort by lift and identify the top 10 rules

top.vegie.rules <- head(sort(vegie.rules, decreasing = TRUE, by = "lift"), 10)
inspect(top.vegie.rules) 
##      lhs                         rhs             support confidence     lift count
## [1]  {beef,                                                                       
##       dairy produce}          => {vegetables} 0.02989324  0.6074380 2.225010   294
## [2]  {poultry}                => {vegetables} 0.02897814  0.5745968 2.104715   285
## [3]  {dairy produce,                                                              
##       fruit,                                                                      
##       sausage}                => {vegetables} 0.02714794  0.5741935 2.103238   267
## [4]  {beef}                   => {vegetables} 0.04585663  0.5595533 2.049612   451
## [5]  {dairy produce,                                                              
##       vinegar/oils}           => {vegetables} 0.03141840  0.5355286 1.961610   309
## [6]  {fruit,                                                                      
##       sausage}                => {vegetables} 0.03426538  0.5290424 1.937852   337
## [7]  {bread and backed goods,                                                     
##       dairy produce,                                                              
##       fruit}                  => {vegetables} 0.04077275  0.5276316 1.932684   401
## [8]  {pork}                   => {vegetables} 0.03009659  0.5220459 1.912224   296
## [9]  {cheese,                                                                     
##       fruit}                  => {vegetables} 0.02674123  0.5197628 1.903861   263
## [10] {dairy produce,                                                              
##       fruit,                                                                      
##       non-alc. drinks}        => {vegetables} 0.03304525  0.5183413 1.898654   325
plot(top.vegie.rules, method="graph", 
  control=list(type="items", alpha=1, labelCol="blue"), 
  shading = "lift")
## Warning: Unknown control parameters: type
## Available control parameters (with default values):
## main  =  Graph for 10 rules
## nodeColors    =  c("#66CC66FF", "#9999CCFF")
## nodeCol   =  c("#EE0000FF", "#EE0303FF", "#EE0606FF", "#EE0909FF", "#EE0C0CFF", "#EE0F0FFF", "#EE1212FF", "#EE1515FF", "#EE1818FF", "#EE1B1BFF", "#EE1E1EFF", "#EE2222FF", "#EE2525FF", "#EE2828FF", "#EE2B2BFF", "#EE2E2EFF", "#EE3131FF", "#EE3434FF", "#EE3737FF", "#EE3A3AFF", "#EE3D3DFF", "#EE4040FF", "#EE4444FF", "#EE4747FF", "#EE4A4AFF", "#EE4D4DFF", "#EE5050FF", "#EE5353FF", "#EE5656FF", "#EE5959FF", "#EE5C5CFF", "#EE5F5FFF", "#EE6262FF", "#EE6666FF", "#EE6969FF", "#EE6C6CFF", "#EE6F6FFF", "#EE7272FF", "#EE7575FF",  "#EE7878FF", "#EE7B7BFF", "#EE7E7EFF", "#EE8181FF", "#EE8484FF", "#EE8888FF", "#EE8B8BFF", "#EE8E8EFF", "#EE9191FF", "#EE9494FF", "#EE9797FF", "#EE9999FF", "#EE9B9BFF", "#EE9D9DFF", "#EE9F9FFF", "#EEA0A0FF", "#EEA2A2FF", "#EEA4A4FF", "#EEA5A5FF", "#EEA7A7FF", "#EEA9A9FF", "#EEABABFF", "#EEACACFF", "#EEAEAEFF", "#EEB0B0FF", "#EEB1B1FF", "#EEB3B3FF", "#EEB5B5FF", "#EEB7B7FF", "#EEB8B8FF", "#EEBABAFF", "#EEBCBCFF", "#EEBDBDFF", "#EEBFBFFF", "#EEC1C1FF", "#EEC3C3FF", "#EEC4C4FF", "#EEC6C6FF", "#EEC8C8FF",  "#EEC9C9FF", "#EECBCBFF", "#EECDCDFF", "#EECFCFFF", "#EED0D0FF", "#EED2D2FF", "#EED4D4FF", "#EED5D5FF", "#EED7D7FF", "#EED9D9FF", "#EEDBDBFF", "#EEDCDCFF", "#EEDEDEFF", "#EEE0E0FF", "#EEE1E1FF", "#EEE3E3FF", "#EEE5E5FF", "#EEE7E7FF", "#EEE8E8FF", "#EEEAEAFF", "#EEECECFF", "#EEEEEEFF")
## edgeCol   =  c("#474747FF", "#494949FF", "#4B4B4BFF", "#4D4D4DFF", "#4F4F4FFF", "#515151FF", "#535353FF", "#555555FF", "#575757FF", "#595959FF", "#5B5B5BFF", "#5E5E5EFF", "#606060FF", "#626262FF", "#646464FF", "#666666FF", "#686868FF", "#6A6A6AFF", "#6C6C6CFF", "#6E6E6EFF", "#707070FF", "#727272FF", "#747474FF", "#767676FF", "#787878FF", "#7A7A7AFF", "#7C7C7CFF", "#7E7E7EFF", "#808080FF", "#828282FF", "#848484FF", "#868686FF", "#888888FF", "#8A8A8AFF", "#8C8C8CFF", "#8D8D8DFF", "#8F8F8FFF", "#919191FF", "#939393FF",  "#959595FF", "#979797FF", "#999999FF", "#9A9A9AFF", "#9C9C9CFF", "#9E9E9EFF", "#A0A0A0FF", "#A2A2A2FF", "#A3A3A3FF", "#A5A5A5FF", "#A7A7A7FF", "#A9A9A9FF", "#AAAAAAFF", "#ACACACFF", "#AEAEAEFF", "#AFAFAFFF", "#B1B1B1FF", "#B3B3B3FF", "#B4B4B4FF", "#B6B6B6FF", "#B7B7B7FF", "#B9B9B9FF", "#BBBBBBFF", "#BCBCBCFF", "#BEBEBEFF", "#BFBFBFFF", "#C1C1C1FF", "#C2C2C2FF", "#C3C3C4FF", "#C5C5C5FF", "#C6C6C6FF", "#C8C8C8FF", "#C9C9C9FF", "#CACACAFF", "#CCCCCCFF", "#CDCDCDFF", "#CECECEFF", "#CFCFCFFF", "#D1D1D1FF",  "#D2D2D2FF", "#D3D3D3FF", "#D4D4D4FF", "#D5D5D5FF", "#D6D6D6FF", "#D7D7D7FF", "#D8D8D8FF", "#D9D9D9FF", "#DADADAFF", "#DBDBDBFF", "#DCDCDCFF", "#DDDDDDFF", "#DEDEDEFF", "#DEDEDEFF", "#DFDFDFFF", "#E0E0E0FF", "#E0E0E0FF", "#E1E1E1FF", "#E1E1E1FF", "#E2E2E2FF", "#E2E2E2FF", "#E2E2E2FF")
## alpha     =  0.5
## cex   =  1
## itemLabels    =  TRUE
## labelCol  =  #000000B3
## measureLabels     =  FALSE
## precision     =  3
## layout    =  NULL
## layoutParams  =  list()
## arrowSize     =  0.5
## engine    =  igraph
## plot  =  TRUE
## plot_options  =  list()
## max   =  100
## verbose   =  FALSE



Practice:

Suppose your client is someone other than the local farmer,