Apriori

# Data Preprocessing

# install.packages('arules')

library(arules)

# Read the raw CSV as a plain data frame, treating the first line as data.
# This copy is only for looking at the file; `dataset` is reassigned below.
dataset <- read.csv("Market_Basket_Optimisation.csv", header = FALSE)

# Re-read the same file as an arules transactions object (sparse item matrix).
# `rm.duplicates = TRUE` drops items repeated within a single transaction.
dataset <- read.transactions(
  "Market_Basket_Optimisation.csv",
  sep = ",",
  rm.duplicates = TRUE
)

distribution of transactions with duplicates:

# Print a summary of the transactions object (output follows).
summary(dataset)

transactions as itemMatrix in sparse format with
 7501 rows (elements/itemsets/transactions) and
 119 columns (items) and a density of 0.03288973 

most frequent items:
mineral water          eggs     spaghetti  french fries     chocolate       (Other) 
         1788          1348          1306          1282          1229         22405 

element (itemset/transaction) length distribution:
sizes
   1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   18   19   20 
1754 1358 1044  816  667  493  391  324  259  139  102   67   40   22   17    4    1    2    1 

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.000   2.000   3.000   3.914   5.000  20.000 

includes extended item information - examples:
             labels
1           almonds
2 antioxydant juice
3         asparagus

# Plot item frequencies, limited to the top 20 items (`topN = 20`).
itemFrequencyPlot(dataset, topN = 20)


# Same plot, narrowed to the top 10 items.
itemFrequencyPlot(dataset, topN = 10)




# Training Apriori on the dataset

# First attempt: aim for high support (frequently bought products) and high
# confidence, using minimum support 0.003 and minimum confidence 0.8.
rules <- apriori(
  data = dataset,
  parameter = list(support = 0.003, confidence = 0.8)
)

output

Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen maxlen
        0.8    0.1    1 none FALSE            TRUE       5   0.003      1     10
 target   ext
  rules FALSE

Algorithmic control:
 filter tree heap memopt load sort verbose
    0.1 TRUE TRUE  FALSE TRUE    2    TRUE

Absolute minimum support count: 22 

set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[119 item(s), 7501 transaction(s)] done [0.00s].
sorting and recoding items ... [115 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 5 done [0.01s].
writing ... [0 rule(s)] done [0.00s].
creating S4 object  ... done [0.00s].

# Visualising the results
# Show the ten rules with the highest lift. The run above (confidence = 0.8)
# wrote 0 rules, so the `[1:10]` subscript has nothing to select and this call
# fails with the "subscript out of bounds" error shown below.
inspect(sort(rules, by = 'lift')[1:10])

Error in slot(x, s)[i] : subscript out of bounds


# Retry with the confidence threshold lowered from 0.8 to 0.4, because the
# 0.8 run produced no rules; minimum support stays at 0.003.
rules <- apriori(
  data = dataset,
  parameter = list(support = 0.003, confidence = 0.4)
)

Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen maxlen
        0.4    0.1    1 none FALSE            TRUE       5   0.003      1     10
 target   ext
  rules FALSE

Algorithmic control:
 filter tree heap memopt load sort verbose
    0.1 TRUE TRUE  FALSE TRUE    2    TRUE

Absolute minimum support count: 22 

set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[119 item(s), 7501 transaction(s)] done [0.00s].
sorting and recoding items ... [115 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 5 done [0.01s].
writing ... [281 rule(s)] done [0.00s].
creating S4 object  ... done [0.00s].


lhs                                            rhs                 support    
[1]  {mineral water,whole wheat pasta}           => {olive oil}         0.003866151
[2]  {spaghetti,tomato sauce}                    => {ground beef}       0.003066258
[3]  {french fries,herb & pepper}                => {ground beef}       0.003199573
[4]  {cereals,spaghetti}                         => {ground beef}       0.003066258
[5]  {frozen vegetables,mineral water,soup}      => {milk}              0.003066258
[6]  {chocolate,herb & pepper}                   => {ground beef}       0.003999467
[7]  {chocolate,mineral water,shrimp}            => {frozen vegetables} 0.003199573
[8]  {frozen vegetables,mineral water,olive oil} => {milk}              0.003332889
[9]  {cereals,ground beef}                       => {spaghetti}         0.003066258
[10] {frozen vegetables,soup}                    => {milk}              0.003999467
     confidence lift     count
[1]  0.4027778  6.115863 29   
[2]  0.4893617  4.980600 23   
[3]  0.4615385  4.697422 24   
[4]  0.4600000  4.681764 23   
[5]  0.6052632  4.670863 23   
[6]  0.4411765  4.490183 30   
[7]  0.4210526  4.417225 24   
[8]  0.5102041  3.937285 25   
[9]  0.6764706  3.885303 23   
[10] 0.5000000  3.858539 30   


# Third run: raise minimum support to 0.004 and lower minimum confidence
# to 0.2.
rules <- apriori(
  data = dataset,
  parameter = list(support = 0.004, confidence = 0.2)
)

Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen maxlen
        0.2    0.1    1 none FALSE            TRUE       5   0.004      1     10
 target   ext
  rules FALSE

Algorithmic control:
 filter tree heap memopt load sort verbose
    0.1 TRUE TRUE  FALSE TRUE    2    TRUE

Absolute minimum support count: 30 

set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[119 item(s), 7501 transaction(s)] done [0.01s].
sorting and recoding items ... [114 item(s)] done [0.00s].
creating transaction tree ... done [0.00s].
checking subsets of size 1 2 3 4 done [0.01s].
writing ... [811 rule(s)] done [0.00s].
creating S4 object  ... done [0.00s]. 

# Visualising the results
# Show up to ten rules with the highest lift. `head()` is used instead of a
# `[1:10]` subscript because the subscript fails with "subscript out of bounds"
# when fewer than ten rules were mined (for example, an empty rule set).
inspect(head(sort(rules, by = "lift"), n = 10))

lhs                       rhs                 support confidence     lift count
[1]  {light cream}          => {chicken}       0.004532729  0.2905983 4.843951    34
[2]  {pasta}                => {escalope}      0.005865885  0.3728814 4.700812    44
[3]  {pasta}                => {shrimp}        0.005065991  0.3220339 4.506672    38
[4]  {eggs,                                                                         
      ground beef}          => {herb & pepper} 0.004132782  0.2066667 4.178455    31
[5]  {whole wheat pasta}    => {olive oil}     0.007998933  0.2714932 4.122410    60
[6]  {herb & pepper,                                                                
      spaghetti}            => {ground beef}   0.006399147  0.3934426 4.004360    48
[7]  {herb & pepper,                                                                
      mineral water}        => {ground beef}   0.006665778  0.3906250 3.975683    50
[8]  {tomato sauce}         => {ground beef}   0.005332622  0.3773585 3.840659    40
[9]  {mushroom cream sauce} => {escalope}      0.005732569  0.3006993 3.790833    43
[10] {frozen vegetables,                                                            
      mineral water,                                                                
      spaghetti}            => {ground beef}   0.004399413  0.3666667 3.731841    33



Comments

Popular posts from this blog

Decision Tree Classification