#!/usr/bin/env Rscript

# Association-rule mining exercise on "AssociationRules.csv" using the arules
# and arulesViz packages. The lettered comments (a, b, c, ...) label the
# individual exercise steps.

# Setup ----

# Install `pkg` from CRAN only when it is not already available.
# requireNamespace() probes for the package without attaching it; attaching is
# done separately with library(), which errors if the package is still missing.
ensure_installed <- function(pkg) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = "https://cran.r-project.org/", Ncpus = 16)
  }
}

ensure_installed("arules")
library(arules)
ensure_installed("arulesViz")
library(arulesViz)

# Data ----

# Named `trans` rather than `t` so that base::t() is not shadowed.
trans <- read.transactions("AssociationRules.csv", format = "basket", sep = " ")
summary(trans)

# a: most frequent item
names(sort(itemFrequency(trans), decreasing = TRUE)[1])

# b: maximum number of items in a single transaction
max(size(trans))

# Rule mining ----

# c: rules with support >= 0.01 and no confidence threshold
rules0 <- apriori(trans, parameter = list(supp = 0.01, conf = 0, minlen = 2))
length(rules0)

# d: rules with support >= 0.01 and confidence >= 0.5
rules05 <- apriori(trans, parameter = list(supp = 0.01, conf = 0.5, minlen = 2))
length(rules05)

# f: support vs. lift, shaded by confidence
plot(rules05, measure = c("support", "lift"), shading = "confidence")

# g: support vs. confidence, shaded by lift
plot(rules05, measure = c("support", "confidence"), shading = "lift")

# j: rules with support >= 0.1, ordered by ascending support.
# The argument must be spelled `decreasing`; a misspelled name is swallowed by
# `...` and the default (decreasing) order is used instead.
rules_supp_01 <- subset(rules05, support >= 0.1)
rules_by_support <- sort(rules_supp_01, by = "support", decreasing = FALSE)
inspect(rules_by_support)

# k: rules with confidence > 0.8, ordered by ascending lift
rules_conf_08 <- sort(
  subset(rules05, confidence > 0.8),
  by = "lift",
  decreasing = FALSE
)
inspect(rules_conf_08)

# NOTE(review): `recorded` does not look like a known matrix-plot control
# option (possibly `reorder` was intended) -- confirm against the installed
# arulesViz version. Left unchanged to keep the current plot output.
plot(
  rules_conf_08,
  method = "matrix",
  engine = "grid",
  measure = "confidence",
  shading = "lift",
  control = list(recorded = FALSE)
)
plot(
  rules_conf_08,
  method = "matrix",
  engine = "grid",
  shading = c("lift", "confidence")
)

# n: the three rules with the highest lift
rules_lift_max_3 <- head(sort(rules05, by = "lift", decreasing = TRUE), 3)

# o: graph view of those three rules
plot(rules_lift_max_3, method = "graph", engine = "igraph")
inspect(rules_lift_max_3)

# NOTE: R has no `exit`; evaluating that bare symbol raises
# "object 'exit' not found" and aborts an Rscript run before section q.
# Use quit(save = "no") here if the script should deliberately stop early.

# q: train/test comparison ----

# NOTE(review): the 8000/2000 split presumably assumes 10000 transactions in
# the input file -- confirm against the data.
train_t <- head(trans, 8000)
test_t <- tail(trans, 2000)

train <- apriori(train_t, parameter = list(supp = 0.01, conf = 0.5))
subset_train <- subset(train, lift > 15)
subset_train_df <- as(subset_train, "data.frame")

# Re-evaluate the selected training rules on the held-out transactions.
# https://www.rdocumentation.org/packages/arules/versions/1.7-9/topics/interestMeasure
test <- interestMeasure(
  subset_train,
  transactions = test_t,
  measure = c("support", "confidence", "lift", "count"),
  reuse = FALSE
)

# seq_len() yields an empty sequence when no rule passes the lift filter,
# whereas 1:length(x) would iterate over c(1, 0).
for (i in seq_len(length(subset_train))) {
  # as.character() keeps the rule label readable even if the column is a factor.
  cat(as.character(subset_train_df$rules[i]))
  cat("\n")
  cat(
    "train conf:", subset_train_df$confidence[i],
    "lift:", subset_train_df$lift[i]
  )
  cat("\n")
  cat("test conf:", test$confidence[i], "lift:", test$lift[i])
  cat("\n")
}