#!/usr/bin/env Rscript
# Install missing dependencies from CRAN, then attach them.
# requireNamespace() only checks whether a package is available (it does not
# attach it), so the library() calls are what load the packages and they fail
# loudly if an installation did not succeed.
cran_repo <- "https://cran.r-project.org/"
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", repos = cran_repo, Ncpus = 16)
}
library(arules)
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz", repos = cran_repo, Ncpus = 16)
}
library(arulesViz)
# Load the transactions stored in basket format with space-separated items.
t <- read.transactions(
  file = "AssociationRules.csv",
  format = "basket",
  sep = " "
)
summary(t)

# (a) Name of the item with the highest frequency.
item_freq_desc <- sort(itemFrequency(t), decreasing = TRUE)
names(item_freq_desc[1])

# (b) Largest number of items found in a single transaction.
basket_sizes <- size(t)
max(basket_sizes)
# Mine rules of length >= 2 with minimum support 0.01 and no confidence
# threshold (confidence 0).
rules0 <- apriori(
  t,
  parameter = list(minlen = 2, supp = 0.01, conf = 0)
)
# (c) Number of rules found without a confidence threshold.
length(rules0)

# (d) Same mining run, but with a minimum confidence of 0.5.
rules05 <- apriori(
  t,
  parameter = list(minlen = 2, supp = 0.01, conf = 0.5)
)
length(rules05)

# (f) Support against lift, shaded by confidence.
plot(
  rules05,
  shading = "confidence",
  measure = c("support", "lift")
)

# (g) Support against confidence, shaded by lift.
plot(
  rules05,
  shading = "lift",
  measure = c("support", "confidence")
)
# (j) Rules with support of at least 0.1, listed in ascending order of support.
# The argument name must be spelled `decreasing` exactly: a misspelled name is
# not matched to the parameter and the default (descending) order is used.
rules_supp_01 <- subset(rules05, support >= 0.1)
rules_by_support <- sort(rules_supp_01, by = "support", decreasing = FALSE)
inspect(rules_by_support)

# (k) Rules with confidence above 0.8, listed in ascending order of lift.
rules_conf_08 <- sort(
  subset(rules05, confidence > 0.8),
  by = "lift",
  decreasing = FALSE
)
inspect(rules_conf_08)

# Matrix view of the high-confidence rules.
# NOTE(review): `recorded` does not look like a documented arulesViz control
# option for the matrix method (possibly `reorder` was meant) -- confirm
# against the installed arulesViz version before changing it.
plot(
  rules_conf_08,
  method = "matrix",
  engine = "grid",
  measure = "confidence",
  shading = "lift",
  control = list(recorded = FALSE)
)

# Second matrix view, passing both lift and confidence as shading measures.
plot(
  rules_conf_08,
  method = "matrix",
  engine = "grid",
  shading = c("lift", "confidence")
)
# (n) The three rules with the highest lift (sorted by lift, descending).
rules_lift_max_3 <- head(sort(rules05, by = "lift", decreasing = TRUE), 3)

# (o) Graph view of those three rules, followed by their details.
plot(rules_lift_max_3, method = "graph", engine = "igraph")
inspect(rules_lift_max_3)
# (q) Mine rules on the first 8000 transactions and re-evaluate the
# high-lift ones on the last 2000 transactions.
# NOTE(review): the two slices only stay disjoint when `t` holds at least
# 10000 transactions -- confirm against the data set.
train_t <- head(t, 8000)
test_t <- tail(t, 2000)

train <- apriori(train_t, parameter = list(supp = 0.01, conf = 0.5))
subset_train <- subset(train, lift > 15)
subset_train_df <- as(subset_train, "data.frame")

# Recalculate the quality measures of the selected rules against the test
# transactions; reuse = FALSE asks for them to be recomputed instead of taken
# from the values stored with the rules.
# https://www.rdocumentation.org/packages/arules/versions/1.7-9/topics/interestMeasure
test <- interestMeasure(
  subset_train,
  measure = c("support", "confidence", "lift", "count"),
  transactions = test_t,
  reuse = FALSE
)

# Print each rule with its confidence and lift on the training and test data.
# seq_len() gives an empty sequence when no rule passes the lift filter,
# whereas 1:length(x) would iterate over c(1, 0).
for (i in seq_len(length(subset_train))) {
  # as.character() keeps the rule text intact even if the column is a factor.
  cat(as.character(subset_train_df$rules[i]))
  cat("\n")
  cat(
    "train conf:", subset_train_df$confidence[i],
    "lift:", subset_train_df$lift[i]
  )
  cat("\n")
  cat("test conf:", test$confidence[i], "lift:", test$lift[i])
  cat("\n")
}