Open walid-shaiket opened 4 years ago
# Package setup ----
# Install arules only when it is missing, so an ordinary run neither touches
# the network nor reinstalls a package that is about to be attached.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", dependencies = TRUE)
}

library(data.table)
library(dplyr)
library(ggplot2)
library(knitr)
library(stringr)
library(DT)
library(plotly)
library(arules)
library(arulesViz)
library(visNetwork)
library(igraph)
library(kableExtra)
# Load the symptom records and mine association rules ----
# The arules functions applied to this data (itemFrequencyPlot(), the
# doubledecker plot, interestMeasure()) work on a `transactions` object, not
# on the plain data.frame that read.table() returns, so the file is read with
# read.transactions() instead.
# NOTE(review): assumes each line of symptoms.tsv is one record whose symptoms
# are separated by tabs ("basket" layout) - confirm against the file; use
# format = "single" with `cols` if it is in long (id, item) form.
Symptom_data <- read.transactions(
  file = "D:/MSc_UofM/Data Mining/Project/DataSets/symptoms.tsv",
  format = "basket",
  sep = "\t",
  rm.duplicates = TRUE  # a symptom repeated within one record counts once
)

# No data(Symptom_data) call: data() only loads datasets shipped with
# packages, and Symptom_data already exists in the workspace.
rules <- apriori(
  Symptom_data,
  parameter = list(support = 0.005, confidence = 0.2)
)
rules
# Relative frequency of the 20 most frequent items ----
library("RColorBrewer")  # provides brewer.pal()

arules::itemFrequencyPlot(
  Symptom_data,
  type = "relative",
  topN = 20,
  main = "Relative Item Frequency Plot",
  ylab = "Item Frequency (Relative)",
  col = brewer.pal(8, "Pastel2")
)
# Scatterplots of the mined rules ----
# Scatterplot with the default settings.
plot(rules)

# Scatterplot with custom colors.
# The plot() call must sit on its own line: placed after the trailing
# `# for sequential_hcl` comment it becomes part of that comment and never
# runs.
library(colorspace)  # for sequential_hcl
plot(rules, control = list(col = sequential_hcl(100)))

# Two-key plot: a scatterplot with shading = "order".
plot(
  rules,
  shading = "order",
  control = list(main = "Two-key plot", col = rainbow(5))
)
# Matrix-based plots on a reduced rule set ----
# Keep only the rules whose lift exceeds 2.5, then print the subset.
subrules <- subset(rules, lift > 2.5)
subrules

# 2D matrix shaded by lift: as-is, then with reorder = TRUE.
plot(subrules, measure = "lift", method = "matrix")
plot(
  subrules,
  measure = "lift",
  method = "matrix",
  control = list(reorder = TRUE)
)

# 3D matrix of lift: as-is, then with reorder = TRUE.
# NOTE(review): newer arulesViz releases may expect this spelled as
# method = "matrix" with engine = "3d" - confirm for the installed version.
plot(subrules, measure = "lift", method = "matrix3D")
plot(
  subrules,
  measure = "lift",
  method = "matrix3D",
  control = list(reorder = TRUE)
)

# Matrix with two measures (lift and confidence): as-is, then reordered.
plot(subrules, measure = c("lift", "confidence"), method = "matrix")
plot(
  subrules,
  measure = c("lift", "confidence"),
  method = "matrix",
  control = list(reorder = TRUE)
)
# Doubledecker, grouped and graph plots ----
# Doubledecker plot of a single rule drawn at random from `rules`.
oneRule <- sample(rules, 1)
plot(oneRule, data = Symptom_data, method = "doubledecker")

# Grouped matrix plot of the complete rule set.
plot(rules, method = "grouped")

# Graph plot of ten rules drawn at random from `rules`.
subrules2 <- sample(rules, 10)
plot(subrules2, method = "graph")
# Graph plot variations on the ten sampled rules ----
# Layouts supplied by igraph layout generators.
plot(
  subrules2,
  method = "graph",
  control = list(layout = igraph::in_circle())
)
plot(
  subrules2,
  method = "graph",
  control = list(layout = igraph::with_graphopt(spring.const = 5, mass = 50))
)

# Graph plot with type = "itemsets", with the "interactive" engine, and with
# the "graphviz" engine requested through `control`.
plot(subrules2, method = "graph", control = list(type = "itemsets"))
plot(subrules2, method = "graph", engine = "interactive")
plot(subrules2, method = "graph", control = list(engine = "graphviz"))

# Parallel coordinates plot: as-is, then with reorder = TRUE.
plot(subrules2, method = "paracoord")
plot(subrules2, method = "paracoord", control = list(reorder = TRUE))
# iplots (experimental)
# The label above has to be a comment: as bare text in front of the
# assignment it is parsed as code and the line fails with a syntax error.
# NOTE(review): method = "iplots" and interactive = TRUE may be deprecated or
# removed in newer arulesViz releases - confirm for the installed version.
sel <- plot(rules, method = "iplots", interactive = TRUE)
# Frequent itemsets mined with eclat ----
# Itemsets of at least two items with support >= 0.02, drawn as the default
# plot, as a graph, and as a reordered parallel coordinates plot.
itemsets <- eclat(Symptom_data, parameter = list(support = 0.02, minlen = 2))
plot(itemsets)
plot(itemsets, method = "graph")
plot(
  itemsets,
  method = "paracoord",
  control = list(alpha = 0.5, reorder = TRUE)
)

# Replace the itemsets' quality table with the measures computed by
# interestMeasure() so the scatterplot below can use them.
# `transactions` is written out in full instead of relying on partial
# argument matching of `trans`.
quality(itemsets) <- interestMeasure(itemsets, transactions = Symptom_data)
head(quality(itemsets))
plot(itemsets, measure = c("support", "allConfidence"), shading = "lift")
# Load packages ----
# Install arules only when it is missing, so an ordinary run neither touches
# the network nor reinstalls a package that is about to be attached.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", dependencies = TRUE)
}

library(data.table)
library(dplyr)
library(ggplot2)
library(knitr)
library(stringr)
library(DT)
library(plotly)
library(arules)
library(arulesViz)
library(visNetwork)
library(igraph)
library(kableExtra)
# Load the symptom records and mine association rules ----
# The arules functions applied to this data (itemFrequencyPlot(), the
# doubledecker plot, interestMeasure()) work on a `transactions` object, not
# on the plain data.frame that read.table() returns, so the file is read with
# read.transactions() instead.
# NOTE(review): assumes each line of symptoms.tsv is one record whose symptoms
# are separated by tabs ("basket" layout) - confirm against the file; use
# format = "single" with `cols` if it is in long (id, item) form.
Symptom_data <- read.transactions(
  file = "D:/MSc_UofM/Data Mining/Project/DataSets/symptoms.tsv",
  format = "basket",
  sep = "\t",
  rm.duplicates = TRUE  # a symptom repeated within one record counts once
)

# No data(Symptom_data) call: data() only loads datasets shipped with
# packages, and Symptom_data already exists in the workspace.
rules <- apriori(
  Symptom_data,
  parameter = list(support = 0.005, confidence = 0.2)
)
rules
# Relative frequency of the 20 most frequent items ----
library("RColorBrewer")  # provides brewer.pal()

arules::itemFrequencyPlot(
  Symptom_data,
  type = "relative",
  topN = 20,
  main = "Relative Item Frequency Plot",
  ylab = "Item Frequency (Relative)",
  col = brewer.pal(8, "Pastel2")
)
# Scatterplot ----
plot(rules)

# Same plot requested with interactive = TRUE; the returned value is kept in
# `sel`.
sel <- plot(rules, interactive = TRUE)

# Scatterplot with custom colors
# The plot() call must sit on its own line: placed after the trailing
# `# for sequential_hcl` comment it becomes part of that comment and never
# runs.
library(colorspace)  # for sequential_hcl
plot(rules, control = list(col = sequential_hcl(100)))

# Two-key plot is a scatterplot with shading = "order"
plot(
  rules,
  shading = "order",
  control = list(main = "Two-key plot", col = rainbow(5))
)
# The following techniques work better with fewer rules ----
# Keep only the rules whose lift exceeds 2.5, then print the subset.
subrules <- subset(rules, lift > 2.5)
subrules

# 2D matrix with shading: as-is, then with reorder = TRUE.
plot(subrules, measure = "lift", method = "matrix")
plot(
  subrules,
  measure = "lift",
  method = "matrix",
  control = list(reorder = TRUE)
)

# 3D matrix: as-is, then with reorder = TRUE.
# NOTE(review): newer arulesViz releases may expect this spelled as
# method = "matrix" with engine = "3d" - confirm for the installed version.
plot(subrules, measure = "lift", method = "matrix3D")
plot(
  subrules,
  measure = "lift",
  method = "matrix3D",
  control = list(reorder = TRUE)
)

# Matrix with two measures (lift and confidence): as-is, then reordered.
plot(subrules, measure = c("lift", "confidence"), method = "matrix")
plot(
  subrules,
  measure = c("lift", "confidence"),
  method = "matrix",
  control = list(reorder = TRUE)
)
# Doubledecker plot only works for a single rule ----
# One rule is drawn at random from `rules` and plotted against the data.
oneRule <- sample(rules, 1)
plot(oneRule, data = Symptom_data, method = "doubledecker")

# Grouped matrix plot
plot(rules, method = "grouped")

# Graphs only work well with very few rules, so ten are drawn at random.
subrules2 <- sample(rules, 10)
plot(subrules2, method = "graph")
# igraph layout generators can be used (see ? igraph::layout_) ----
plot(
  subrules2,
  method = "graph",
  control = list(layout = igraph::in_circle())
)
plot(
  subrules2,
  method = "graph",
  control = list(layout = igraph::with_graphopt(spring.const = 5, mass = 50))
)

# Graph plot with type = "itemsets", with the "interactive" engine, and with
# the "graphviz" engine requested through `control`.
plot(subrules2, method = "graph", control = list(type = "itemsets"))
plot(subrules2, method = "graph", engine = "interactive")
plot(subrules2, method = "graph", control = list(engine = "graphviz"))

# Parallel coordinates plot: as-is, then with reorder = TRUE.
plot(subrules2, method = "paracoord")
plot(subrules2, method = "paracoord", control = list(reorder = TRUE))
# iplots (experimental)
# The label above has to be a comment: as bare text in front of the
# assignment it is parsed as code and the line fails with a syntax error.
# NOTE(review): method = "iplots" and interactive = TRUE may be deprecated or
# removed in newer arulesViz releases - confirm for the installed version.
sel <- plot(rules, method = "iplots", interactive = TRUE)
# For itemsets ----
# Itemsets of at least two items with support >= 0.02, drawn as the default
# plot, as a graph, and as a reordered parallel coordinates plot.
itemsets <- eclat(Symptom_data, parameter = list(support = 0.02, minlen = 2))
plot(itemsets)
plot(itemsets, method = "graph")
plot(
  itemsets,
  method = "paracoord",
  control = list(alpha = 0.5, reorder = TRUE)
)

# Add more quality measures to use for the scatterplot.
# `transactions` is written out in full instead of relying on partial
# argument matching of `trans`.
quality(itemsets) <- interestMeasure(itemsets, transactions = Symptom_data)
head(quality(itemsets))
plot(itemsets, measure = c("support", "allConfidence"), shading = "lift")
outputs