apetkau / comp7944-project

Project on visualizing association rules extracted from covid-19 data.
Apache License 2.0
1 stars 1 forks source link

Apriori and network Graph for Geographic dataset #4

Open walid-shaiket opened 4 years ago

walid-shaiket commented 4 years ago

Load packages

# Attach every package the analysis below relies on, one call per line
# (the calls were previously run together on a single line, which R cannot
# parse).
library(data.table)
library(dplyr)
library(ggplot2)
library(knitr)
library(stringr)
library(DT)
library(plotly)
library(arules)
library(arulesViz)
library(visNetwork)
library(igraph)
library(kableExtra)

# Install arules (with its dependencies) only when it is not yet available,
# so re-running the script does not reinstall the package each time; then
# attach it.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", dependencies = TRUE)
}
library(arules)

# Read the tab-separated geographic data set. The file is read without a
# header row (header = FALSE).
# stringsAsFactors = TRUE keeps text columns as factors, which was the
# read.table() default before R 4.0.0.
# NOTE(review): arules is expected to need factor/logical columns when it
# turns a data.frame into transactions -- confirm against the real file.
location_data <- read.table(
  file = "D:/MSc_UofM/Data Mining/Project/DataSets/geographic_date.tsv",
  sep = "\t",
  header = FALSE,
  stringsAsFactors = TRUE
)

# Mine association rules with apriori() using minimum support 0.9 and
# minimum confidence 1.0, then show the resulting rule set.
# The former data(location_data) call was dropped: data() only loads data
# sets shipped with packages, so it merely raised a "data set not found"
# warning for this workspace object.
location_rules <- apriori(
  location_data,
  parameter = list(support = 0.9, confidence = 1.0)
)
location_rules

# Bar chart of the 20 most frequent items, on a relative scale, coloured with
# the 8-colour "Pastel2" ColorBrewer palette.
# itemFrequencyPlot() is defined for item matrices / transactions, so the
# data frame is coerced before plotting.
library("RColorBrewer")
arules::itemFrequencyPlot(
  as(location_data, "transactions"),
  topN = 20,
  col = brewer.pal(8, "Pastel2"),
  main = "Relative Item Frequency Plot",
  type = "relative",
  ylab = "Item Frequency (Relative)"
)

Scatterplot

# Default plot of the mined rules.
plot(location_rules)

# Interactive version of the same plot; whatever plot() returns is kept in
# `sel`. engine = "interactive" replaces the deprecated interactive = TRUE
# argument and matches the interactive graph call further down the script.
sel <- plot(location_rules, engine = "interactive")

Scatterplot with custom colors

# The plot() call used to sit on the same line as the trailing comment, so it
# was part of the comment and never ran; it now has its own line.
library(colorspace) # for sequential_hcl
plot(location_rules, control = list(col = sequential_hcl(100)))

Two-key plot is a scatterplot with shading = "order"

# Two-key plot: plot of the rules shaded by "order", drawn with five rainbow
# colours and the title "Two-key plot".
plot(
  x = location_rules,
  shading = "order",
  control = list(
    main = "Two-key plot",
    col = rainbow(5)
  )
)

The following techniques work better with fewer rules

# Keep only the rules whose lift exceeds 1 and show the reduced rule set.
subrules <- subset(location_rules, lift > 1)
subrules

2D matrix with shading

# Matrix plot of the sub-rules using lift as the measure: first with the
# default settings, then with reorder = TRUE.
plot(subrules, method = "matrix", measure = "lift")
plot(
  subrules,
  method = "matrix",
  measure = "lift",
  control = list(reorder = TRUE)
)

3D matrix

# "matrix3D" plot of the sub-rules using lift as the measure: first with the
# default settings, then with reorder = TRUE.
plot(subrules, method = "matrix3D", measure = "lift")
plot(
  subrules,
  method = "matrix3D",
  measure = "lift",
  control = list(reorder = TRUE)
)

matrix with two measures

# Matrix plot of the sub-rules with two measures (lift and confidence): first
# with the default settings, then with reorder = TRUE.
plot(subrules, method = "matrix", measure = c("lift", "confidence"))
plot(
  subrules,
  method = "matrix",
  measure = c("lift", "confidence"),
  control = list(reorder = TRUE)
)

Doubledecker plot only works for a single rule

# Draw one rule at random and show it as a doubledecker plot.
# NOTE(review): the doubledecker method is documented to need the original
# data as transactions, so the data frame is coerced here -- confirm this
# matches the installed arulesViz version.
oneRule <- sample(location_rules, 1)
plot(
  oneRule,
  method = "doubledecker",
  data = as(location_data, "transactions")
)

grouped matrix plot

# Plot the complete rule set with the "grouped" method.
plot(
  x = location_rules,
  method = "grouped"
)

graphs only work well with very few rules

# Draw ten rules at random and plot them with the "graph" method.
subrules2 <- sample(location_rules, 10)
plot(subrules2, method = "graph")

igraph layout generators can be used (see ? igraph::layout_)

# Graph plots of the sampled rules with explicit igraph layout
# specifications: a circular layout, then a graphopt layout with
# spring.const = 5 and mass = 50.
plot(subrules2, method = "graph", control = list(layout = igraph::in_circle()))
plot(
  subrules2,
  method = "graph",
  control = list(
    layout = igraph::with_graphopt(spring.const = 5, mass = 50)
  )
)

# Graph plot of the sampled rules with control type "itemsets", followed by
# the same graph rendered through the "interactive" engine.
plot(subrules2, method = "graph", control = list(type = "itemsets"))
plot(subrules2, method = "graph", engine = "interactive")

parallel coordinates plot

# Parallel coordinates plot of the sampled rules: first with the default
# settings, then with reorder = TRUE.
plot(subrules2, method = "paracoord")
plot(subrules2, method = "paracoord", control = list(reorder = TRUE))

# iplots (experimental)
# The label above used to be fused with the statement, which made the line
# unparsable; it is now a comment.
# NOTE(review): confirm that method = "iplots" is still offered by the
# installed arulesViz version.
sel <- plot(location_rules, method = "iplots", interactive = TRUE)

for itemsets

# Mine frequent itemsets with eclat() (minimum support 0.9, at least two
# items per set), then draw them three ways: the default plot, a graph, and
# a parallel coordinates plot with alpha = .5 and reorder = TRUE.
itemsets <- eclat(location_data, parameter = list(support = 0.9, minlen = 2))
plot(itemsets)
plot(itemsets, method = "graph")
plot(
  itemsets,
  method = "paracoord",
  control = list(alpha = .5, reorder = TRUE)
)

add more quality measures to use for the scatterplot

# Replace the itemsets' quality table with the measures returned by
# interestMeasure(), preview the first rows, and plot support against
# allConfidence shaded by lift.
# The argument is spelled out as `transactions` (it was the partial match
# `trans`).
# NOTE(review): the data frame is coerced because interestMeasure() is
# expected to take a transactions object -- confirm for the installed arules.
quality(itemsets) <- interestMeasure(
  itemsets,
  transactions = as(location_data, "transactions")
)
head(quality(itemsets))
plot(itemsets, measure = c("support", "allConfidence"), shading = "lift")

outputs

3D matrix conseq vs lift

antice-conseque-lift

circular plot

graph for 10 rules

graph-itemsets

network_graph_geo

parallel_coord_reordered

scatterPlot_ordered

sup_vs_conf_44krules