apetkau / comp7944-project

Project on visualizing association rules extracted from COVID-19 data.
Apache License 2.0
1 star 1 fork source link

Apriori and Network graph of Symptoms in R #3

Open walid-shaiket opened 4 years ago

walid-shaiket commented 4 years ago

Load packages

# Attach every package the analysis relies on, one per line. The order is
# kept as in the original because it decides which package masks which.
library(data.table)
library(dplyr)
library(ggplot2)
library(knitr)
library(stringr)
library(DT)
library(plotly)
library(arules)
library(arulesViz)
library(visNetwork)
library(igraph)
library(kableExtra)

# Install arules (with its dependencies) only when it is not yet available,
# so re-running the script does not reinstall the package every time.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", dependencies = TRUE)
}
library(arules)

# Location of the tab-separated symptoms file; adjust for your machine.
symptoms_path <- "D:/MSc_UofM/Data Mining/Project/DataSets/symptoms.tsv"

# Read every column as a factor: arules can only coerce a data.frame to
# transactions when its columns are factors (or logicals).
Symptom_data <- read.table(
  file = symptoms_path,
  sep = "\t",
  header = FALSE,
  colClasses = "factor"
)

# Coerce explicitly: itemFrequencyPlot(), eclat() and interestMeasure() are
# called on Symptom_data later and need a transactions object rather than a
# plain data.frame.
# NOTE(review): this treats each column as a variable (items look like
# "V1=<value>"). If the file is really one basket of symptoms per line,
# arules::read.transactions(format = "basket") is the better reader - confirm.
Symptom_data <- as(Symptom_data, "transactions")

# Mine association rules with Apriori (minimum support 0.5%, minimum
# confidence 20%). Symptom_data is already in the workspace, so the former
# data(Symptom_data) call - which looks for a packaged dataset of that name
# and only produced a "data set not found" warning - has been dropped.
rules <- apriori(
  Symptom_data,
  parameter = list(support = 0.005, confidence = 0.2)
)

# Print a summary line for the mined rule set.
rules

library("RColorBrewer")

# Relative frequency bar chart of the 20 most frequent items, coloured with
# the 8-colour "Pastel2" palette.
arules::itemFrequencyPlot(
  Symptom_data,
  topN = 20,
  col = brewer.pal(8, 'Pastel2'),
  main = 'Relative Item Frequency Plot',
  type = "relative",
  ylab = "Item Frequency (Relative)"
)

Scatterplot

# Scatterplot of all rules; "scatterplot" is the method arulesViz uses for
# rules when none is given, spelled out here for clarity.
plot(rules, method = "scatterplot")

# Interactive scatterplot; the selection made in the plot is kept in `sel`.
# The deprecated `interactive = TRUE` flag is replaced by the `engine`
# argument, matching the interactive graph call further down in this script.
# NOTE(review): which engines exist depends on the installed arulesViz
# version - confirm "interactive" is still offered for scatterplots.
sel <- plot(rules, engine = "interactive")

Scatterplot with custom colors

library(colorspace) # for sequential_hcl

# Scatterplot shaded with a 100-step sequential HCL palette. The call sits on
# its own line so that the trailing comment above no longer swallows it.
plot(rules, control = list(col = sequential_hcl(100)))

Two-key plot is a scatterplot with shading = "order"

# Two-key plot: the scatterplot shaded by rule order, using five rainbow
# colours.
plot(
  rules,
  shading = "order",
  control = list(
    main = "Two-key plot",
    col = rainbow(5)
  )
)

The following techniques work better with fewer rules

# Keep only the rules whose lift exceeds 2.5, then print the reduced set.
subrules <- subset(rules, subset = lift > 2.5)
subrules

2D matrix with shading

# Matrix plot shaded by lift, in the default order ...
plot(subrules, method = "matrix", measure = "lift")

# ... and again with the reorder option switched on.
plot(
  subrules,
  method = "matrix",
  measure = "lift",
  control = list(reorder = TRUE)
)

3D matrix

# 3D matrix plot of lift, in the default order ...
plot(subrules, method = "matrix3D", measure = "lift")

# ... and again with the reorder option switched on.
plot(
  subrules,
  method = "matrix3D",
  measure = "lift",
  control = list(reorder = TRUE)
)

matrix with two measures

# Matrix plot showing two measures (lift and confidence) at once ...
plot(
  subrules,
  method = "matrix",
  measure = c("lift", "confidence")
)

# ... and again with the reorder option switched on.
plot(
  subrules,
  method = "matrix",
  measure = c("lift", "confidence"),
  control = list(reorder = TRUE)
)

Doubledecker plot only works for a single rule

# Draw one rule at random and show it as a doubledecker plot; the
# transactions are passed in via `data`.
oneRule <- sample(rules, 1)
plot(
  oneRule,
  method = "doubledecker",
  data = Symptom_data
)

grouped matrix plot

# Grouped matrix plot over the full rule set.
plot(
  rules,
  method = "grouped"
)

graphs only work well with very few rules

# Take a random sample of 10 rules and draw them as a graph.
subrules2 <- sample(rules, 10)
plot(
  subrules2,
  method = "graph"
)

igraph layout generators can be used (see ?igraph::layout_)

# Same graph, laid out with igraph's in_circle() layout.
plot(
  subrules2,
  method = "graph",
  control = list(layout = igraph::in_circle())
)

# Same graph, laid out with igraph's graphopt layout using custom spring
# constant and mass settings.
plot(
  subrules2,
  method = "graph",
  control = list(
    layout = igraph::with_graphopt(spring.const = 5, mass = 50)
  )
)

# Graph with the control option type = "itemsets".
plot(subrules2, method = "graph", control = list(type = "itemsets"))

# Graph drawn with the interactive engine.
plot(subrules2, method = "graph", engine = "interactive")

# Graph drawn with the graphviz engine. `engine` is now passed as a top-level
# argument, consistent with the interactive call above, instead of being
# tucked into `control`.
# NOTE(review): engine availability depends on the installed arulesViz
# version and on Graphviz support being present - confirm.
plot(subrules2, method = "graph", engine = "graphviz")

parallel coordinates plot

# Parallel coordinates plot of the sampled rules, in the default order ...
plot(subrules2, method = "paracoord")

# ... and again with the reorder option switched on.
plot(
  subrules2,
  method = "paracoord",
  control = list(reorder = TRUE)
)

# iplots (experimental)
# The heading above had been fused onto the code line, which made the line a
# syntax error; it is restored as a comment here.
# NOTE(review): the "iplots" method and the `interactive` flag may no longer
# be supported by current arulesViz releases - confirm before relying on it.
sel <- plot(rules, method = "iplots", interactive = TRUE)

for itemsets

# Mine frequent itemsets with Eclat: minimum support 2%, at least two items
# per itemset.
itemsets <- eclat(
  Symptom_data,
  parameter = list(support = 0.02, minlen = 2)
)

# Default plot of the itemsets.
plot(itemsets)

# Itemsets as a graph.
plot(itemsets, method = "graph")

# Itemsets as a parallel coordinates plot with alpha = 0.5 and the reorder
# option switched on.
plot(
  itemsets,
  method = "paracoord",
  control = list(alpha = .5, reorder = TRUE)
)

add more quality measures to use for the scatterplot

# Replace the itemsets' quality table with the measures computed by
# interestMeasure() against the transactions. The argument is spelled out as
# `transactions` instead of relying on partial matching of `trans`.
quality(itemsets) <- interestMeasure(itemsets, transactions = Symptom_data)

# Inspect the first few rows of the new quality table.
head(quality(itemsets))

# Scatterplot of support against allConfidence, shaded by lift.
plot(
  itemsets,
  measure = c("support", "allConfidence"),
  shading = "lift"
)

outputs

ConfVsSupport_R

Graph_for_10_rules

Graph_for_10_rules_circle

Graph_for_10rules_list

graph_for_100_items

Network_graph_symptoms_better

Parallel_coord_reordered

Scatter_plot_sup_itemsets