Implementation of the Spectral Ranking for Anomaly (SRA) algorithm, as described in K. Nian, H. Zhang, A. Tayal, T. F. Coleman, and Y. Li (2014), 'Unsupervised Spectral Ranking for Anomaly and Application to Auto Insurance Fraud Detection'.
# ---- Example 1: promotergene data set shipped with kernlab ----
library(kernlab)
library(ggplot2)

data(promotergene)

# Drop the first column and run every remaining column through
# catcalinhara() (defined elsewhere in the project).
df <- as.data.frame(sapply(promotergene[, -1], catcalinhara))

# Kernel matrix and spectral ranking; hammingkernel2() and sra() are defined
# elsewhere. The sra() result is used below through its $EigenSpace,
# $Anomaly and $mFLAG components.
hammingkernelMatrix <- hammingkernel2(df, lambda = .6)
SpectralAnomaly <- sra(hammingkernelMatrix, Xi = .4)

# Colour value per observation: negative scores collapse to 1, all others are
# shifted by +1, so every value is >= 1 before the log-transformed colour
# scale is applied.
anomaly_colour <- ifelse(
  SpectralAnomaly$Anomaly < 0,
  1,
  SpectralAnomaly$Anomaly + 1
)

g <- ggplot(
  SpectralAnomaly$EigenSpace,
  aes(x = np_Eigenvector_1, y = np_Eigenvector_2, color = anomaly_colour)
) +
  geom_point() +
  scale_color_gradient("Anomaly", trans = "log", low = "blue", high = "red") +
  ggtitle(paste("mFLAG= ", SpectralAnomaly$mFLAG)) +
  theme(
    legend.title = element_text(face = "plain"),
    # Legend labels are drawn in white (presumably to hide the log-scale
    # tick values on a white background).
    legend.text = element_text(color = "white")
  )

# Print explicitly so the plot is also rendered when the script is run
# through source(), where top-level autoprinting is disabled.
print(g)
# ---- Example 2: UCI mushroom data (column V1 holds the class label) ----
mushroom <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  sep = ","
)

# Keep every "e" row but only 300 randomly chosen "p" rows, so that "p" is the
# rare class. Groups are selected by name rather than by position so the code
# does not depend on the order in which split() returns them.
# NOTE: sample() is unseeded, so the subsample (and everything derived from
# it) differs between runs; call set.seed() first if reproducibility matters.
mush.split <- split(mushroom, mushroom$V1)
mush.e <- mush.split[["e"]]
mush.p <- mush.split[["p"]]
mush.p <- mush.p[sample(nrow(mush.p), 300), ]
mushroom <- rbind(mush.e, mush.p)

print(table(mushroom$V1))

# Run every feature column (all but V1) through catcalinhara(), which is
# defined elsewhere in the project.
df <- as.data.frame(sapply(mushroom[, -1], catcalinhara))

# Time the kernel computation and the spectral ranking. The assignments use
# `<-` on purpose: inside a call, `=` would be parsed as a named argument.
print(system.time(hammingkernelMatrix <- hammingkernel(df, lambda = .5)))
print(system.time(SpectralAnomaly <- sra(hammingkernelMatrix, Xi = .1)))

# Plot 1: colour by anomaly score. Negative scores collapse to 1, all others
# are shifted by +1, so every value is >= 1 for the log-transformed scale.
anomaly_colour <- ifelse(
  SpectralAnomaly$Anomaly < 0,
  1,
  SpectralAnomaly$Anomaly + 1
)

g <- ggplot(
  SpectralAnomaly$EigenSpace,
  aes(x = np_Eigenvector_1, y = np_Eigenvector_2, color = anomaly_colour)
) +
  geom_point() +
  scale_color_gradient("Anomaly", trans = "log", low = "black", high = "red") +
  ggtitle(paste("mFLAG= ", SpectralAnomaly$mFLAG)) +
  theme(
    legend.title = element_text(face = "plain"),
    legend.text = element_text(color = "white")
  )
print(g)

# Plot 2: same layout on a linear colour scale; non-negative scores are
# additionally raised by how far the point's |np_Eigenvector_2| lies below
# the largest |np_Eigenvector_2| in the data.
ev2_abs <- abs(SpectralAnomaly$EigenSpace[, "np_Eigenvector_2"])
anomaly_colour_ev2 <- ifelse(
  SpectralAnomaly$Anomaly < 0,
  1,
  SpectralAnomaly$Anomaly + 1 + max(ev2_abs) - ev2_abs
)

g <- ggplot(
  SpectralAnomaly$EigenSpace,
  aes(x = np_Eigenvector_1, y = np_Eigenvector_2, color = anomaly_colour_ev2)
) +
  geom_point() +
  scale_color_gradient("Anomaly", low = "black", high = "red") +
  ggtitle(paste("mFLAG= ", SpectralAnomaly$mFLAG)) +
  theme(
    legend.title = element_text(face = "plain"),
    legend.text = element_text(color = "white")
  )
print(g)

# ROC curve and AUC of the anomaly score against the class label.
library(ROCR)

# State the negative/positive class explicitly ("e" negative, "p" positive);
# this is the same ordering ROCR would infer alphabetically.
ROCRpred <- prediction(
  SpectralAnomaly$Anomaly,
  mushroom$V1,
  label.ordering = c("e", "p")
)
perf <- performance(ROCRpred, "tpr", "fpr")
auc <- as.numeric(performance(ROCRpred, "auc")@y.values)
plot(
  perf,
  colorize = TRUE,  # TRUE rather than T: T is an ordinary, reassignable variable
  print.cutoffs.at = seq(0, 1, by = 0.05),
  main = paste("AUC: ", auc)
)
# ---- Example 3: UCI Wisconsin diagnostic breast cancer data (wdbc) ----
# Column V2 is tabulated and used as the class label below; V1 and V2 are
# both excluded from the features.
breastcancer <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data",
  header = FALSE,
  sep = ","
)

print(table(breastcancer$V2))

# Run every feature column through catcalinhara(), which is defined elsewhere
# in the project.
df <- as.data.frame(sapply(breastcancer[, c(-1, -2)], catcalinhara))

# Time the kernel computation and the spectral ranking. The assignments use
# `<-` on purpose: inside a call, `=` would be parsed as a named argument.
print(system.time(hammingkernelMatrix <- hammingkernel(df, lambda = .8)))
print(system.time(SpectralAnomaly <- sra(hammingkernelMatrix, Xi = .4)))

# Colour value per observation: negative scores collapse to 1, all others are
# shifted by +1, so every value is >= 1 for the log-transformed colour scale.
anomaly_colour <- ifelse(
  SpectralAnomaly$Anomaly < 0,
  1,
  SpectralAnomaly$Anomaly + 1
)

g <- ggplot(
  SpectralAnomaly$EigenSpace,
  aes(x = np_Eigenvector_1, y = np_Eigenvector_2, color = anomaly_colour)
) +
  geom_point() +
  scale_color_gradient("Anomaly", trans = "log", low = "black", high = "red") +
  ggtitle(paste("mFLAG= ", SpectralAnomaly$mFLAG)) +
  theme(
    legend.title = element_text(face = "plain"),
    legend.text = element_text(color = "white")
  )

# Print explicitly so the plot is also rendered when the script is run
# through source(), where top-level autoprinting is disabled.
print(g)

# ROC curve and AUC of the anomaly score against the class label.
library(ROCR)

# State the negative/positive class explicitly ("B" negative, "M" positive);
# this is the same ordering ROCR would infer alphabetically.
ROCRpred <- prediction(
  SpectralAnomaly$Anomaly,
  breastcancer$V2,
  label.ordering = c("B", "M")
)
perf <- performance(ROCRpred, "tpr", "fpr")
auc <- as.numeric(performance(ROCRpred, "auc")@y.values)
plot(
  perf,
  colorize = TRUE,  # TRUE rather than T: T is an ordinary, reassignable variable
  print.cutoffs.at = seq(0, 1, by = 0.2),
  main = paste("AUC: ", auc)
)