selkamand / sigminerUtils

Easily Run Sigminer Analysis and Save Results to sqlite DB
Other
0 stars 0 forks source link

Fix NA result of sigstats::sig_cosine_similarity() similarity #6

Closed selkamand closed 3 months ago

selkamand commented 3 months ago
# First tally of the reproducible example: an 83-row data.frame with one row
# per indel channel. Every channel is zero except "4:Ins:R:0" and "5:Del:M:2"
# (rows 61 and 68), which each carry count 1 and fraction 0.5.
tally_1 <- local({
  indel_types <- c(
    "1:Del:C", "1:Del:T", "1:Ins:C", "1:Ins:T",
    "2:Del:M", "2:Del:R", "2:Ins:R",
    "3:Del:M", "3:Del:R", "3:Ins:R",
    "4:Del:M", "4:Del:R", "4:Ins:R",
    "5:Del:M", "5:Del:R", "5:Ins:R"
  )

  # Number of channels belonging to each type, in the same order as above.
  channels_per_type <- c(
    6L, 6L, 6L, 6L,
    1L, 6L, 6L,
    2L, 6L, 6L,
    3L, 6L, 6L,
    5L, 6L, 6L
  )

  type_column <- rep(indel_types, times = channels_per_type)

  # Channel names are "<type>:<index>". The index counts up from 0 within a
  # type, except for the ":M" types, whose index starts at 1.
  index_offset <- endsWith(type_column, ":M")
  channel_index <- sequence(channels_per_type) - 1L + index_offset

  # Rows holding the only two non-zero channels.
  hit_rows <- c(61L, 68L)

  data.frame(
    channel = paste(type_column, channel_index, sep = ":"),
    type = type_column,
    fraction = replace(numeric(83L), hit_rows, 0.5),
    count = replace(integer(83L), hit_rows, 1L)
  )
})

# Second tally of the reproducible example: a tibble covering the same 83
# indel channels, but with every fraction and count equal to zero, plus a
# constant `class` column set to "ID83".
tally_2 <- local({
  indel_types <- c(
    "1:Del:C", "1:Del:T", "1:Ins:C", "1:Ins:T",
    "2:Del:M", "2:Del:R", "2:Ins:R",
    "3:Del:M", "3:Del:R", "3:Ins:R",
    "4:Del:M", "4:Del:R", "4:Ins:R",
    "5:Del:M", "5:Del:R", "5:Ins:R"
  )

  # Number of channels belonging to each type, in the same order as above.
  channels_per_type <- c(
    6L, 6L, 6L, 6L,
    1L, 6L, 6L,
    2L, 6L, 6L,
    3L, 6L, 6L,
    5L, 6L, 6L
  )

  type_column <- rep(indel_types, times = channels_per_type)

  # Channel names are "<type>:<index>". The index counts up from 0 within a
  # type, except for the ":M" types, whose index starts at 1.
  index_offset <- endsWith(type_column, ":M")
  channel_index <- sequence(channels_per_type) - 1L + index_offset

  tibble::tibble(
    channel = paste(type_column, channel_index, sep = ":"),
    type = type_column,
    fraction = rep(0, times = 83L),
    count = rep(0L, times = 83L),
    class = rep("ID83", times = 83L)
  )
})
selkamand commented 3 months ago

This can be simplified to:

# Minimal reproduction using only the two `fraction` vectors.
# tally1: 83 values, all zero except 0.5 at positions 61 and 68.
tally1 <- replace(numeric(83L), c(61L, 68L), 0.5)

# tally2: 83 zeros, i.e. a vector with zero magnitude.
tally2 <- rep(0, times = 83L)

# NOTE(review): with a zero-magnitude input the cosine is undefined, so this
# call is presumably what yields the NA/NaN result reported in this issue.
lsa::cosine(tally1, tally2)
selkamand commented 3 months ago

Actually, that simplification made the problem easier to see. It's impossible to calculate the cosine of the angle between two vectors when one of them has zero magnitude, because the denominator of the cosine formula is then zero. The current version of sigstats tests for this case, so a simple update fixed it.