AndriSignorell / DescTools

Tools for Descriptive Statistics and Exploratory Data Analysis
http://andrisignorell.github.io/DescTools/
82 stars 18 forks source link

Some errors in tests/misc.R on Darwin PowerPC #117

Closed barracuda156 closed 1 year ago

barracuda156 commented 1 year ago
R version 4.3.0 (2023-04-21) -- "Already Tomorrow"
Copyright (C) 2023 The R Foundation for Statistical Computing
Platform: powerpc-apple-darwin10.8.0 (32-bit)

> 
> 
> library(DescTools)
> 
> # stopifnot(exprs = {
> #   all.equal(pretty10exp(10^expo, drop.1=TRUE, sub10 = c(-2, 2)),
> #             expression(10^-3, 0.01, 0.1, 1, 10, 100, 10^3, 10^4))
> #   
> #   identical(pretty10exp(10^expo, drop.1=TRUE, sub10 = c(-2, 2), lab.type="latex"),
> #             c("$10^{-3}$", "0.01", "0.1", "1", "10", "100",
> #               "$10^{3}$", "$10^{4}$"))
> #   ## gave exponential format for "latex" case.
> # })
> # 
> 
> set.seed(45)
> (z <- as.numeric(names(w <- table(x <- sample(-10:20, size=50, r=TRUE)))))
 [1] -9 -8 -7 -6 -5 -4 -3 -2  0  3  4  5  6  7  8  9 10 12 13 15 16 17 18 19 20
> 
> stopifnot(all(
+   identical(Mode(5), structure(5, freq = 1L))
+   , identical(Mode(NA), structure(NA_real_, freq = NA_integer_)) 
+   , identical(Mode(c(NA, NA)), structure(NA_real_, freq = NA_integer_)) 
+   , identical(Mode(c(NA, 0:5)), structure(NA_real_, freq = NA_integer_)) 
+   , identical(Mode(c(NA, 0:5), na.rm=TRUE), structure(NA_real_, freq = 1L)) 
+   , identical(Mode(c(NA, 0:5, 5), na.rm=TRUE), structure(5, freq = 2L)) 
+   , identical(Mode(c(0:5, 4, 5, 6)), structure(c(4, 5), freq = 2L)) 
+   , identical(Mode(c(0:8, rep(c(1,3, 8), each=5))), structure(c(1, 3, 8), freq = 6L)) 
+   
+   , all.equal(Kurt(x = z, weights = w, method = 1), Kurt(x = x, method = 1))
+   , all.equal(Kurt(x = z, weights = w, method = 2), Kurt(x = x, method = 2))
+   , all.equal(Kurt(x = z, weights = w, method = 3), Kurt(x = x, method = 3))
+   
+   , all.equal(Skew(x = z, weights = w, method = 1), Skew(x = x, method = 1))
+   , all.equal(Skew(x = z, weights = w, method = 2), Skew(x = x, method = 2))
+   , all.equal(Skew(x = z, weights = w, method = 3), Skew(x = x, method = 3))
+   
+   , all.equal(CoefVar(z, weights = w, unbiased = TRUE), CoefVar(x, unbiased = TRUE))
+   , all.equal(CoefVar(z, weights = w, unbiased = FALSE), CoefVar(x, unbiased = FALSE))
+   
+   , all.equal(MeanAD(x), MeanAD(z, w))
+   , all.equal(MeanAD(x, center = Median), MeanAD(z, w, center = Median))
+   , all.equal(MeanAD(x, center = 7), MeanAD(z, w, center = 7))
+   
+ ))
> 
> 
> 
> # test Desc base function
> 
> x <- c(rnorm(n = 100, sd = 10), NA) 
> z <- Desc(x)[[1]]
> 
> stopifnot(all(  
+   identical(z$length,    length(x))
+   , identical(z$NAs,     sum(is.na(x)))
+   , identical(z$unique,  length(unique(na.omit(x))))
+   , identical(z$`0s`,    sum(x==0, na.rm=TRUE))
+   , IsZero(z$mean - mean(x, na.rm=TRUE))
+   , identical(unname(z$quant), 
+               unname(quantile(x, na.rm=TRUE, probs=c(0,0.05,.1,.25,.5,.75,.9,.95,1))))
+   , identical(z$range,   diff(range(x, na.rm=TRUE)))
+   , IsZero(z$sd - sd(x, na.rm=TRUE))
+   , IsZero(z$vcoef - sd(x, na.rm=TRUE)/mean(x, na.rm = TRUE))
+   , identical(z$mad,     mad(x, na.rm=TRUE))
+   , identical(z$IQR,     IQR(x, na.rm=TRUE))
+ ))
> 
> 
> 
> 
> # test BinomDiffCI with https://www.lexjansen.com/wuss/2016/127_Final_Paper_PDF.pdf
> 
> # 5. Mee is given as 0.0533 in the literature, which probably is a rounding error
> # it's corrected from 0.533 to 0.534 in ‘lit1’ and from 0.7225 to 0.7224 in ‘lit2’ for comparison reasons
> # Mee 4 from  0.0857 to 0.0858
> 
> meth <- c("wald","waldcc","hal","jp","mee","mn","score","scorecc","ha","ac","blj")
> 
> stopifnot(all(
+   identical(unname(round(BinomDiffCI(56, 70, 48, 80, method = meth), 4)[, -1]), 
+             cbind(c(0.0575, 0.0441, 0.0535, 0.0531, 0.0534, 
+                     0.0528, 0.0524, 0.0428, 0.0494, 0.0525, 0.054), 
+                   c(0.3425, 0.3559, 0.3351, 0.3355, 0.3377, 
+                     0.3382, 0.3339, 0.3422, 0.3506, 0.3358, 0.34))),
+   identical(unname(round(BinomDiffCI(9, 10, 3, 10, method = meth), 4)[, -1]), 
+             cbind(c(0.2605, 0.1605, 0.1777, 0.176, 0.1821, 
+                     0.17, 0.1705, 0.1013, 0.1922, 0.16, 0.1869), 
+                   c(0.9395, 1, 0.8289, 0.8306, 0.837, 0.8406, 
+                     0.809, 0.8387, 1, 0.84, 0.904))),
+   identical(unname(round(BinomDiffCI(10, 10, 0, 20, method = meth), 4)[, -1]), 
+             cbind(c(1, 0.925, 0.7482, 0.7431, 0.7224, 0.7156, 
+                     0.6791, 0.6014, 0.95, 0.6922, 0.7854), 
+                   c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1))), 
+   identical(unname(round(BinomDiffCI(84, 101, 89, 105, method = meth), 4)[, -1]), 
+             cbind(c(-0.1162, -0.1259, -0.1152, -0.116, -0.1188, 
+                     -0.1191, -0.1177, -0.1245, -0.1216, -0.1168, -0.117), 
+                   c(0.0843, 0.094, 0.0834, 0.0843, 0.0858, 0.086, 0.0851, 
+                     0.0918, 0.0898, 0.085, 0.0852)))
+ ))
Error: all(identical(unname(round(BinomDiffCI(56, 70, 48, 80, method = meth),  .... is not TRUE
Execution halted
AndriSignorell commented 1 year ago

Thnx, I can' reproduce this. Could you run the following to narrow down, please:

identical(unname(round(BinomDiffCI(56, 70, 48, 80, method = meth), 4)[, -1]), 
          cbind(c(0.0575, 0.0441, 0.0535, 0.0531, 0.0534, 
                  0.0528, 0.0524, 0.0428, 0.0494, 0.0525, 0.054), 
                c(0.3425, 0.3559, 0.3351, 0.3355, 0.3377, 
                  0.3382, 0.3339, 0.3422, 0.3506, 0.3358, 0.34)))

identical(unname(round(BinomDiffCI(9, 10, 3, 10, method = meth), 4)[, -1]), 
          cbind(c(0.2605, 0.1605, 0.1777, 0.176, 0.1821, 
                  0.17, 0.1705, 0.1013, 0.1922, 0.16, 0.1869), 
                c(0.9395, 1, 0.8289, 0.8306, 0.837, 0.8406, 
                  0.809, 0.8387, 1, 0.84, 0.904)))

identical(unname(round(BinomDiffCI(10, 10, 0, 20, method = meth), 4)[, -1]), 
          cbind(c(1, 0.925, 0.7482, 0.7431, 0.7224, 0.7156, 
                  0.6791, 0.6014, 0.95, 0.6922, 0.7854), 
                c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)))

identical(unname(round(BinomDiffCI(84, 101, 89, 105, method = meth), 4)[, -1]), 
          cbind(c(-0.1162, -0.1259, -0.1152, -0.116, -0.1188, 
                  -0.1191, -0.1177, -0.1245, -0.1216, -0.1168, -0.117), 
                c(0.0843, 0.094, 0.0834, 0.0843, 0.0858, 0.086, 0.0851, 
                  0.0918, 0.0898, 0.085, 0.0852)))
barracuda156 commented 1 year ago

@AndriSignorell Thank you for responding! All these evaluate to false:

Last login: Sun May 14 11:06:27 on ttys002
macmini:~ svacchanda$ r

R version 4.3.0 (2023-04-21) -- "Already Tomorrow"
Copyright (C) 2023 The R Foundation for Statistical Computing
Platform: powerpc-apple-darwin10.8.0 (32-bit)

> library(DescTools)

> meth <- c("wald","waldcc","hal","jp","mee","mn","score","scorecc","ha","ac","blj")
> identical(unname(round(BinomDiffCI(56, 70, 48, 80, method = meth), 4)[, -1]), +           cbind(c(0.0575, 0.0441, 0.0535, 0.0531, 0.0534, 
+                   0.0528, 0.0524, 0.0428, 0.0494, 0.0525, 0.054), 
+                 c(0.3425, 0.3559, 0.3351, 0.3355, 0.3377, 
+                   0.3382, 0.3339, 0.3422, 0.3506, 0.3358, 0.34)))
[1] FALSE
> identical(unname(round(BinomDiffCI(9, 10, 3, 10, method = meth), 4)[, -1]), 
+           cbind(c(0.2605, 0.1605, 0.1777, 0.176, 0.1821, 
+                   0.17, 0.1705, 0.1013, 0.1922, 0.16, 0.1869), 
+                 c(0.9395, 1, 0.8289, 0.8306, 0.837, 0.8406, 
+                   0.809, 0.8387, 1, 0.84, 0.904)))
[1] FALSE
> identical(unname(round(BinomDiffCI(10, 10, 0, 20, method = meth), 4)[, -1]), 
+           cbind(c(1, 0.925, 0.7482, 0.7431, 0.7224, 0.7156, 
+                   0.6791, 0.6014, 0.95, 0.6922, 0.7854), 
+                 c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)))
[1] FALSE
> identical(unname(round(BinomDiffCI(84, 101, 89, 105, method = meth), 4)[, -1]), 
+           cbind(c(-0.1162, -0.1259, -0.1152, -0.116, -0.1188, 
+                   -0.1191, -0.1177, -0.1245, -0.1216, -0.1168, -0.117), 
+                 c(0.0843, 0.094, 0.0834, 0.0843, 0.0858, 0.086, 0.0851, 
+                   0.0918, 0.0898, 0.085, 0.0852)))
[1] FALSE
AndriSignorell commented 1 year ago

Hmm, this looks like a numeric issue. Please run:

meth <- c("wald","waldcc","hal","jp","mee","mn","score","scorecc","ha","ac","blj")

cbind(DescTools::BinomDiffCI(56, 70, 48, 80, method = meth),
      round(DescTools::BinomDiffCI(56, 70, 48, 80, method = meth), 4)[,-1])
cbind(DescTools::BinomDiffCI(9, 10, 3, 10, method = meth),
      round(DescTools::BinomDiffCI(9, 10, 3, 10, method = meth), 4)[,-1])
cbind(DescTools::BinomDiffCI(10, 10, 0, 20, method = meth),
      round(DescTools::BinomDiffCI(10, 10, 0, 20, method = meth), 4)[,-1])
cbind(DescTools::BinomDiffCI(84, 101, 89, 105, method = meth),
      round(DescTools::BinomDiffCI(84, 101, 89, 105, method = meth), 4)[,-1])
barracuda156 commented 1 year ago
> library(DescTools)
> meth <- c("wald","waldcc","hal","jp","mee","mn","score","scorecc","ha","ac","blj")
> cbind(DescTools::BinomDiffCI(56, 70, 48, 80, method = meth),
+       round(DescTools::BinomDiffCI(56, 70, 48, 80, method = meth), 4)[,-1])
        est     lwr.ci    upr.ci lwr.ci upr.ci
wald    0.2 0.05750490 0.3424951 0.0575 0.3425
waldcc  0.2 0.04411204 0.3558880 0.0441 0.3559
hal     0.2 0.05350371 0.3351272 0.0535 0.3351
jp      0.2 0.05311449 0.3355347 0.0531 0.3355
mee     0.2 0.05335712 0.3377272 0.0534 0.3377
mn      0.2 0.05282969 0.3381730 0.0528 0.3382
score   0.2 0.05243147 0.3338727 0.0524 0.3339
scorecc 0.2 0.04276787 0.3421863 0.0428 0.3422
ha      0.2 0.04940685 0.3505931 0.0494 0.3506
ac      0.2 0.05245293 0.3357585 0.0525 0.3358
blj     0.2 0.05398899 0.3400294 0.0540 0.3400
> cbind(DescTools::BinomDiffCI(9, 10, 3, 10, method = meth),
+       round(DescTools::BinomDiffCI(9, 10, 3, 10, method = meth), 4)[,-1])
        est    lwr.ci    upr.ci lwr.ci upr.ci
wald    0.6 0.2605243 0.9394757 0.2605 0.9395
waldcc  0.6 0.1605243 1.0000000 0.1605 1.0000
hal     0.6 0.1777160 0.8289338 0.1777 0.8289
jp      0.6 0.1760029 0.8306470 0.1760 0.8306
mee     0.6 0.1821489 0.8369538 0.1821 0.8370
mn      0.6 0.1700251 0.8406495 0.1700 0.8406
score   0.6 0.1705227 0.8090180 0.1705 0.8090
scorecc 0.6 0.1012872 0.8386690 0.1013 0.8387
ha      0.6 0.1921612 1.0000000 0.1922 1.0000
ac      0.6 0.1600008 0.8399992 0.1600 0.8400
blj     0.6 0.1868771 0.9040319 0.1869 0.9040
> cbind(DescTools::BinomDiffCI(10, 10, 0, 20, method = meth),
+       round(DescTools::BinomDiffCI(10, 10, 0, 20, method = meth), 4)[,-1])
        est    lwr.ci upr.ci lwr.ci upr.ci
wald      1 1.0000000      1 1.0000      1
waldcc    1 0.9250000      1 0.9250      1
hal       1 0.7481682      1 0.7482      1
jp        1 0.7431410      1 0.7431      1
mee       1 0.7224437      1 0.7224      1
mn        1 0.7156186      1 0.7156      1
score     1 0.6790860      1 0.6791      1
scorecc   1 0.6013931      1 0.6014      1
ha        1 0.9500000      1 0.9500      1
ac        1 0.6922432      1 0.6922      1
blj       1 0.7853682      1 0.7854      1
> cbind(DescTools::BinomDiffCI(84, 101, 89, 105, method = meth),
+       round(DescTools::BinomDiffCI(84, 101, 89, 105, method = meth), 4)[,-1])
                est     lwr.ci     upr.ci  lwr.ci upr.ci
wald    -0.01593588 -0.1161839 0.08431216 -0.1162 0.0843
waldcc  -0.01593588 -0.1258963 0.09402456 -0.1259 0.0940
hal     -0.01593588 -0.1152115 0.08344028 -0.1152 0.0834
jp      -0.01593588 -0.1160196 0.08425300 -0.1160 0.0843
mee     -0.01593588 -0.1188116 0.08575506 -0.1188 0.0858
mn      -0.01593588 -0.1190646 0.08599690 -0.1191 0.0860
score   -0.01593588 -0.1177496 0.08507135 -0.1177 0.0851
scorecc -0.01593588 -0.1244598 0.09177173 -0.1245 0.0918
ha      -0.01593588 -0.1216254 0.08975363 -0.1216 0.0898
ac      -0.01593588 -0.1167570 0.08499946 -0.1168 0.0850
blj     -0.01593588 -0.1169742 0.08515773 -0.1170 0.0852
AndriSignorell commented 1 year ago

All results seem ok, from my point of view... Next one:

x <- unname(round(BinomDiffCI(56, 70, 48, 80, method = meth), 4)[, -1])
z <-   cbind(c(0.0575, 0.0441, 0.0535, 0.0531, 0.0534, 
               0.0528, 0.0524, 0.0428, 0.0494, 0.0525, 0.054), 
             c(0.3425, 0.3559, 0.3351, 0.3355, 0.3377, 
               0.3382, 0.3339, 0.3422, 0.3506, 0.3358, 0.34))

x - z

str(x)
str(z)

class(x)
class(z)
barracuda156 commented 1 year ago
> meth <- c("wald","waldcc","hal","jp","mee","mn","score","scorecc","ha","ac","blj")
> x <- unname(round(BinomDiffCI(56, 70, 48, 80, method = meth), 4)[, -1])
> z <-   cbind(c(0.0575, 0.0441, 0.0535, 0.0531, 0.0534, 
+                0.0528, 0.0524, 0.0428, 0.0494, 0.0525, 0.054), 
+              c(0.3425, 0.3559, 0.3351, 0.3355, 0.3377, 
+                0.3382, 0.3339, 0.3422, 0.3506, 0.3358, 0.34))
> x - z
      [,1]          [,2]
 [1,]    0  5.551115e-17
 [2,]    0  0.000000e+00
 [3,]    0  0.000000e+00
 [4,]    0  0.000000e+00
 [5,]    0  0.000000e+00
 [6,]    0  0.000000e+00
 [7,]    0 -5.551115e-17
 [8,]    0  0.000000e+00
 [9,]    0  0.000000e+00
[10,]    0  0.000000e+00
[11,]    0  0.000000e+00
> str(x)
 num [1:11, 1:2] 0.0575 0.0441 0.0535 0.0531 0.0534 0.0528 0.0524 0.0428 0.0494 0.0525 ...
> str(z)
 num [1:11, 1:2] 0.0575 0.0441 0.0535 0.0531 0.0534 0.0528 0.0524 0.0428 0.0494 0.0525 ...
> class(x)
[1] "matrix" "array" 
> class(z)
[1] "matrix" "array"
AndriSignorell commented 1 year ago

Ok, got it... Last one:

all.equal(x, z)
all(IsZero(x-z))
barracuda156 commented 1 year ago
> library(DescTools)
> meth <- c("wald","waldcc","hal","jp","mee","mn","score","scorecc","ha","ac","blj")
> x <- unname(round(BinomDiffCI(56, 70, 48, 80, method = meth), 4)[, -1])
> z <-   cbind(c(0.0575, 0.0441, 0.0535, 0.0531, 0.0534, 
+                0.0528, 0.0524, 0.0428, 0.0494, 0.0525, 0.054), 
+              c(0.3425, 0.3559, 0.3351, 0.3355, 0.3377, 
+                0.3382, 0.3339, 0.3422, 0.3506, 0.3358, 0.34))
> all.equal(x, z)
[1] TRUE
> all(IsZero(x-z))
[1] TRUE
AndriSignorell commented 1 year ago

ok, fixed. I will update with the next version...

barracuda156 commented 1 year ago

@AndriSignorell Thank you!

What turned out to be the cause though?

AndriSignorell commented 1 year ago

Well, testing whether floating point quantities are identical is not really recommended, especially the test may fail for builds without long doubles. I mean PowerPC belongs to this group.... To reliably test for x=z floats we are advised not to test x=z, but x-z < machine.eps. The reported difference above is lower than the machine eps on your system but not 0.

My mistake! ;-)

barracuda156 commented 1 year ago

Well, testing whether floating point quantities are identical is not really recommended, especially the test may fail for builds without long doubles. I mean PowerPC belongs to this group.... To reliably test for x=z floats we are advised not to test x=z, but x-z < machine.eps. The reported difference above is lower than the machine eps on your system but not 0.

My mistake! ;-)

Thank you!

P. S. PowerPC has long double, but IBM format of it (it is composed of two doubles).

AndriSignorell commented 1 year ago

This should be fixed in DescTools 0.99.49 which is meanwhile awailable. Could you check and close if ok?

barracuda156 commented 1 year ago

@AndriSignorell Thank you, can be closed!