influxdata / influxdb

Scalable datastore for metrics, events, and real-time analytics
https://influxdata.com
Apache License 2.0
28.84k stars 3.55k forks source link

add aggregate functions: skewness & kurtosis #5088

Closed hillar closed 5 years ago

hillar commented 8 years ago

Please add aggregate functions that measure the shape of a distribution: 1) skewness (the amount and direction of skew) and 2) kurtosis (how tall and sharp the peak is).

hillar commented 8 years ago
package main

import "fmt"
import "math"

// Kurtosis returns the excess kurtosis of data.
//
// Kurtosis measures how outlier-prone a distribution is. The kurtosis of the
// normal distribution is 3; more outlier-prone distributions score higher and
// less outlier-prone ones score lower. This function subtracts 3 from the
// computed value so that the normal distribution scores 0, as R does.
//
// The value is the mean fourth power of the deviations from the mean, divided
// by the square of the sample variance (n-1 denominator), minus 3. For the
// sample in this program's main it reproduces the figure printed by
// kurtosis() from R's e1071 package with default arguments.
//
// The result is a float64, or nil when the kurtosis is undefined:
//   - data holds fewer than four values, or
//   - the computed value is not finite, e.g. every value is equal, so the
//     variance is zero and the ratio would be 0/0.
func Kurtosis(data []float64) interface{} {
	// Too few points to say anything meaningful about the shape.
	if len(data) < 4 {
		return nil
	}
	n := float64(len(data))

	// Incremental mean: avoids building one large sum before dividing.
	var mean float64
	for i, v := range data {
		mean += (v - mean) / float64(i+1)
	}

	// Accumulate the second and fourth powers of the deviations in one pass.
	var sumSq, sumQuad float64
	for _, v := range data {
		dev := v - mean
		sq := dev * dev
		sumSq += sq
		sumQuad += sq * sq
	}
	variance := sumSq / (n - 1)

	kurtosis := sumQuad/(variance*variance)/n - 3

	// A NaN or Inf boxed in an interface is easy to mistake for a valid
	// result, and encoding/json cannot marshal it; report "undefined" the
	// same way as for short input.
	if math.IsNaN(kurtosis) || math.IsInf(kurtosis, 0) {
		return nil
	}
	return kurtosis
}

// main prints the reference kurtosis obtained from R followed by the value
// computed by Kurtosis for the same sample, so the two can be compared by eye.
func main() {
	// Reference session in R:
	//
	//     > library(e1071)
	//     > d <- c(3.567, 4.500, 4.150, 3.817, 3.917, 4.450, 2.000, 4.283, 4.767, 4.533, 1.850, 4.250)
	//     > kurtosis(d)
	//     [1] -0.1419239
	//
	// TODO: cross-check the value against MATLAB as well.
	const expected float64 = -0.1419239

	sample := []float64{
		3.567, 4.500, 4.150, 3.817, 3.917, 4.450,
		2.000, 4.283, 4.767, 4.533, 1.850, 4.250,
	}

	fmt.Println(expected)
	fmt.Println(Kurtosis(sample))
}
hillar commented 8 years ago
package main

import "fmt"
import "math"

// Skewness returns the skewness of data.
//
// Skewness measures the asymmetry of the data around the sample mean. A
// negative value means the data are spread out more to the left of the mean
// than to the right; a positive value means they are spread out more to the
// right. The skewness of the normal distribution is zero.
//
// The value is the mean cubed deviation from the mean, divided by the cube of
// the sample standard deviation (n-1 denominator). For the sample in this
// program's main it reproduces the figure printed by skewness() from R's
// e1071 package with default arguments.
//
// The result is a float64, or nil when the skewness is undefined:
//   - data holds fewer than four values, or
//   - the computed value is not finite, e.g. every value is equal, so the
//     standard deviation is zero and the ratio would be 0/0.
func Skewness(data []float64) interface{} {
	// Samples with fewer than four values are treated as too small to
	// describe a shape.
	if len(data) < 4 {
		return nil
	}
	n := float64(len(data))

	// Incremental mean: avoids building one large sum before dividing.
	var mean float64
	for i, v := range data {
		mean += (v - mean) / float64(i+1)
	}

	// Accumulate the squared and cubed deviations in one pass.
	var sumSq, sumCube float64
	for _, v := range data {
		dev := v - mean
		sq := dev * dev
		sumSq += sq
		sumCube += sq * dev
	}
	stdDev := math.Sqrt(sumSq / (n - 1))

	skewness := sumCube / n / (stdDev * stdDev * stdDev)

	// A NaN or Inf boxed in an interface is easy to mistake for a valid
	// result, and encoding/json cannot marshal it; report "undefined" the
	// same way as for short input.
	if math.IsNaN(skewness) || math.IsInf(skewness, 0) {
		return nil
	}
	return skewness
}

// main prints the reference skewness obtained from R followed by the value
// computed by Skewness for the same sample, so the two can be compared by eye.
func main() {
	// Reference session in R:
	//
	//     > library(e1071)
	//     > d <- c(3.567, 4.500, 4.150, 3.817, 3.917, 4.450, 2.000, 4.283, 4.767, 4.533, 1.850, 4.250)
	//     > skewness(d)
	//     [1] -1.177458
	//
	// TODO: cross-check the value against MATLAB as well.
	const expected float64 = -1.177458

	sample := []float64{
		3.567, 4.500, 4.150, 3.817, 3.917, 4.450,
		2.000, 4.283, 4.767, 4.533, 1.850, 4.250,
	}

	fmt.Println(expected)
	fmt.Println(Skewness(sample))
}
russorat commented 5 years ago

We have no plans to add these to InfluxQL. Please follow the issues in Flux for resolution.