swift-server / swift-prometheus

Prometheus client library for Swift
https://swiftpackageindex.com/swift-server/swift-prometheus
Apache License 2.0
142 stars 30 forks source link

Inconsistent: Summary and Histogram deal with dimensional values differently than Counter #77

Open fabianfett opened 2 years ago

fabianfett commented 2 years ago

When I was using the Summary and Histogram type I noticed, that if I record a value for a metric with name and dimensions, the value is also recorded for the dimension less version of the metric. This is in contrast to the Counter, where an increment on a Counter that was created with a dimension, does not increment the dimension less counter.

We should have a consistent behavior here. @ktoso do you know by any chance what is correct?

Steps to reproduce

    func testHistogramWithLabels() {
        let prom = PrometheusClient()
        var config = PrometheusMetricsFactory.Configuration()
        config.timerImplementation = .histogram()

        let metricsFactory = PrometheusMetricsFactory(client: prom)
        let timer = metricsFactory.makeTimer(label: "duration_nanos", dimensions: [("foo", "bar")])
        timer.recordNanoseconds(1)

        let counter = metricsFactory.makeCounter(label: "simple_counter", dimensions: [("foo", "bar")])
        counter.increment(by: 1)

        prom.collect() { (output: String) in
            print(output)

            XCTAssert(output.utf8.hasSubSequence(#"duration_nanos_count 1"#.utf8)) // <--- 🚨 inconsistent part 1
            XCTAssert(output.utf8.hasSubSequence(#"duration_nanos_count{foo="bar"} 1"#.utf8))

            XCTAssert(output.utf8.hasSubSequence(#"simple_counter 0"#.utf8)) // <--- 🚨 inconsistent part 1
            XCTAssert(output.utf8.hasSubSequence(#"simple_counter{foo="bar"} 1"#.utf8))
        }
    }

    func testSummaryWithLabels() {
        let prom = PrometheusClient()
        var config = PrometheusMetricsFactory.Configuration()
        config.timerImplementation = .summary()

        let metricsFactory = PrometheusMetricsFactory(client: prom)
        let timer = metricsFactory.makeTimer(label: "duration_nanos", dimensions: [("foo", "bar")])
        timer.recordNanoseconds(1)

        let counter = metricsFactory.makeCounter(label: "simple_counter", dimensions: [("foo", "bar")])
        counter.increment(by: 1)

        prom.collect() { (output: String) in
            print(output)

            XCTAssert(output.utf8.hasSubSequence(#"duration_nanos_count 1"#.utf8)) // <--- 🚨 inconsistent part 2
            XCTAssert(output.utf8.hasSubSequence(#"duration_nanos_count{foo="bar"} 1"#.utf8))

            XCTAssert(output.utf8.hasSubSequence(#"simple_counter 0"#.utf8)) // <--- 🚨 inconsistent part 2
            XCTAssert(output.utf8.hasSubSequence(#"simple_counter{foo="bar"} 1"#.utf8))
        }
    }
}

extension Sequence {
    func hasSubSequence<Other: Collection>(_ other: Other) -> Bool where Other.Element == Self.Element, Other.Element: Equatable {
        var otherIterator = other.makeIterator()

        for element in self {
            let next = otherIterator.next()
            if element == next {
                continue
            } else if next == nil {
                return true
            } else {
                // unequal. reset iterator
                otherIterator = other.makeIterator()
            }
        }

        // check if other is also done
        if otherIterator.next() == nil {
            return true
        }

        return false
    }
}

Environment

SwiftPrometheus: 1.0.0

ktoso commented 2 years ago

Hmmm, yeah we should definitely strive for consistent behavior; I don't know which way is the right one to correct towards though, we'd have to check other impls and see.

armandgrillet commented 1 year ago

I've simplified the tests a bit and compared to the official Go client so that we see what an official implementation returns.

The tests

In Swift, the test file looks like this:

import XCTest
import Prometheus
@testable import SwiftPrometheus77

final class SwiftPrometheus77Tests: XCTestCase {
    func testHistogramWithLabels() {
        let prom = PrometheusClient()
        let hist = prom.createHistogram(forType: Int.self, named: "duration_nanos")
        hist.observe(1, [("foo", "bar")])

        prom.collect() { (output: String) in
            print(output)
        }

        print("---")
    }

    func testCounterWithLabels() {
        let prom = PrometheusClient()
        let counter = prom.createCounter(forType: Int.self, named: "simple_counter")
        counter.inc(1, [("foo", "bar")])

        prom.collect() { (output: String) in
            print(output)
        }

        print("---")
    }
}

In Go, the test file looks like this:

package main

import (
    "bytes"
    "fmt"
    "testing"

    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/common/expfmt"
)

func TestHistogramWithLabels(t *testing.T) {
    registry := prometheus.NewRegistry()

    timer := prometheus.NewHistogramVec(
        prometheus.HistogramOpts{
            Name: "duration_nanos",
        },
        []string{"foo"},
    )
    registry.MustRegister(timer)
    timer.WithLabelValues("bar").Observe(1)

    printMetrics(registry)

    fmt.Println("---")
}

func TestCounterWithLabels(t *testing.T) {
    registry := prometheus.NewRegistry()

    counter := prometheus.NewCounterVec(
        prometheus.CounterOpts{
            Name: "simple_counter",
        },
        []string{"foo"},
    )
    registry.MustRegister(counter)
    counter.WithLabelValues("bar").Add(1)

    printMetrics(registry)
}

func printMetrics(registry *prometheus.Registry) {
    gathering, err := registry.Gather()
    if err != nil {
        fmt.Println(err)
    }

    out := &bytes.Buffer{}
    for _, mf := range gathering {
        if _, err := expfmt.MetricFamilyToText(out, mf); err != nil {
            panic(err)
        }
    }
    fmt.Print(out.String())
}

The results

Test counter

Swift:

# TYPE simple_counter counter
simple_counter 0
simple_counter{foo="bar"} 1

Go:

# HELP simple_counter
# TYPE simple_counter counter
simple_counter{foo="bar"} 1

Test histogram

Swift:

# TYPE duration_nanos histogram
duration_nanos_bucket{le="0.005"} 0
duration_nanos_bucket{le="0.01"} 0
duration_nanos_bucket{le="0.025"} 0
duration_nanos_bucket{le="0.05"} 0
duration_nanos_bucket{le="0.1"} 0
duration_nanos_bucket{le="0.25"} 0
duration_nanos_bucket{le="0.5"} 0
duration_nanos_bucket{le="1.0"} 1
duration_nanos_bucket{le="2.5"} 1
duration_nanos_bucket{le="5.0"} 1
duration_nanos_bucket{le="10.0"} 1
duration_nanos_bucket{le="+Inf"} 1
duration_nanos_count 1
duration_nanos_sum 1
duration_nanos_bucket{le="0.005", foo="bar"} 0
duration_nanos_bucket{le="0.01", foo="bar"} 0
duration_nanos_bucket{le="0.025", foo="bar"} 0
duration_nanos_bucket{le="0.05", foo="bar"} 0
duration_nanos_bucket{le="0.1", foo="bar"} 0
duration_nanos_bucket{le="0.25", foo="bar"} 0
duration_nanos_bucket{le="0.5", foo="bar"} 0
duration_nanos_bucket{le="1.0", foo="bar"} 1
duration_nanos_bucket{le="2.5", foo="bar"} 1
duration_nanos_bucket{le="5.0", foo="bar"} 1
duration_nanos_bucket{le="10.0", foo="bar"} 1
duration_nanos_bucket{le="+Inf", foo="bar"} 1
duration_nanos_count{foo="bar"} 1
duration_nanos_sum{foo="bar"} 1

Go:

# HELP duration_nanos
# TYPE duration_nanos histogram
duration_nanos_bucket{foo="bar",le="0.005"} 0
duration_nanos_bucket{foo="bar",le="0.01"} 0
duration_nanos_bucket{foo="bar",le="0.025"} 0
duration_nanos_bucket{foo="bar",le="0.05"} 0
duration_nanos_bucket{foo="bar",le="0.1"} 0
duration_nanos_bucket{foo="bar",le="0.25"} 0
duration_nanos_bucket{foo="bar",le="0.5"} 0
duration_nanos_bucket{foo="bar",le="1"} 1
duration_nanos_bucket{foo="bar",le="2.5"} 1
duration_nanos_bucket{foo="bar",le="5"} 1
duration_nanos_bucket{foo="bar",le="10"} 1
duration_nanos_bucket{foo="bar",le="+Inf"} 1
duration_nanos_sum{foo="bar"} 1
duration_nanos_count{foo="bar"} 1

I hope this helps.

fabianfett commented 1 year ago

Thank you @armandgrillet! That’s an awesome contribution!

hassila commented 1 year ago

See also https://github.com/swift-server-community/SwiftPrometheus/issues/80 which includes a possible fix in the link.