Analyse QPS data for robustness tests

What would you like to be added?

The Prow page for a test includes an "Artifacts" link, which contains a file with the test log: https://gcsweb.k8s.io/gcs/kubernetes-ci-logs/logs/ci-etcd-robustness-main-amd64/1858942059486908416/artifacts/

What if we could parse the file and track the progress over time? No more guessing what QPS is good. What if we could do analysis across different dimensions? We could find test scenarios that might need improvement. What if we could visualize it? Like https://perf-dash.k8s.io/#/?jobname=gce-5000Nodes&metriccategoryname=APIServer&metricname=LoadResponsiveness_Prometheus&Resource=pods&Scope=cluster&Subresource=&Verb=LIST

Script to parse it

import json

# Print "<test name> <qps>" for every line of test.out that carries the
# pre-failure-injection traffic report.
MARKER = 'Reporting traffic before failure injection'

with open("test.out") as log_file:
  for raw_line in log_file:
    if MARKER in raw_line:
      entry = json.loads(raw_line)
      # The last tab-separated field of "Output" is parsed as a JSON object.
      stats_json = entry["Output"].rpartition('\t')[2]
      stats = json.loads(stats_json)
      print(entry["Test"], float(stats["qps"]))

Why is this needed?

Track impact of QPS for tests over time.

Rewritten in Go:

package main

import (
    "bufio"
    "encoding/json"
    "fmt"
    "io"
    "net/http"
    "strings"
)

// objects holds a single kind string; its items field is commented out.
// NOTE(review): presumably meant to mirror the GCS object-listing response —
// confirm. It is not referenced by the visible code, and its fields are
// unexported, so encoding/json would ignore them when decoding.
type objects struct {
    kind string
    // items is disabled for now; it would hold a list of object values.
    //items []object
}

// object holds two string fields, mediaLink and timeCreated.
// NOTE(review): presumably one entry of the GCS listing (main reads a
// "mediaLink" key from each listed item) — confirm. It is not referenced by
// the visible code, and its fields are unexported, so encoding/json would
// ignore them when decoding.
type object struct {
    mediaLink   string
    timeCreated string
}

func main() {
    resp, err := http.Get("https://storage.googleapis.com/storage/v1/b/kubernetes-ci-logs/o/?prefix=logs/ci-etcd-robustness-&matchGlob=**/artifacts/*.stdout")
    if err != nil {
        panic(fmt.Sprintf("Failed to fetch GCS items"))
    }
    defer resp.Body.Close()
    data, err := io.ReadAll(resp.Body)
    if err != nil {
        panic(fmt.Sprintf("Failed to read GCS items"))
    }
    var objs map[string]interface{}
    err = json.Unmarshal(data, &objs)
    if err != nil {
        panic(fmt.Sprintf("Failed to parse GCS items"))
    }
    items := objs["items"].([]interface{})
    fmt.Printf("Collected %d\n", len(items))
    for _, item := range items[:1] {
        obj := item.(map[string]interface{})
        resp, err := http.Get(obj["mediaLink"].(string))
        if err != nil {
            fmt.Printf("Failed to read build-log.txt file\n")
            continue
        }
        defer resp.Body.Close()
        scan := bufio.NewScanner(resp.Body)
        type TestLog struct {
            Output string
            Test   string
        }
        var log TestLog
        for scan.Scan() {
            err := json.Unmarshal(scan.Bytes(), &log)
            if err != nil {
                continue
            }
            if !strings.Contains(log.Output, "Reporting traffic before failure injection") {
                continue
            }
            stats := strings.SplitN(log.Output, "Reporting traffic before failure injection", 2)[1]
            parsedStats := map[string]interface{}{}
            err = json.Unmarshal([]byte(stats), &parsedStats)
            if err != nil {
                fmt.Printf("Failed to parse traffic stats\n")
            }
            fmt.Printf("%s %f\n", log.Test, parsedStats["qps"])
        }
    }
}

etcd-io / etcd

Analyse QPS data for robustness tests #18931

What would you like to be added?

Why is this needed?