In theory, when ErrorPercentThreshold is bigger than 100, the circuit should be always closed. But there are some exception. If you run the code below, you will find out that the circuit may possibly be open.
package main
import (
"fmt"
"github.com/afex/hystrix-go/hystrix"
"log"
"math/rand"
"sync"
"time"
)
func main() {
f := time.Millisecond * 200
cun := 1000
num := cun * 3
hystrix.SetLogger(log.Default())
hystrix.ConfigureCommand("my_command", hystrix.CommandConfig{
Timeout: int(f.Milliseconds()),
MaxConcurrentRequests: cun,
RequestVolumeThreshold: 30,
SleepWindow: 30,
ErrorPercentThreshold: 120,
})
var lock sync.Mutex
var wg sync.WaitGroup
rand.Seed(int64(time.Now().Nanosecond()))
mm := make(map[string]int)
mark := 0
now := time.Now()
for i := 0; i < num; i++ {
wg.Add(1)
go func() {
defer wg.Done()
err := hystrix.Do("my_command", func() error {
a := rand.Intn(5)
if a < 1 {
time.Sleep(time.Millisecond * 10)
return fmt.Errorf("internal error")
} else if a >= 4 {
time.Sleep(time.Millisecond * 300)
return nil
} else {
time.Sleep(time.Millisecond * 9)
lock.Lock()
if _, found := mm["success"]; found {
mm["success"]++
} else {
mm["success"] = 1
}
lock.Unlock()
return nil
}
}, func(err error) error {
if err != nil {
lock.Lock()
if err.Error() == "hystrix: circuit open" && mark == 0 {
log.Println("circuit on at: ", time.Now().Sub(now).Milliseconds())
mark = 1
}
if _, found := mm[err.Error()]; found {
mm[err.Error()]++
} else {
mm[err.Error()] = 1
}
lock.Unlock()
} else {
panic("err is nil")
}
return nil
})
if err != nil {
fmt.Println("err: ", err)
}
}()
}
wg.Wait()
log.Println("end at: ", time.Now().Sub(now).Milliseconds())
count := 0
for key, value := range mm {
count += value
fmt.Printf("%s: %f\n", key, float32(value) / float32(num))
}
if count != num {
panic("don't match")
}
}
Bug description
In theory, when ErrorPercentThreshold is bigger than 100, the circuit should be always closed. But there are some exception. If you run the code below, you will find out that the circuit may possibly be open.
In https://github.com/afex/hystrix-go/blob/fa1af6a1f4f56e0e50d427fe901cd604d8c6fb8a/hystrix/metrics.go#L138, the value of errs may be larger than that of reqs because the code calculate the errs later than reqs.
Resolution
One simple way to fix the problem is to make the circuit always heathy when ErrorPercentThreshold is bigger than 100 like: