coreos / etcd-operator

etcd operator creates/configures/manages etcd clusters atop Kubernetes
https://coreos.com/blog/introducing-the-etcd-operator.html
Apache License 2.0
1.75k stars 740 forks source link

use etcd client url when add member or update member #2148

Open wppzxc opened 4 years ago

wppzxc commented 4 years ago

Sometimes an etcd pod is running but not yet ready. If etcd-operator tries to add a member at that moment, the request may time out on the operator side ("context deadline exceeded") even though etcd still processes it. In that case etcd ends up with one extra member in its membership list that has no corresponding running pod, and etcd-operator keeps reporting "failed to reconcile: lost quorum".

wppzxc commented 4 years ago

The etcd-operator error logs look like this:

time="2019-12-25T00:17:26Z" level=info msg="cluster created with seed member (etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7-dl9f8c67sd)" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster

time="2019-12-25T00:17:27Z" level=info msg="start running..." cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster

time="2019-12-25T00:17:28Z" level=info msg="Start reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=206b330f-7df3-4a29-8a52-20f9fc3207b7 pkg=cluster

time="2019-12-25T00:17:28Z" level=info msg="Finish reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=206b330f-7df3-4a29-8a52-20f9fc3207b7 pkg=cluster time="2019-12-25T00:17:35Z" level=info msg="Start reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:17:35Z" level=info msg="running members: etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7-dl9f8c67sd" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:17:35Z" level=info msg="cluster membership: etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7-dl9f8c67sd" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:17:36Z" level=info msg="Start reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=206b330f-7df3-4a29-8a52-20f9fc3207b7 pkg=cluster time="2019-12-25T00:17:36Z" level=info msg="Finish reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=206b330f-7df3-4a29-8a52-20f9fc3207b7 pkg=cluster time="2019-12-25T00:17:40Z" level=info msg="Finish reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:17:40Z" level=error msg="failed to reconcile: fail to add new member (etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7-jd8hb59r9k): context deadline exceeded" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:17:44Z" level=info msg="Start reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=206b330f-7df3-4a29-8a52-20f9fc3207b7 pkg=cluster time="2019-12-25T00:17:44Z" level=info msg="Finish reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=206b330f-7df3-4a29-8a52-20f9fc3207b7 pkg=cluster time="2019-12-25T00:17:52Z" level=info msg="Start reconciling" 
cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=206b330f-7df3-4a29-8a52-20f9fc3207b7 pkg=cluster time="2019-12-25T00:17:52Z" level=info msg="Finish reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=206b330f-7df3-4a29-8a52-20f9fc3207b7 pkg=cluster time="2019-12-25T00:17:53Z" level=error msg="failed to update members: list members failed: creating etcd client failed: dial tcp: lookup etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7-dl9f8c67sd.etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7.default.svc on 10.254.0.10:53: no such host" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:18:00Z" level=info msg="Start reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=206b330f-7df3-4a29-8a52-20f9fc3207b7 pkg=cluster time="2019-12-25T00:18:00Z" level=info msg="Finish reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=206b330f-7df3-4a29-8a52-20f9fc3207b7 pkg=cluster time="2019-12-25T00:18:01Z" level=info msg="Start reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:18:01Z" level=info msg="running members: etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7-dl9f8c67sd" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:18:01Z" level=info msg="cluster membership: etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7-jd8hb59r9k,etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7-dl9f8c67sd" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:18:01Z" level=info msg="Finish reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:18:01Z" level=error msg="failed to reconcile: lost quorum" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default 
pkg=cluster time="2019-12-25T00:18:08Z" level=info msg="Start reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=206b330f-7df3-4a29-8a52-20f9fc3207b7 pkg=cluster time="2019-12-25T00:18:08Z" level=info msg="Finish reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=206b330f-7df3-4a29-8a52-20f9fc3207b7 pkg=cluster time="2019-12-25T00:18:09Z" level=info msg="Start reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:18:09Z" level=info msg="running members: etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7-dl9f8c67sd" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:18:09Z" level=info msg="cluster membership: etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7-jd8hb59r9k,etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7-dl9f8c67sd" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:18:09Z" level=info msg="Finish reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:18:09Z" level=error msg="failed to reconcile: lost quorum" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:18:16Z" level=info msg="Start reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=206b330f-7df3-4a29-8a52-20f9fc3207b7 pkg=cluster time="2019-12-25T00:18:16Z" level=info msg="Finish reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=206b330f-7df3-4a29-8a52-20f9fc3207b7 pkg=cluster time="2019-12-25T00:18:17Z" level=info msg="Start reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:18:17Z" level=info msg="running members: etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7-dl9f8c67sd" 
cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:18:17Z" level=info msg="cluster membership: etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7-jd8hb59r9k,etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7-dl9f8c67sd" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:18:17Z" level=info msg="Finish reconciling" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster time="2019-12-25T00:18:17Z" level=error msg="failed to reconcile: lost quorum" cluster-name=etcd-206b330f-7df3-4a29-8a52-20f9fc3207b7 cluster-namespace=default pkg=cluster