Closed: deviantony closed this issue 4 years ago.
A look at the route inside that container:
root@b2b8ccba8fe8:/# ip route
default via 172.18.0.1 dev eth1
10.0.1.0/24 dev eth0 proto kernel scope link src 10.0.1.20
172.18.0.0/16 dev eth1 proto kernel scope link src 172.18.0.3
root@b2b8ccba8fe8:/# ping 10.0.1.20
PING 10.0.1.20 (10.0.1.20): 56 data bytes
64 bytes from 10.0.1.20: icmp_seq=0 ttl=64 time=0.045 ms
64 bytes from 10.0.1.20: icmp_seq=1 ttl=64 time=0.044 ms
64 bytes from 10.0.1.20: icmp_seq=2 ttl=64 time=0.045 ms
^C--- 10.0.1.20 ping statistics ---
3 packets transmitted, 3 packets received, 0% packet loss
round-trip min/avg/max/stddev = 0.044/0.045/0.045/0.000 ms
Inspection of the overlay network:
docker -H <SWARM_MASTER>:4000 network inspect myOverlayNetwork
[
{
"Name": "myOverlayNetwork",
"Id": "9a2e391d4b0e6d03a7ff86d9285287b61df153c748f9222d7aeba4a5f0ab9a5c",
"Scope": "global",
"Driver": "overlay",
"EnableIPv6": false,
"IPAM": {
"Driver": "default",
"Options": {},
"Config": [
{
"Subnet": "10.0.1.0/24",
"Gateway": "10.0.1.1/24"
}
]
},
"Internal": false,
"Containers": {
"114138de4c2bc2386364220a04419d2a3be2d635ec76c54df7ebed80e7b51806": {
"Name": "container1",
"EndpointID": "43c26bbc64c7e0a3ed2bf444025cca111d678b07117c80596e4545844d8cf74b",
"MacAddress": "02:42:0a:00:01:0d",
"IPv4Address": "10.0.1.13/24",
"IPv6Address": ""
},
"17058c3b6581d634cf15e9e77d348f025d5e8ee6035d0a1d6d7db6a6925cbd27": {
"Name": "container2",
"EndpointID": "0b6abf162b1f9ce2fdc78d1476fdfe3f1f359d7687e613063a5bf64eabe87919",
"MacAddress": "02:42:0a:00:01:0c",
"IPv4Address": "10.0.1.12/24",
"IPv6Address": ""
},
"2cf7f30588de36237e80e8609a4d65176602cbfa3ac3a71bb1dfff558a540c53": {
"Name": "container3",
"EndpointID": "3197d4fc33ff1859d094905a31fd36a2b08637636392ce7e1fcc1b68c782d97b",
"MacAddress": "02:42:0a:00:01:12",
"IPv4Address": "10.0.1.18/24",
"IPv6Address": ""
},
"3a0b2bdfdcc6c17fc8dd372375b1f261b6b6fcbb979ced632aaafcd1775ff803": {
"Name": "container4",
"EndpointID": "a4b66dcbf22608e0bf24d0e192594e1d34715db19bb95045656b3ca42ce91723",
"MacAddress": "02:42:0a:00:01:03",
"IPv4Address": "10.0.1.3/24",
"IPv6Address": ""
},
"4b10fb9111531568e336fa622533d00ab40057d73e221d374506c5beb7740468": {
"Name": "container5",
"EndpointID": "1e5b3767906ec9abe3e2ceb8a973394264e9147afcb89a48b7de85d1e0606f8e",
"MacAddress": "02:42:0a:00:01:0a",
"IPv4Address": "10.0.1.10/24",
"IPv6Address": ""
},
"4dd69a2ffe96cc6caa6c2edc95445361faa65e61742c69272ed819478f54195e": {
"Name": "container6",
"EndpointID": "1bf431ebea140e4045d58a8c69bf68499bbdadfab33d444b3cba38941a97d462",
"MacAddress": "02:42:0a:00:01:13",
"IPv4Address": "10.0.1.19/24",
"IPv6Address": ""
},
"500360c93596ac2c5a912effd6d4680595c50536cd429770aab9efed6010025b": {
"Name": "container7",
"EndpointID": "d741114de2f1668794fe6a3e4ac8e352db9b86b424e4b7be1a8a339aee7a0203",
"MacAddress": "02:42:0a:00:01:10",
"IPv4Address": "10.0.1.16/24",
"IPv6Address": ""
},
"a9c72bfd0e32340474f18b27a29dfb1ea0094c2534143f3cb1efbf39414b3530": {
"Name": "container8",
"EndpointID": "e8f48eeee32b34901c2a85a4c8585bdb26a4820e9e67b3e13161a8ff317bacba",
"MacAddress": "02:42:0a:00:01:08",
"IPv4Address": "10.0.1.8/24",
"IPv6Address": ""
},
"ab67b57b39515d64dbfd98c227fd9d537b4df6e10ab70e1740bb317057167e51": {
"Name": "container9",
"EndpointID": "421699d322e2c0a4e93e37b48a6f4f6374558a2ab8c9b0bb891dfcb5be7f4b0e",
"MacAddress": "02:42:0a:00:01:05",
"IPv4Address": "10.0.1.5/24",
"IPv6Address": ""
},
"b2b8ccba8fe81c04d469e63fbc59547a5b4a0d97461197472826291ecb76c917": {
"Name": "container10",
"EndpointID": "2ca0601548ad7ef491b68e7ad03d09a08c5e8c2622020206829789aa2a0b8c1d",
"MacAddress": "02:42:0a:00:01:06",
"IPv4Address": "10.0.1.6/24",
"IPv6Address": ""
},
"b435e70748a05144158c035ac11e1766e7a056377f246a10b8f7605675368807": {
"Name": "container11",
"EndpointID": "9e7f3796b5857f156a94ae64f175f24ef04f6c52b1993f7cb19f74b4b2070fdd",
"MacAddress": "02:42:0a:00:01:0f",
"IPv4Address": "10.0.1.15/24",
"IPv6Address": ""
},
"b66fc584dae7c53b0c723dfd82a42e44c4b4d889cd3a6f07f4f31f7aaee78006": {
"Name": "container12",
"EndpointID": "5f0836545b102b883d690f2de5250c901f96ab03d3f0a82f5472c2ddbfb40c09",
"MacAddress": "02:42:0a:00:01:0b",
"IPv4Address": "10.0.1.11/24",
"IPv6Address": ""
},
"d50e02b35bc6f84daa794836cf983473f4ae6b67386c992b90acbe0827f49571": {
"Name": "container13",
"EndpointID": "b779f9b5103cb0b642694a6c6ca07197afc114f9c341583daec75e2337d617ca",
"MacAddress": "02:42:0a:00:01:02",
"IPv4Address": "10.0.1.2/24",
"IPv6Address": ""
},
"e7156882be28fd203db78786c65242668385e3d932c33e6dc8d5a925ce191001": {
"Name": "container14",
"EndpointID": "5b27db54da8f1db364d396b4e7866d1544fe2aa2970b77ce866722b626353854",
"MacAddress": "02:42:0a:00:01:07",
"IPv4Address": "10.0.1.7/24",
"IPv6Address": ""
},
"ebcaec0f1baeb651ba04b1a95ff08537f4bdfa9b8200fc612a47cfc2b3bfb67f": {
"Name": "container15",
"EndpointID": "8af22ee991c7411f82869a1197c3b60a996af5d074e307efc8e9757e75ed7aaa",
"MacAddress": "02:42:0a:00:01:04",
"IPv4Address": "10.0.1.4/24",
"IPv6Address": ""
},
"f3ceec0af7593c1e7d1cccdbdf776f6370d5c4300a856129314af3382da9173f": {
"Name": "container16",
"EndpointID": "99481c90adbec0d8fa008212ea39e799d1637cf39f365ff8533a47d70cf088fa",
"MacAddress": "02:42:0a:00:01:0e",
"IPv4Address": "10.0.1.14/24",
"IPv6Address": ""
},
"f5622cfba2ffa2761c0a77eee984334a1b32fe6436481114771ce9f5ffadf007": {
"Name": "container17",
"EndpointID": "8136756797e20cb86a738e46509247649f648cc9660a343ada9aa9486a1ac0ee",
"MacAddress": "02:42:0a:00:01:11",
"IPv4Address": "10.0.1.17/24",
"IPv6Address": ""
},
"fe696781382c8d773c9439991afe1652173e10fd50143021dfe4b086bc469a49": {
"Name": "container18",
"EndpointID": "4cda037b50721062fb9607ca30113bdb38274ea95d9204763b36d7122364523c",
"MacAddress": "02:42:0a:00:01:09",
"IPv4Address": "10.0.1.9/24",
"IPv6Address": ""
}
},
"Options": {},
"Labels": {}
}
]
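For readability, the container name / IP pairs can be pulled out of that inspect output with a short filter; a sketch assuming jq is installed:
docker -H <SWARM_MASTER>:4000 network inspect myOverlayNetwork \
  | jq -r '.[0].Containers | to_entries[] | "\(.value.Name)\t\(.value.IPv4Address)"'
# Prints one "name<TAB>IP" line per endpoint, e.g. container1 10.0.1.13/24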
After some testing, it seems that containers on NODE6 fail to communicate only with containers on NODE1; communication with containers on the other nodes works.
In the tests I made before, container15 and 10.0.1.13 correspond to containers running on NODE1.
@deviantony Is this the same as #2161 ?
I'm seeing similar issues. Sometimes the connection between containers times out.
Is there any workaround or ETA for a fix?
@doronp I'm not using Docker UCP, but the behaviour described in that issue seems to be the same as mine.
Just found the issue: a network problem on my side. Swarm NODE6 wasn't able to communicate with Swarm NODE1, which explains this behaviour.
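For reference, a minimal node-to-node check that surfaces this kind of problem, assuming <NODE1_IP> stands in for NODE1's real address and the OpenBSD variant of nc is available:
# Basic reachability from NODE6 to NODE1:
ping -c 3 <NODE1_IP>
# Overlay control plane (gossip) port; -z only probes, it sends no data:
nc -zv <NODE1_IP> 7946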
I'm reopening the issue because this happened again this morning (I had already solved the node connectivity issue).
From time to time, some containers seem to be unable to communicate with other containers. Re-creating the containers that have connectivity issues sometimes solves the problem, sometimes not.
I also had this weird behaviour two days ago:
Try to ping containerB from containerA: KO.
$ docker -H <SWARM_MANAGER>:4000 exec -i -t containerA bash -l -c 'ping containerB'
PING containerB (10.0.1.12) 56(84) bytes of data.
^C
--- db_balancer ping statistics ---
2 packets transmitted, 0 received, 100% packet loss, time 1007ms
Try to ping containerA from containerB: OK
$ docker -H <SWARM_MANAGER>:4000 exec -i -t containerB bash -l -c 'ping containerA'
PING containerA (10.0.1.8): 56 data bytes
64 bytes from 10.0.1.8: icmp_seq=0 ttl=64 time=0.559 ms
64 bytes from 10.0.1.8: icmp_seq=1 ttl=64 time=0.306 ms
64 bytes from 10.0.1.8: icmp_seq=2 ttl=64 time=0.301 ms
64 bytes from 10.0.1.8: icmp_seq=3 ttl=64 time=0.387 ms
64 bytes from 10.0.1.8: icmp_seq=4 ttl=64 time=0.276 ms
^C--- web2 ping statistics ---
5 packets transmitted, 5 packets received, 0% packet loss
round-trip min/avg/max/stddev = 0.276/0.366/0.559/0.104 ms
And the weirdest part: just after that, trying to ping containerB from containerA again works: OK
$ docker -H <SWARM_MANAGER>:4000 exec -i -t containerA bash -l -c 'ping containerB'
PING containerB (10.0.1.12) 56(84) bytes of data.
64 bytes from containerB.myOverlayNet (10.0.1.12): icmp_seq=1 ttl=64 time=0.342 ms
64 bytes from containerB.myOverlayNet (10.0.1.12): icmp_seq=2 ttl=64 time=0.226 ms
64 bytes from containerB.myOverlayNet (10.0.1.12): icmp_seq=3 ttl=64 time=0.247 ms
^C
--- db_balancer ping statistics ---
3 packets transmitted, 3 received, 0% packet loss, time 1998ms
rtt min/avg/max/mdev = 0.226/0.271/0.342/0.053 ms
Any news on this? Do you need more information? I'm still having this issue where, all of a sudden, one container is unable to talk to another, and I can solve it by pinging the container in the reverse direction (see my example above with containerA / containerB).
But I'm also having the following issue where some containers are not able to reach other containers at all (and the reverse-ping workaround does not work here):
$ docker -H <SWARM_MANAGER>:4000 exec -it containerC sh -c 'ping containerD'
PING containerD (10.0.1.6) 56(84) bytes of data.
From containerC (10.0.1.3) icmp_seq=1 Destination Host Unreachable
From containerC (10.0.1.3) icmp_seq=2 Destination Host Unreachable
From containerC (10.0.1.3) icmp_seq=3 Destination Host Unreachable
From containerC (10.0.1.3) icmp_seq=4 Destination Host Unreachable
$ docker -H <SWARM_MANAGER>:4000 exec -it containerA sh -c 'ping containerD'
PING containerD (10.0.1.6) 56(84) bytes of data.
64 bytes from containerD.myOverlayNetwork (10.0.1.6): icmp_seq=1 ttl=64 time=0.305 ms
64 bytes from containerD.myOverlayNetwork (10.0.1.6): icmp_seq=2 ttl=64 time=0.716 ms
64 bytes from containerD.myOverlayNetwork (10.0.1.6): icmp_seq=3 ttl=64 time=0.393 ms
@deviantony Hello, have you solved this issue? I have the same problem as you; I hope you can give me some suggestions. Thank you very much.
@Zeniubius Nope. We were heavily using the overlay network for our internal container communication when this issue occurred, so we switched to another container architecture where we now rely less on the overlay, and we never saw the issue occur again.
@deviantony @Zeniubius It looks like a one-direction MAC discovery failure in your example. Did you see bulk sync errors in the log, like Bulk sync to node ip-172-31-19-38 timed out?
@Zeniubius What docker version are you using? If you have a one-direction ping problem, can you run docker -H <SWARM_MANAGER>:4000 exec -it containerA sh -c 'arp' in both containers to see whether the MAC for the destination shows up or not?
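For the one-direction MAC discovery symptom, another place to look is the VXLAN forwarding database inside the overlay network namespace on the host; a rough sketch, assuming the namespace is named 1-<NETWORK_ID_PREFIX> under /var/run/docker/netns (the exact name and layout can differ by Docker version):
# List the network namespaces Docker created on this host:
ls /var/run/docker/netns
# Dump the forwarding database of the overlay namespace (placeholder name);
# each remote container MAC should map to the IP of the node hosting it,
# and a missing entry matches the (incomplete) ARP entries shown below.
nsenter --net=/var/run/docker/netns/1-<NETWORK_ID_PREFIX> bridge fdb show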
@dongluochen Hi, unfortunately I do not know how to look for bulk sync errors in the log. Here is my docker version:
kfzs@test1:~$ docker version
Client:
Version: 1.12.1
API version: 1.24
Go version: go1.6.3
Git commit: 23cf638
Built: Thu Aug 18 05:33:38 2016
OS/Arch: linux/amd64
Server:
Version: 1.12.1
API version: 1.24
Go version: go1.6.3
Git commit: 23cf638
Built: Thu Aug 18 05:33:38 2016
OS/Arch: linux/amd64
And it's not a one-direction ping problem but a two-direction ping problem. For example, I have containerA and containerB on hostA, and containerC and containerD on hostB. I can ping both ways between A and B, and both ways between C and D, but I can't ping C or D from A or B, and I can't ping A or B from C or D either. By the way, here is what I get when I run the arp command:
kfzs@test1:~$ docker -H :4000 exec -it containerA sh -c 'arp'
Address HWtype HWaddress Flags Mask Iface
kfserver.swarm_overlay ether 02:42:c0:a8:02:03 C eth0
172.18.0.1 ether 02:42:82:f4:01:25 C eth1
3177df75e6e3.swarm_over (incomplete) eth0
kfzs@test1:~$ docker -H :4000 exec -it containerB sh -c 'arp'
Address HWtype HWaddress Flags Mask Iface
kfclient.swarm_overlay ether 02:42:c0:a8:02:02 C eth0
kfserver.swarm_overlay (incomplete) eth0
a3f4fecc0d5f.swarm_over (incomplete) eth0
swarm_overlay is an overlay network that I created, but unfortunately I do not know what (incomplete) means. I hope you can help me, thank you very much.
@Zeniubius You can search for bulk sync in your docker logs. The daemon log location differs between systems; see http://stackoverflow.com/questions/30969435/where-is-the-docker-daemon-log for the location on your system.
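A rough sketch of that search on a systemd-based host (log locations differ by distro):
# Daemon logs in the journal:
journalctl -u docker.service --no-pager | grep -i "bulk sync"
# Or, on hosts that log to a file, grep the daemon log directly, e.g.:
grep -i "bulk sync" /var/log/upstart/docker.log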
I suspect your problem is that the required network ports are not open. You need to make sure the following ports are allowed in your firewalls (a quick check is sketched after the table):
Protocol Port Description
udp 4789 Data plane (VXLAN)
tcp/udp 7946 Control plane
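A minimal way to check those ports between two nodes, assuming <NODE1_IP> stands in for the remote node's address and the OpenBSD variant of nc is installed:
# Control plane uses TCP and UDP 7946; -z only probes, -u switches to UDP
# (a UDP "succeeded" is only a hint, since UDP is connectionless):
nc -zv <NODE1_IP> 7946
nc -zvu <NODE1_IP> 7946
# Data plane (VXLAN) is UDP 4789; at minimum confirm the local firewall on
# each node accepts it:
iptables -L INPUT -n | grep -E '4789|7946'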
@dongluochen Oh yes, I think so. I had checked ports 4789 and 7946 before, and I found that both of these ports are allowed for containerA, but only 4789 for containerB, so I tried to open 7946 with:
iptables -A INPUT -p tcp --dport 7946 -j ACCEPT
iptables -A INPUT -p udp --dport 7946 -j ACCEPT
but that failed, and it gave me no messages or logs.
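One possible reason, sketched with standard iptables flags: -A appends the rule after any existing REJECT or DROP rule in the INPUT chain, so it may never be reached; inserting at the top and checking the counters makes the effect visible.
# Insert the ACCEPT rules at position 1 instead of appending them:
iptables -I INPUT 1 -p tcp --dport 7946 -j ACCEPT
iptables -I INPUT 1 -p udp --dport 7946 -j ACCEPT
# Confirm they sit above any REJECT/DROP entries and watch their packet
# counters increase while retrying the ping:
iptables -L INPUT -nv --line-numbers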
Hello there,
I've got an issue in my swarm cluster where some containers are unable to communicate with containers on other nodes via an overlay network.
Here is my setup:
Info:
The issue is only present on the containers inside Swarm NODE6. I can't ping any containers inside NODE6 from the other nodes.
Now, if I log into a container on NODE6, I can ping some containers but not all of them.
It seems that I can't ping containers that are running on a particular node of the Swarm cluster (NODE1).
Note that it works if I try from any other node (containers on NODE2, NODE3, NODE4, NODE5).
I've created the overlay network using the default command:
Any ideas?