Closed kimbyungeun closed 2 years ago
hi @kimbyungeun - so all of these processes are running on the same machine? Two nodes listening on an identical IP + port combination isn't really supported by TCP, as only one socket can listen on a given address and port at a time. The socket.bind operation should have failed here and prevented the second ActorSystem from binding to the same port - especially given that you're running on port 0 in your HOCON configuration, which tells the OS to give you a random, but available, high-order port.
So I'm not totally clear on what the issue is here because this configuration looks like it shouldn't happen due to how the underlying network stack works - how are both of these clients binding to the same port?
hi @Aaronontheweb Thank you for your answer.
I set dot-netty.tcp.port = 0, so I did not specify a port. Node App 1 executes process.Start():
var process = new Process
{
StartInfo = new ProcessStartInfo("Test.Executor.exe", arguments)
};
process.StartInfo.UseShellExecute = false;
process.StartInfo.RedirectStandardOutput = true;
process.Start()
Are all of the nodes running inside the same Docker image? How do you wire all these Docker images together?
Ah, I missed the detail about Docker - so both of the clients are running in separate Docker containers?
@Aaronontheweb @Arkatufus Thank you for your answers.
Sorry, I left out some very important information.
I use a Docker container like a virtual OS.
I create a bash Docker container and run Node App 1, Client App 1, and Client App 2 from inside that container.
[
{
"Id": "c99f44445544e90f2f7c534712e035c9f4e51d8258ba1c9df376f8130343e722",
"Created": "2022-07-21T00:32:28.175648383Z",
"Path": "sh",
"Args": [
"-c",
"/home/mirero/boot.sh&&/bin/bash"
],
"State": {
"Status": "running",
"Running": true,
"Paused": false,
"Restarting": false,
"OOMKilled": false,
"Dead": false,
"Pid": 40757,
"ExitCode": 0,
"Error": "",
"StartedAt": "2022-08-03T01:08:12.791583189Z",
"FinishedAt": "2022-08-03T01:08:08.398863143Z"
},
"Image": "sha256:5d05d3322fa915c4e8428ef1c1def99512c60e66556dae788123f0ea1b09932e",
"ResolvConfPath": "/var/lib/docker/containers/c99f44445544e90f2f7c534712e035c9f4e51d8258ba1c9df376f8130343e722/resolv.conf",
"HostnamePath": "/var/lib/docker/containers/c99f44445544e90f2f7c534712e035c9f4e51d8258ba1c9df376f8130343e722/hostname",
"HostsPath": "/var/lib/docker/containers/c99f44445544e90f2f7c534712e035c9f4e51d8258ba1c9df376f8130343e722/hosts",
"LogPath": "/var/lib/docker/containers/c99f44445544e90f2f7c534712e035c9f4e51d8258ba1c9df376f8130343e722/c99f44445544e90f2f7c534712e035c9f4e51d8258ba1c9df376f8130343e722-json.log",
"Name": "/stupefied_colden",
"RestartCount": 0,
"Driver": "overlay2",
"Platform": "linux",
"MountLabel": "",
"ProcessLabel": "",
"AppArmorProfile": "docker-default",
"ExecIDs": null,
"HostConfig": {
"Binds": [
"/home/mirero/mls_system:/home/mirero/mls_system"
],
"ContainerIDFile": "",
"LogConfig": {
"Type": "json-file",
"Config": {}
},
"NetworkMode": "default",
"PortBindings": {
"137/tcp": [
{
"HostIp": "",
"HostPort": "137"
}
],
"21/tcp": [
{
"HostIp": "",
"HostPort": "21"
}
],
"22/tcp": [
{
"HostIp": "",
"HostPort": "2222"
}
],
"445/tcp": [
{
"HostIp": "",
"HostPort": "445"
}
],
"5000/tcp": [
{
"HostIp": "",
"HostPort": "5000"
}
],
"5901/tcp": [
{
"HostIp": "",
"HostPort": "5901"
}
],
"6006/tcp": [
{
"HostIp": "",
"HostPort": "6006"
}
],
"8888/tcp": [
{
"HostIp": "",
"HostPort": "8888"
}
],
"8901/tcp": [
{
"HostIp": "",
"HostPort": "8901"
}
],
"8902/tcp": [
{
"HostIp": "",
"HostPort": "8902"
}
],
"8903/tcp": [
{
"HostIp": "",
"HostPort": "8903"
}
],
"8904/tcp": [
{
"HostIp": "",
"HostPort": "8904"
}
],
"8905/tcp": [
{
"HostIp": "",
"HostPort": "8905"
}
],
"8931/tcp": [
{
"HostIp": "",
"HostPort": "8931"
}
],
"8932/tcp": [
{
"HostIp": "",
"HostPort": "8932"
}
],
"8933/tcp": [
{
"HostIp": "",
"HostPort": "8933"
}
],
"8934/tcp": [
{
"HostIp": "",
"HostPort": "8934"
}
],
"8935/tcp": [
{
"HostIp": "",
"HostPort": "8935"
}
],
"8951/tcp": [
{
"HostIp": "",
"HostPort": "8951"
}
],
"8952/tcp": [
{
"HostIp": "",
"HostPort": "8952"
}
],
"8953/tcp": [
{
"HostIp": "",
"HostPort": "8953"
}
],
"8954/tcp": [
{
"HostIp": "",
"HostPort": "8954"
}
],
"8955/tcp": [
{
"HostIp": "",
"HostPort": "8955"
}
],
"9141/tcp": [
{
"HostIp": "",
"HostPort": "9141"
}
],
"9142/tcp": [
{
"HostIp": "",
"HostPort": "9142"
}
],
"9161/tcp": [
{
"HostIp": "",
"HostPort": "9161"
}
],
"9162/tcp": [
{
"HostIp": "",
"HostPort": "9162"
}
]
},
"RestartPolicy": {
"Name": "always",
"MaximumRetryCount": 0
},
"AutoRemove": false,
"VolumeDriver": "",
"VolumesFrom": null,
"CapAdd": null,
"CapDrop": null,
"Dns": [],
"DnsOptions": [],
"DnsSearch": [],
"ExtraHosts": null,
"GroupAdd": null,
"IpcMode": "shareable",
"Cgroup": "",
"Links": null,
"OomScoreAdj": 0,
"PidMode": "",
"Privileged": false,
"PublishAllPorts": false,
"ReadonlyRootfs": false,
"SecurityOpt": null,
"UTSMode": "",
"UsernsMode": "",
"ShmSize": 67108864,
"Runtime": "nvidia",
"ConsoleSize": [
0,
0
],
"Isolation": "",
"CpuShares": 0,
"Memory": 0,
"NanoCpus": 0,
"CgroupParent": "",
"BlkioWeight": 0,
"BlkioWeightDevice": [],
"BlkioDeviceReadBps": null,
"BlkioDeviceWriteBps": null,
"BlkioDeviceReadIOps": null,
"BlkioDeviceWriteIOps": null,
"CpuPeriod": 0,
"CpuQuota": 0,
"CpuRealtimePeriod": 0,
"CpuRealtimeRuntime": 0,
"CpusetCpus": "",
"CpusetMems": "",
"Devices": [],
"DeviceCgroupRules": null,
"DiskQuota": 0,
"KernelMemory": 0,
"MemoryReservation": 0,
"MemorySwap": 0,
"MemorySwappiness": null,
"OomKillDisable": false,
"PidsLimit": 0,
"Ulimits": null,
"CpuCount": 0,
"CpuPercent": 0,
"IOMaximumIOps": 0,
"IOMaximumBandwidth": 0,
"MaskedPaths": [
"/proc/acpi",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/proc/scsi",
"/sys/firmware"
],
"ReadonlyPaths": [
"/proc/asound",
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
]
},
"GraphDriver": {
"Data": {
"LowerDir": "/var/lib/docker/overlay2/a7ceb4e085f72e54b66b61df3258960ac3bc88aafab5a60efcfbeb8203597db4-init/diff:/var/lib/docker/overlay2/770b19665218649d077ed1a137fd4d8c58d66d9f4196a1f2fc34fa5a3596ebdd/diff:/var/lib/docker/overlay2/38f86670a4a1b03bc401665354f6efb00fb640403d56324831d5498d56737cf5/diff:/var/lib/docker/overlay2/0dadb1207aa38e122aef7939ee148c3aee31f6acb2ae603004b29f8f675c781b/diff:/var/lib/docker/overlay2/1fc8b621d943cc444044bc4c21a9f0906ce6245b930f6b96862ffdaf8229a514/diff:/var/lib/docker/overlay2/ab11380a1881b28394d966878888c3f0c858e72d741705babb4978e3fd3a6a29/diff:/var/lib/docker/overlay2/b54beede1e992e5c4f25062b4db0d379c5743d614f03e46d6bafa28b70cf7c48/diff:/var/lib/docker/overlay2/65943e8e983287aebfa4e1dd451787c81aff8f9902bd25d43514da772556ae77/diff:/var/lib/docker/overlay2/8165210dd57e91b3abebb89b31b5c54a70a9c6ea775091aa11c87520bdfec196/diff:/var/lib/docker/overlay2/f58d445d486c60497768d3729e315ec6000a23367099442d9b437eea1c312ba7/diff:/var/lib/docker/overlay2/8023e8bafff5505650bde0f220771637cca5a26d72c6d08ba73c464a4f9412b0/diff:/var/lib/docker/overlay2/126aee4412463377e027ed5db07455b9b915d9b9ebc9489b3c6617909b738eed/diff:/var/lib/docker/overlay2/7d30d98da3e800a2674abc7d27b61f3d85d0ea5c2cef5e85f0e8a9c5fc20de64/diff:/var/lib/docker/overlay2/3c05d92a12b39dd9b96f776c3baaa93887310c6b4f4ff9d06af5ad7d382afef2/diff:/var/lib/docker/overlay2/414dd5d8a8a4d2a0a5bd6254b7137dbf78a18205fb761da8e4866eebab6d7aa0/diff:/var/lib/docker/overlay2/5fac71d28d0067f6fc4a608d1ddf88c488dd55ecfa65c07b6121605fc5bddad9/diff:/var/lib/docker/overlay2/bd5ecbc6fbf072754d95f4b7b6f0491ea2916d469fa6fc365b558fcb06ab37b5/diff:/var/lib/docker/overlay2/7a134a533226c24d6cd2ae016b53f884252ef4da9ff09c3d8f9225212997fb95/diff:/var/lib/docker/overlay2/0be4061891b9002577a43a2d5315257d75774e377d83c08e053758dd524f6eb9/diff:/var/lib/docker/overlay2/0cb10681b083a20de9c030fdbc7bb06aaa6cbb3f37da1824be619baf4ca25195/diff:/var/lib/docker/overlay2/24f24194efed8d1e64c406ba1c1c4cb45c1a9f5256125bb4d4956c92e531bbe5/diff:/var/lib/docker/overlay2/6814733ebced937e9c27d6cefdacd6ac38770588dee9fa3ab6cdd713194bd527/diff:/var/lib/docker/overlay2/db61eeab24952d14b5e4a81bfe1dbbc79be93a9ab1b2c641a4b28c36a7b92868/diff:/var/lib/docker/overlay2/9bc881a93206aef155c4b706a6d3c20cc8a30e27813acc28f9650b6f760777a3/diff",
"MergedDir": "/var/lib/docker/overlay2/a7ceb4e085f72e54b66b61df3258960ac3bc88aafab5a60efcfbeb8203597db4/merged",
"UpperDir": "/var/lib/docker/overlay2/a7ceb4e085f72e54b66b61df3258960ac3bc88aafab5a60efcfbeb8203597db4/diff",
"WorkDir": "/var/lib/docker/overlay2/a7ceb4e085f72e54b66b61df3258960ac3bc88aafab5a60efcfbeb8203597db4/work"
},
"Name": "overlay2"
},
"Mounts": [
{
"Type": "bind",
"Source": "/home/mirero/mls_system",
"Destination": "/home/mirero/mls_system",
"Mode": "",
"RW": true,
"Propagation": "rprivate"
}
],
"Config": {
"Hostname": "c99f44445544",
"Domainname": "",
"User": "",
"AttachStdin": false,
"AttachStdout": false,
"AttachStderr": false,
"ExposedPorts": {
"137/tcp": {},
"21/tcp": {},
"22/tcp": {},
"445/tcp": {},
"5000/tcp": {},
"5901/tcp": {},
"6006/tcp": {},
"8888/tcp": {},
"8901/tcp": {},
"8902/tcp": {},
"8903/tcp": {},
"8904/tcp": {},
"8905/tcp": {},
"8931/tcp": {},
"8932/tcp": {},
"8933/tcp": {},
"8934/tcp": {},
"8935/tcp": {},
"8951/tcp": {},
"8952/tcp": {},
"8953/tcp": {},
"8954/tcp": {},
"8955/tcp": {},
"9141/tcp": {},
"9142/tcp": {},
"9161/tcp": {},
"9162/tcp": {}
},
"Tty": true,
"OpenStdin": true,
"StdinOnce": false,
"Env": [
"PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"NVARCH=x86_64",
"NVIDIA_REQUIRE_CUDA=cuda>=10.0 brand=tesla,driver>=384,driver<385 brand=tesla,driver>=410,driver<411",
"NV_CUDA_CUDART_VERSION=10.0.130-1",
"NV_ML_REPO_ENABLED=1",
"NV_ML_REPO_URL=https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64",
"CUDA_VERSION=10.0.130",
"LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64",
"NVIDIA_VISIBLE_DEVICES=all",
"NVIDIA_DRIVER_CAPABILITIES=compute,utility",
"NV_CUDA_LIB_VERSION=10.0.130-1",
"NV_NVTX_VERSION=10.0.130-1",
"NV_LIBNPP_VERSION=10.0.130-1",
"NV_LIBCUSPARSE_VERSION=10.0.130-1",
"NV_LIBCUBLAS_PACKAGE_NAME=cuda-cublas-10-0",
"NV_LIBCUBLAS_VERSION=10.0.130-1",
"NV_LIBCUBLAS_PACKAGE=cuda-cublas-10-0=10.0.130-1",
"NV_LIBNCCL_PACKAGE_NAME=libnccl2",
"NV_LIBNCCL_PACKAGE_VERSION=2.6.4-1",
"NCCL_VERSION=2.6.4-1",
"NV_LIBNCCL_PACKAGE=libnccl2=2.6.4-1+cuda10.0",
"NV_CUDA_CUDART_DEV_VERSION=10.0.130-1",
"NV_NVML_DEV_VERSION=10.0.130-1",
"NV_LIBCUSPARSE_DEV_VERSION=10.0.130-1",
"NV_LIBNPP_DEV_VERSION=10.0.130-1",
"NV_LIBCUBLAS_DEV_PACKAGE_NAME=cuda-cublas-dev-10-0",
"NV_LIBCUBLAS_DEV_VERSION=10.0.130-1",
"NV_LIBCUBLAS_DEV_PACKAGE=cuda-cublas-dev-10-0=10.0.130-1",
"NV_LIBNCCL_DEV_PACKAGE_NAME=libnccl-dev",
"NV_LIBNCCL_DEV_VERSION=2.6.4-1",
"NV_LIBNCCL_DEV_PACKAGE=libnccl-dev=2.6.4-1+cuda10.0",
"LIBRARY_PATH=/usr/local/cuda/lib64/stubs",
"NV_CUDNN_PACKAGE_VERSION=7.6.5.32-1",
"NV_CUDNN_VERSION=7.6.5.32",
"NV_CUDNN_PACKAGE_NAME=libcudnn7",
"NV_CUDNN_PACKAGE=libcudnn7=7.6.5.32-1+cuda10.0",
"NV_CUDNN_PACKAGE_DEV=libcudnn7-dev=7.6.5.32-1+cuda10.0",
"CUDNN_VERSION=7.6.5.32"
],
"Cmd": [
"sh",
"-c",
"/home/mirero/boot.sh&&/bin/bash"
],
"Image": "mirero/adc40:cuda10.0-cudnn7-devel-ubuntu16.04",
"Volumes": null,
"WorkingDir": "",
"Entrypoint": null,
"OnBuild": null,
"Labels": {
"com.nvidia.cudnn.version": "7.6.5.32",
"maintainer": "NVIDIA CORPORATION <cudatools@nvidia.com>"
}
},
"NetworkSettings": {
"Bridge": "",
"SandboxID": "f726af1e4a2ba14cad04214a4bfdc1de981b735e09a6dbb7d923581d8f1303db",
"HairpinMode": false,
"LinkLocalIPv6Address": "",
"LinkLocalIPv6PrefixLen": 0,
"Ports": {
"137/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "137"
}
],
"21/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "21"
}
],
"22/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "2222"
}
],
"445/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "445"
}
],
"5000/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "5000"
}
],
"5901/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "5901"
}
],
"6006/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "6006"
}
],
"8888/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8888"
}
],
"8901/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8901"
}
],
"8902/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8902"
}
],
"8903/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8903"
}
],
"8904/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8904"
}
],
"8905/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8905"
}
],
"8931/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8931"
}
],
"8932/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8932"
}
],
"8933/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8933"
}
],
"8934/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8934"
}
],
"8935/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8935"
}
],
"8951/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8951"
}
],
"8952/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8952"
}
],
"8953/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8953"
}
],
"8954/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8954"
}
],
"8955/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "8955"
}
],
"9141/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "9141"
}
],
"9142/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "9142"
}
],
"9161/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "9161"
}
],
"9162/tcp": [
{
"HostIp": "0.0.0.0",
"HostPort": "9162"
}
]
},
"SandboxKey": "/var/run/docker/netns/f726af1e4a2b",
"SecondaryIPAddresses": null,
"SecondaryIPv6Addresses": null,
"EndpointID": "045ad830ef9ab565d1ee183097b402cdb6d68ae7a6ce81b6dbaf22158dccdab2",
"Gateway": "172.17.0.1",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"IPAddress": "172.17.0.2",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"MacAddress": "02:42:ac:11:00:02",
"Networks": {
"bridge": {
"IPAMConfig": null,
"Links": null,
"Aliases": null,
"NetworkID": "2ecfef5e617f2a705e5726350004b6c56d4da5f3df7ebd30f5924f0402f38c08",
"EndpointID": "045ad830ef9ab565d1ee183097b402cdb6d68ae7a6ce81b6dbaf22158dccdab2",
"Gateway": "172.17.0.1",
"IPAddress": "172.17.0.2",
"IPPrefixLen": 16,
"IPv6Gateway": "",
"GlobalIPv6Address": "",
"GlobalIPv6PrefixLen": 0,
"MacAddress": "02:42:ac:11:00:02",
"DriverOpts": null
}
}
}
}
]
I don't think this is a port number collision. If we look at the failing log, the logged ActorSystem is listening on port 33919:
[2022/09/14 03:28:38.980] ... Remoting now listens on addresses: [akka.tcp://exeutor-system@100.100.100.234:33919]
But then it could not communicate with a remote ActorSystem at port 4061:
[2022/09/14 03:28:39.190] ... Association with remote system akka.tcp://cluster-system@100.100.100.230:4061 has failed; address is now gated for 5000 ms. Reason is: [Akka.Remote.EndpointDisassociatedException: Disassociated
@Arkatufus
[2022/09/14 03:03:33.290][ 112][ INFO]akka.tcp://cluster-system@100.100.100.234:8931/remote/akka.tcp/cluster-system@100.100.100.230:4065/user/ApiMasterActor/singleton/InferenceManageActor/c40 [6683][mono-sgen] [2022/09/14 03:03:33.290][11218][ INFO]akka.tcp://exeutor-system@100.100.100.234:33919/user/Actor2 Start [Elapsed time]=0.011568s
[2022/09/14 03:28:38.980][ 119][ INFO]akka.tcp://cluster-system@100.100.100.234:8931/remote/akka.tcp/cluster-system@100.100.100.230:4065/user/ApiMasterActor/singleton/InferenceManageActor/c34 [22596][mono-sgen] [2022/09/14 03:28:38.980][ 59][ INFO]remoting (akka://exeutor-system) Remoting started; listening on addresses : [akka.tcp://exeutor-system@100.100.100.234:33919]
[2022/09/14 03:28:39.192][ 7063][ INFO]akka.tcp://cluster-system@100.100.100.234:8931/remote/akka.tcp/cluster-system@100.100.100.230:4065/user/ApiMasterActor/singleton/InferenceManageActor/c40 [6683][mono-sgen] [2022/09/14 03:28:39.190][11207][ WARN]akka.tcp://exeutor-system@100.100.100.234:33919/system/endpointManager/reliableEndpointWriter-akka.tcp%3A%2F%2Fcluster-system%40100.100.100.230%3A4061-1 Association with remote system akka.tcp://cluster-system@100.100.100.230:4061 has failed; address is now gated for 5000 ms. Reason is: [Akka.Remote.EndpointDisassociatedException: Disassociated
That is weird. Is it possible for you to update to .NET Core? Is there a specific reason why you needed to use .NET Framework, Mono, and Ubuntu 16.04?
It was developed 3 years ago and cannot be changed. Would changing to Akka.NET version 1.4.41 solve the problem?
No, it would not. The underlying Socket.Bind() code will be the same. I'm more concerned with the Mono implementation of Socket.Bind() on Linux at this point.
I will try updating mono 5.20.1.19 -> mono 6.12.0.
@kimbyungeun is this still an issue after you upgraded?
@Aaronontheweb
I upgraded to mono 6.12.0, but the issue still occurred.
Today I updated ubuntu 16.04 kernel 4.4.0-87 -> ubuntu 16.04 kernel 4.10.0-28. No issues found after the kernel update. Your advice helped me solve the problem. @Aaronontheweb @Arkatufus Thank you.
We are going to move to .NET 6 and Ubuntu 20.04.
Version Information
Describe the bug
Application
(Old) Mono Client App 1 Log
(New) Mono Client App 2 Error Log
exeutor-system hocon
dot-netty.tcp.port = 0
Environment
6.12.0
5.20.1.19
$ cat /proc/sys/net/ipv4/ip_local_port_range
32768 60999