Open suikast42 opened 1 year ago
Hi @suikast42 👋
I have not been able to reproduce this problem. Does your Nomad client change IP address between reboots? If that's the case, would it be possible to keep the same IP and see if the problem still happens?
Hi @suikast42 👋
I have not been able to reproduce this problem. Does your Nomad client change IP address between reboots? If that's the case, would it be possible to keep the same IP and see if the problem still happens?
I have configured my VM with Vagrant. eth0 uses DHCP; eth1 is configured with the static IP 10.21.21.x
Nomad Server.hcl
log_level = "DEBUG"
name = "master-01"
datacenter = "nomadder1"
data_dir = "/opt/services/core/nomad/data"
server {
enabled = true
bootstrap_expect = 1
encrypt = "4PRfoE6Mj9dHTLpnzmYD1+THdlyAo2Ji4U6ewMumpAw="
rejoin_after_leave = true
server_join {
retry_join = ["10.21.21.41"]
retry_max = 0
retry_interval = "15s"
}
}
bind_addr = "0.0.0.0" # the default
advertise {
# Defaults to the first private IP address.
http = "10.21.21.41"
rpc = "10.21.21.41"
serf = "10.21.21.41"
}
tls {
http = true
rpc = true
ca_file = "/usr/local/share/ca-certificates/cloudlocal/cluster-ca-bundle.pem"
cert_file = "/etc/opt/certs/nomad/nomad.pem"
key_file = "/etc/opt/certs/nomad/nomad-key.pem"
verify_server_hostname = true
verify_https_client = true
}
ui {
enabled = true
label {
text = "💛💙 Fenerbahçe 1907 💙💛"
background_color = "#163962"
text_color = "#ffed00"
}
consul {
ui_url = "https://consul.cloud.private"
}
vault {
ui_url = "https://vault.cloud.private"
}
}
consul{
ssl= true
address = "127.0.0.1:8501"
grpc_address = "127.0.0.1:8503"
# this works only with ACL enabled
allow_unauthenticated= true
ca_file = "/usr/local/share/ca-certificates/cloudlocal/cluster-ca-bundle.pem"
grpc_ca_file = "/usr/local/share/ca-certificates/cloudlocal/cluster-ca-bundle.pem"
cert_file = "/etc/opt/certs/consul/consul.pem"
key_file = "/etc/opt/certs/consul/consul-key.pem"
}
telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}
Nomad Client.hcl
log_level = "DEBUG"
name = "worker-01"
datacenter = "nomadder1"
data_dir = "/opt/services/core/nomad/data"
bind_addr = "0.0.0.0" # the default
advertise {
# Defaults to the first private IP address.
http = "10.21.21.42"
rpc = "10.21.21.42"
serf = "10.21.21.42"
}
client {
enabled = true
meta {
node_type= "worker"
connect.log_level = "debug"
# connect.sidecar_image: "registry.cloud.private/envoyproxy/envoy:v1.25.3"
}
server_join {
retry_join = ["10.21.21.41"]
retry_max = 0
retry_interval = "15s"
}
host_volume "ca_cert" {
path = "/usr/local/share/ca-certificates/cloudlocal"
read_only = true
}
host_volume "cert_ingress" {
path = "/etc/opt/certs/ingress"
read_only = true
}
## Cert consul client
## Needed for consul_sd_configs
## Should be deleted after resolve https://github.com/suikast42/nomadder/issues/100
host_volume "cert_consul" {
path = "/etc/opt/certs/consul"
read_only = true
}
host_network "private" {
interface = "eth0"
#cidr = "203.0.113.0/24"
#reserved_ports = "22,80"
}
host_network "default" {
interface = "eth1"
}
host_network "public" {
interface = "eth1"
}
host_network "local" {
interface = "lo"
}
host_network "nomad" {
interface = "nomad"
}
reserved {
# cpu (int: 0) - Specifies the amount of CPU to reserve, in MHz.
# cores (int: 0) - Specifies the number of CPU cores to reserve.
# memory (int: 0) - Specifies the amount of memory to reserve, in MB.
# disk (int: 0) - Specifies the amount of disk to reserve, in MB.
# reserved_ports (string: "") - Specifies a comma-separated list of ports to reserve on all fingerprinted network devices. Ranges can be specified by using a hyphen separating the two inclusive ends. See also host_network for reserving ports on specific host networks.
cpu = 1000
memory = 2048
}
max_kill_timeout = "1m"
}
tls {
http = true
rpc = true
ca_file = "/usr/local/share/ca-certificates/cloudlocal/cluster-ca-bundle.pem"
cert_file = "/etc/opt/certs/nomad/nomad.pem"
key_file = "/etc/opt/certs/nomad/nomad-key.pem"
verify_server_hostname = true
verify_https_client = true
}
consul{
ssl= true
address = "127.0.0.1:8501"
grpc_address = "127.0.0.1:8503"
# this works only with ACL enabled
allow_unauthenticated= true
ca_file = "/usr/local/share/ca-certificates/cloudlocal/cluster-ca-bundle.pem"
grpc_ca_file = "/usr/local/share/ca-certificates/cloudlocal/cluster-ca-bundle.pem"
cert_file = "/etc/opt/certs/consul/consul.pem"
key_file = "/etc/opt/certs/consul/consul-key.pem"
}
telemetry {
collection_interval = "1s"
disable_hostname = true
prometheus_metrics = true
publish_allocation_metrics = true
publish_node_metrics = true
}
plugin "docker" {
config {
allow_privileged = true
# volumes {
# enabled = true
# selinuxlabel = "z"
# }
infra_image = "registry.cloud.private/google_containers/pause-amd64:3.2"
extra_labels = ["job_name", "job_id", "task_group_name", "task_name", "namespace", "node_name", "node_id"]
logging {
type = "journald"
config {
labels-regex =".*"
}
}
gc{
container = true
dangling_containers{
enabled = true
# period = "3m"
# creation_grace = "5m"
}
}
}
}
Would it be possible to share the Vagrantfile? The closer we can get to your environment the better 😄
Sure
required_plugins = %w(vagrant-vmware-desktop vagrant-timezone)
plugins_to_install = required_plugins.select { |plugin| not Vagrant.has_plugin? plugin }
if not plugins_to_install.empty?
puts "Installing plugins: #{plugins_to_install.join(' ')}"
if system "vagrant plugin install #{plugins_to_install.join(' ')}"
exec "vagrant #{ARGV.join(' ')}"
else
abort "Installation of one or more plugins has failed. Aborting."
end
end
$vm_base_box="suikast42/ubuntu-22.04_64"
$vm_base_box_version="1.1.1"
$vm_mem_master_01 = 8096 # 8 GB RAM for master
$vm_cpus_master_01 = 4
$vm_hostname_master_01 = "master-01"
$vm_master_01_ip = "10.21.21.41"
$vm_mem_worker_01 = 43008 # 42 GB RAM for the workloads
$vm_cpus_worker_01 = 12
$vm_hostname_worker_01 = "worker-01"
$vm_worker_01_ip = "10.21.21.42"
$vm_mem_worker_02 = 14000 # 42 GB RAM for the workloads
$vm_cpus_worker_02 = 4
$vm_hostname_worker_02 = "worker-02"
$vm_worker_02_ip = "10.21.21.43"
$vm_mem_worker_03 = 14000 # 42 GB RAM for the workloads
$vm_cpus_worker_03 = 4
$vm_hostname_worker_03 = "worker-03"
$vm_worker_03_ip = "10.21.21.44"
Vagrant.configure("2") do |config|
# common settings for server and agent
config.vm.box = $vm_base_box
config.vm.synced_folder '.', '/vagrant', disabled: true
config.ssh.forward_agent = true
config.ssh.insert_key = false
vagrant_home_path = ENV["VAGRANT_HOME"] ||= "~/.vagrant.d"
config.ssh.private_key_path = ["#{vagrant_home_path}/insecure_private_key", "../../ssh/id_rsa"]
# create new user, add it to sudoers and allow ssh login over key
config.vm.provision :shell, privileged: true do |s|
vm_user = "cloudmaster"
ssh_pub_key = File.readlines("../../ssh/id_rsa.pub").first.strip
s.inline = <<-SHELL
rm /home/$USER/.ssh/authorized_keys
useradd -m -s /bin/bash -U #{vm_user} -p #{vm_user}
mkdir /home/#{vm_user}/.ssh
echo #{ssh_pub_key} >> /home/#{vm_user}/.ssh/authorized_keys
chown -R #{vm_user}:#{vm_user} /home/#{vm_user}
echo "%#{vm_user} ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/#{vm_user}
SHELL
end
config.timezone.value = :host
#master
config.vm.define "master_01" do |mastercfg|
mastercfg.vm.provider :vmware_desktop do |master_01|
mastercfg.vm.hostname = $vm_hostname_master_01
mastercfg.vm.network :private_network, ip: $vm_master_01_ip, :netmask => "255.255.0.0"
mastercfg.vm.box_version = $vm_base_box_version
master_01.gui=true
master_01.linked_clone = false
master_01.vmx["memsize"] = $vm_mem_master_01
master_01.vmx["numvcpus"] = $vm_cpus_master_01
master_01.vmx['displayname'] =$vm_hostname_master_01
end
end
#worker
config.vm.define "worker_01" do |workercfg|
workercfg.vm.provider :vmware_desktop do |worker_01|
workercfg.vm.hostname = $vm_hostname_worker_01
workercfg.vm.network :private_network, ip: $vm_worker_01_ip, :netmask => "255.255.0.0"
workercfg.vm.box_version = $vm_base_box_version
worker_01.gui=true
worker_01.linked_clone = false
worker_01.vmx["memsize"] = $vm_mem_worker_01
worker_01.vmx["numvcpus"] = $vm_cpus_worker_01
worker_01.vmx['displayname'] =$vm_hostname_worker_01
end
end
# config.vm.define "worker_02" do |workercfg|
# workercfg.vm.provider :vmware_desktop do |worker_02|
# workercfg.vm.hostname = $vm_hostname_worker_02
# workercfg.vm.network :private_network, ip: $vm_worker_02_ip, :netmask => "255.255.0.0"
# workercfg.vm.box_version = $vm_base_box_version
# worker_02.gui=true
# worker_02.linked_clone = false
# worker_02.vmx["memsize"] = $vm_mem_worker_02
# worker_02.vmx["numvcpus"] = $vm_cpus_worker_02
# worker_02.vmx['displayname'] =$vm_hostname_worker_02
# end
# end
# config.vm.define "worker_03" do |workercfg|
# workercfg.vm.provider :vmware_desktop do |worker_03|
# workercfg.vm.hostname = $vm_hostname_worker_03
# workercfg.vm.network :private_network, ip: $vm_worker_03_ip, :netmask => "255.255.0.0"
# workercfg.vm.box_version = $vm_base_box_version
# worker_03.gui=true
# worker_03.linked_clone = false
# worker_03.vmx["memsize"] = $vm_mem_worker_03
# worker_03.vmx["numvcpus"] = $vm_cpus_worker_03
# worker_03.vmx['displayname'] =$vm_hostname_worker_03
# end
# end
# Somehow this part is not working with bento image provisioning
# Let's do it self
$disk_resize = <<-SHELL
echo "Full disk capacity for /dev/sda3"
# Increase the Physical Volume (pv) to max size
pvresize /dev/sda3
# Expand the Logical Volume (LV) to max size to match
lvresize -l +100%FREE /dev/mapper/ubuntu--vg-ubuntu--lv
# Expand the filesystem itself
resize2fs /dev/mapper/ubuntu--vg-ubuntu--lv
SHELL
config.vm.provision "shell", inline: $disk_resize, privileged: true
end
Thanks for the extra info 🙏
At first pass I don't see anything particularly wrong with the setup 🤔
I'm going to mark this for further triaging and roadmapping so we can have the time to investigate it deeper.
Maybe the full project setup can help.
Actually I have no readme with build instructions yet, but I can add one if needed: https://github.com/suikast42/nomadder
I had same problem (while experimenting with various load balancer tutorials) but on dedicated hardware.
Rescheduling jobs fixes it for me: nomad job restart -reschedule $JOB_NAME
I now run this on boot after Nomad starts.
The bells are ringing!
This issue raises a big question about the feasibility of using Nomad in production. We have encountered it more than five times in different cluster configurations, all within just a week.
The easiest way to reproduce it is just restart Docker engine (or reboot node):
sudo systemctl restart docker
Then, connect to any container using docker exec -it
and see that the address has disappeared along with the interface.
Yes, I know that restarting the Docker engine while Nomad is running is not the best idea. However, it also happens when the node is simply rebooted.
docker
driver and the bridge
network mode ip a
directly from the container. This job is deployment-ready, just change the DC name.
After deployment, you should be able to do telnet to localhost:1883
job "mqtt-broker" {
datacenters = ["demo-mesh"]
type = "system"
group "brokers" {
network {
mode = "bridge"
port "mqtt" { static = 1883 }
}
task "broker-1" {
driver = "docker"
config {
image = "eclipse-mosquitto"
ports = ["mqtt"]
command = "/usr/sbin/mosquitto"
args = ["-c", "local/mosquitto.conf"]
}
template {
change_mode = "signal"
change_signal = "SIGHUP"
data = <<EOH
listener 1883
log_dest stdout
allow_anonymous true
EOH
destination = "local/mosquitto.conf"
}
service {
name = "mqtt-broker"
port = "mqtt"
tags = ["mqtt","mosquitto","broker"]
check {
type = "tcp"
port = "mqtt"
interval = "2s"
timeout = "2s"
}
}
}
}
}
# nomad --version
Nomad v1.5.6
BuildDate 2023-05-19T18:26:13Z
Revision 8af70885c02ab921dedbdf6bc406a1e886866f80
Arch: arm64 Ubuntu 20.04.6 LTS (Focal Fossa) NetworkManager is used (sic!)
# docker --version
Docker version 20.10.21, build 20.10.21-0ubuntu1~20.04.2
Pretty sure this is related to #17959
We're also seeing this issue with nomad jobs using bridge mode networking losing their network interfaces after a host reboot.
We're currently on Nomad v1.7.5
A workaround is to kill all init containers before restarting the worker node.
I can also confirm that the suggested workaround solves the issue for us as well.
Nomad version
Nomad v1.5.1 BuildDate 2023-03-10T22:05:57Z Revision 6c118ddaf95e9bfd3d85f488f255fdc5e14129c7
Issue
Nomad jobs with bridge network hangs after nomad starts if there is no restart and check configured.
The reason I figured out is that the container's internal network is missing the eth0 interface after the worker reboots.
Expected interfaces
Observed interfaces
The entry in the hosts file of the counter-dashboard container points to an invalid IP address.
A workaround is to kill all init containers before restarting the worker node.
After I kill the init containers and restart the worker, the first attempt fails and the second attempt deploys.
Reproduction steps
Deploy this job and restart the worker node after the counter app is up.
Expected Result
The counter dashboard deploys successfully.
Actual Result
The counter dashboard is not available.
Job file (if appropriate)