hashicorp / nomad

Nomad is an easy-to-use, flexible, and performant workload orchestrator that can deploy a mix of microservice, batch, containerized, and non-containerized applications. Nomad is easy to operate and scale and has native Consul and Vault integrations.
https://www.nomadproject.io/
Other
14.98k stars 1.96k forks source link

Nomad and Service Mesh unstable connection #23885

Open fred-gb opened 3 months ago

fred-gb commented 3 months ago

Bonjour 👋🏻

Nomad version

Nomad v1.6.3
BuildDate 2023-10-30T12:58:10Z
Revision e0497bff14378d68cad76a801cc0eba93ce05039

Operating system and Environment details

Distributor ID: Ubuntu
Description:    Ubuntu 22.04.1 LTS
Release:        22.04
Codename:       jammy

Consul

Consul v1.17.0
Revision 4e3f428b
Build Date 2023-11-03T14:56:56Z
Protocol 2 spoken by default, understands 2 to 3 (agent will automatically use protocol >2 when speaking to compatible agents)

Vault

Vault v1.15.2 (cf1b5cafa047bc8e4a3f93444fcb4011593b92cb), built 2023-11-06T11:33:28Z

Issue

I have a RocketChat job. RocketChat needs MongoDB, and I use Consul service mesh to connect RocketChat to its MongoDB database. But after a few hours I need to restart the job, because RocketChat becomes unusable: very, very slow, messages cannot be sent, etc.

I added a dirty cron command to restart the job every day at 5:00 am. It has been working like this for a year.

Two weeks ago, I decided to stop using Consul service mesh and reworked the job to use Consul DNS resolution only. And it works! For 13 days now, I haven't needed to restart the job. 🥳

I don't know how to explain this.

Now it works without Consul service mesh, which is good, but it bothers me not to use Consul service mesh.

Am I forgetting something?

This is my previous version of job, with Consul service mesh:

# Nomad job running Rocket.Chat and its MongoDB backend as two separate groups.
# In this variant the groups talk to each other through Consul service mesh
# (Connect) sidecars: Rocket.Chat reaches MongoDB via a local upstream listener.
job "chat.domain.tld" {
  region      = "global"
  datacenters = ["dc1"]
  type        = "service"

  group "rocketchat" {
    count = 1

    restart {
      attempts = 10
      interval = "5m"
      delay    = "10s"
      mode     = "delay"
    }

    network {
      mode = "bridge"
    }

    # Service for the Rocket.Chat HTTP port (3000). It is registered with a
    # Connect sidecar and carries the Traefik routing / header-middleware tags.
    service {
      name = "chat-domain-tld"
      port = "3000"

      connect {
        sidecar_service {}

        # Resource limits for the sidecar proxy task of this service.
        sidecar_task {
          resources {
            cpu    = 256
            memory = 256
          }
        }
      }

      tags = [
        "traefik.enable=true",
        "traefik.http.routers.chatdomaintld.tls=true",
        "traefik.http.routers.chatdomaintld.tls.certresolver=myresolver",
        "traefik.http.routers.chatdomaintld.tls.options=mintls12@file",
        "traefik.http.routers.chatdomaintld.entrypoints=https",
        "traefik.http.routers.chatdomaintld.rule=Host(`chat.domain.tld`)",

        "traefik.http.middlewares.chatdomaintld.redirectscheme.scheme=https",
        "traefik.http.middlewares.chatdomaintld.redirectscheme.permanent=true",
        "traefik.http.middlewares.chatdomaintld-headers.headers.customResponseHeaders.X-Robots-Tag=noindex,nofollow,nosnippet,noarchive,notranslate,noimageindex",
        "traefik.http.middlewares.chatdomaintld-headers.headers.customResponseHeaders.Strict-Transport-Security=max-age=63072000",
        "traefik.http.middlewares.chatdomaintld-headers.headers.frameDeny=true",
        "traefik.http.middlewares.chatdomaintld-headers.headers.browserXssFilter=true",
        "traefik.http.middlewares.chatdomaintld-headers.headers.contentTypeNosniff=true",
        "traefik.http.middlewares.chatdomaintld-headers.headers.stsIncludeSubdomains=true",
        "traefik.http.middlewares.chatdomaintld-headers.headers.stsPreload=true",
        "traefik.http.middlewares.chatdomaintld-headers.headers.stsSeconds=31536000",
        "traefik.http.middlewares.chatdomaintld-headers.headers.forceSTSHeader=true",
        "traefik.http.middlewares.chatdomaintld-headers.headers.accessControlMaxAge=15552000",
        "traefik.http.middlewares.chatdomaintld-headers.headers.customFrameOptionsValue=SAMEORIGIN",
        "traefik.http.routers.chatdomaintld.middlewares=chatdomaintld-headers@consulcatalog",
        "prometheus.metrics.enable",
        "app_name=rocketchat"
      ]
    }

    # Second service in the same group, used only as a mesh client: its sidecar
    # binds the "mongo-chat-domain-tld" upstream to local port 27017, which is
    # the address MONGO_URL / MONGO_OPLOG_URL below point at (127.0.0.1:27017).
    service {
      name = "chat-domain-tld-client-mesh"

      connect {
        sidecar_service {
          proxy {
            upstreams {
              destination_name = "mongo-chat-domain-tld"
              local_bind_port  = "27017"
            }
          }
        }

        # Resource limits for the sidecar proxy task of this service.
        sidecar_task {
          resources {
            cpu    = 256
            memory = 256
          }
        }
      }
    }

    task "rocketchat" {
      driver = "docker"

      config {
        image = "rocketchat/rocket.chat:6.9.2"
      }

      # Rendered to an env file and loaded into the task environment (env = true).
      # NOTE(review): ${DEPLOY_PLATFORM} is not defined anywhere in this job —
      # confirm how it is expected to resolve when the template is rendered.
      template {
        data = <<EOH
ANSIBLE_FORCE_COLOR=TRUE

MONGO_URL="mongodb://127.0.0.1:27017/rocketchat?replicaSet=rs0&directConnection=true"

MONGO_OPLOG_URL="mongodb://127.0.0.1:27017/local?replicaSet=rs0&directConnection=true"

ROOT_URL=https://chat.domain.tld

PORT=3000

DEPLOY_METHOD=docker

DEPLOY_PLATFORM=${DEPLOY_PLATFORM}

OVERWRITE_SETTING_Show_Setup_Wizard=completed

IGNORE_CHANGE_STREAM=true

EOH
        destination = "secrets/rocketchat.file.env"
        env         = true
      }

      resources {
        cpu    = 2048
        memory = 2048
      }
    }
  }

  group "mongo" {
    count = 1

    restart {
      attempts = 10
      interval = "5m"
      delay    = "10s"
      mode     = "delay"
    }

    network {
      mode = "bridge"
    }

    # Mesh destination for the Rocket.Chat upstream declared above; the name
    # must match that upstream's destination_name.
    service {
      name = "mongo-chat-domain-tld"
      port = "27017"

      connect {
        sidecar_service {}

        # Resource limits for the sidecar proxy task of this service.
        sidecar_task {
          resources {
            cpu    = 256
            memory = 256
          }
        }
      }
    }

    task "mongo" {
      driver = "docker"

      config {
        image = "bitnami/mongodb:4.4"

        # Bind-mount the host data directory into the container's data path.
        mount {
          type     = "bind"
          target   = "/bitnami/mongodb"
          source   = "/data/chat.domain.tld/data/bitnami/mongodb"
          readonly = false
          bind_options {
            propagation = "rshared"
          }
        }
      }

      # Rendered to an env file and loaded into the task environment (env = true).
      # NOTE(review): ALLOW_EMPTY_PASSWORD=yes presumably leaves MongoDB without
      # authentication — confirm this is acceptable for this deployment.
      template {
        data = <<EOH
ANSIBLE_FORCE_COLOR=TRUE

MONGODB_REPLICA_SET_MODE=primary

MONGODB_REPLICA_SET_NAME=rs0

MONGODB_PORT_NUMBER=27017

MONGODB_INITIAL_PRIMARY_HOST=127.0.0.1

MONGODB_ADVERTISED_HOSTNAME=127.0.0.1

MONGODB_INITIAL_PRIMARY_PORT_NUMBER=27017

MONGODB_ENABLE_JOURNAL=true

ALLOW_EMPTY_PASSWORD=yes

EOH
        destination = "secrets/mongo.file.env"
        env         = true
      }

      resources {
        cpu    = 2048
        memory = 2048
      }
    }
  }

}

And version without Consul service mesh, only Consul DNS resolution:

# Nomad job for Rocket.Chat plus MongoDB, DNS-based variant: Rocket.Chat finds
# MongoDB through the Consul DNS name of the "mongo-internal" service instead
# of a Connect upstream.
job "chat.domain.tld" {
  type        = "service"
  region      = "global"
  datacenters = ["dc1"]

  # ---------------------------------------------------------------------------
  # Rocket.Chat application
  # ---------------------------------------------------------------------------
  group "rocketchat" {
    count = 1

    network {
      mode = "bridge"
    }

    restart {
      mode     = "delay"
      attempts = 10
      interval = "5m"
      delay    = "10s"
    }

    # Service for the Rocket.Chat HTTP port; still registered with a Connect
    # sidecar and tagged for Traefik routing and response-header middleware.
    service {
      name = "chat-domain-tld"
      port = "3000"

      tags = [
        "traefik.enable=true",
        "traefik.http.routers.chatdomaintld.tls=true",
        "traefik.http.routers.chatdomaintld.tls.certresolver=myresolver",
        "traefik.http.routers.chatdomaintld.tls.options=mintls12@file",
        "traefik.http.routers.chatdomaintld.entrypoints=https",
        "traefik.http.routers.chatdomaintld.rule=Host(`chat.domain.tld`)",
        "traefik.http.middlewares.chatdomaintld.redirectscheme.scheme=https",
        "traefik.http.middlewares.chatdomaintld.redirectscheme.permanent=true",
        "traefik.http.middlewares.chatdomaintld-headers.headers.customResponseHeaders.X-Robots-Tag=noindex,nofollow,nosnippet,noarchive,notranslate,noimageindex",
        "traefik.http.middlewares.chatdomaintld-headers.headers.customResponseHeaders.Strict-Transport-Security=max-age=63072000",
        "traefik.http.middlewares.chatdomaintld-headers.headers.frameDeny=true",
        "traefik.http.middlewares.chatdomaintld-headers.headers.browserXssFilter=true",
        "traefik.http.middlewares.chatdomaintld-headers.headers.contentTypeNosniff=true",
        "traefik.http.middlewares.chatdomaintld-headers.headers.stsIncludeSubdomains=true",
        "traefik.http.middlewares.chatdomaintld-headers.headers.stsPreload=true",
        "traefik.http.middlewares.chatdomaintld-headers.headers.stsSeconds=31536000",
        "traefik.http.middlewares.chatdomaintld-headers.headers.forceSTSHeader=true",
        "traefik.http.middlewares.chatdomaintld-headers.headers.accessControlMaxAge=15552000",
        "traefik.http.middlewares.chatdomaintld-headers.headers.customFrameOptionsValue=SAMEORIGIN",
        "traefik.http.routers.chatdomaintld.middlewares=chatdomaintld-headers@consulcatalog",
        "app_name=rocketchat"
      ]

      connect {
        sidecar_service {}

        # CPU / memory granted to the sidecar proxy task.
        sidecar_task {
          resources {
            cpu    = 256
            memory = 256
          }
        }
      }
    }

    task "rocketchat" {
      driver = "docker"

      config {
        image = "rocketchat/rocket.chat:6.9.2"
      }

      resources {
        cpu    = 2048
        memory = 2048
      }

      # Environment file for the container (env = true). Both Mongo URLs use
      # the Consul DNS name of the "mongo-internal" service defined below.
      # NOTE(review): ${DEPLOY_PLATFORM} is not defined anywhere in this job —
      # confirm how it is expected to resolve when the template is rendered.
      template {
        destination = "secrets/rocketchat.file.env"
        env         = true
        data        = <<EOH
ANSIBLE_FORCE_COLOR=TRUE

MONGO_URL="mongodb://mongo-internal.service.consul:27017/rocketchat?replicaSet=rs0&directConnection=true"

MONGO_OPLOG_URL="mongodb://mongo-internal.service.consul:27017/local?replicaSet=rs0&directConnection=true"

ROOT_URL=https://chat.domain.tld

PORT=3000

DEPLOY_METHOD=docker

DEPLOY_PLATFORM=${DEPLOY_PLATFORM}

OVERWRITE_SETTING_Show_Setup_Wizard=completed

IGNORE_CHANGE_STREAM=true 

EOH
      }
    }
  }

  # ---------------------------------------------------------------------------
  # MongoDB backend
  # ---------------------------------------------------------------------------
  group "mongo" {
    count = 1

    network {
      mode = "bridge"

      # Port label "mongo": static port 27017 mapped to 27017 inside the
      # allocation's network namespace.
      port "mongo" {
        static = 27017
        to     = 27017
      }
    }

    restart {
      mode     = "delay"
      attempts = 10
      interval = "5m"
      delay    = "10s"
    }

    # Plain (non-Connect) service; this is the name behind
    # mongo-internal.service.consul used in the Rocket.Chat Mongo URLs.
    service {
      name         = "mongo-internal"
      port         = "mongo"
      address_mode = "alloc"
    }

    task "mongo" {
      driver = "docker"

      config {
        image = "bitnami/mongodb:4.4"

        # Host directory bind-mounted as the container's data path.
        mount {
          type     = "bind"
          source   = "/data/chat.domain.tld/data/bitnami/mongodb"
          target   = "/bitnami/mongodb"
          readonly = false

          bind_options {
            propagation = "rshared"
          }
        }
      }

      resources {
        cpu    = 2048
        memory = 2048
      }

      # Environment file for the container (env = true).
      # NOTE(review): the template carries a commented-out MONGODB_ROOT_PASSWORD
      # value; if it is a real credential it should be rotated and removed.
      # NOTE(review): ALLOW_EMPTY_PASSWORD=yes presumably leaves MongoDB without
      # authentication while the port is published statically — confirm.
      template {
        destination = "secrets/mongo.file.env"
        env         = true
        data        = <<EOH
ANSIBLE_FORCE_COLOR=TRUE

MONGODB_REPLICA_SET_MODE=primary

MONGODB_REPLICA_SET_NAME=rs0

MONGODB_PORT_NUMBER=27017

MONGODB_INITIAL_PRIMARY_HOST=127.0.0.1

MONGODB_ADVERTISED_HOSTNAME=127.0.0.1

MONGODB_INITIAL_PRIMARY_PORT_NUMBER=27017

MONGODB_ENABLE_JOURNAL=true

#MONGODB_ROOT_PASSWORD=pJEkb543XXjh45

ALLOW_EMPTY_PASSWORD=yes

EOH
      }
    }
  }

}

Thanks

Juanadelacuesta commented 2 months ago

Hello @fred-gb! Looking at your issue, the most likely culprit is an Envoy proxy configuration error, not Nomad in this case. I would recommend taking a look at it. You can find some guidance on what to look for here and here. I hope this helps.