hashicorp / nomad

Nomad is an easy-to-use, flexible, and performant workload orchestrator that can deploy a mix of microservice, batch, containerized, and non-containerized applications. Nomad is easy to operate and scale and has native Consul and Vault integrations.
https://www.nomadproject.io/
Other
14.92k stars 1.95k forks source link

auto_config does not work out of box with tls and service mesh #14458

Open suikast42 opened 2 years ago

suikast42 commented 2 years ago

Nomad version

Nomad v1.3.5

Consul version

Consul v1.13.1

I am trying out the Nomad (Consul) service mesh capabilities with the counter-dashboard example. Without TLS enabled it works as expected, so it counts :-)

After the first successful deployment, I followed the guide for Consul security.

After this step the Envoy proxy is no longer able to connect to the service. The Envoy proxy says:

[2022-09-02 19:54:31.353][1][warning][config] [./source/common/config/grpc_stream.h:196] DeltaAggregatedResources gRPC config stream closed since 34s ago: 14, upstream connect error or disconnect/reset before headers. reset reason: connection termination

image

I use cfssl to generate my certificates rather than the CA tool provided by Consul, but I also tried the dashboard example with Consul's CA tool. The result is the same in both cases.

After some digging I found a workaround.

The auto_config section defines the client cert settings as shown below.

 "verify_incoming": false,
 "verify_outgoing": true,
 "verify_server_hostname": true,
 "ca_file": "/consul/config/certs/consul-agent-ca.pem"

But as you can see below, I put my own generated certificates there and added env vars to the nomad.service systemd unit file:

Environment="CONSUL_HTTP_ADDR=127.0.0.1:8501"
Environment="CONSUL_HTTP_SSL=true"

Without adding the confusing and unnecessary cert_file and key_file definitions to the Consul client config, I get an error from the Nomad client:

2022-09-02T20:23:02.422Z [ERROR] client: error discovering nomad servers: error=\"client.consul: unable to query Consul datacenters: Get \\"https://127.0.0.1:8501/v1/catalog/datacenters\\": x509: certificate signed by unknown authority\""

Without adding the systemd env vars, the proxy does not connect.

Is something missing in the documentation, or did I miss something?

For a simple ACL setup I use the management token 'e95b599e-166e-7d80-08ad-aee76e7ddf19'

Consul server:

{
  "node_name": "master-01",
  "datacenter": "nomadder_1",
  "data_dir": "/opt/deployments/core/consul/data",
  "server": true,
  "log_level": "INFO",
  "bind_addr": "0.0.0.0",
  "advertise_addr": "10.21.21.41",
  "client_addr": "0.0.0.0",
  "encrypt": "G1CHAD7wwu0tU28BlKkirSahTJ/Tqpo9ClOAycQAUwE=",
  "ui_config": {
    "enabled": true
  },
  "addresses": {
    "grpc": "127.0.0.1"
  },
  "ports": {
    "grpc": 8502,
    "https": 8501
  },
  "connect": {
    "enabled": true
  },
  "retry_join": [
    "10.21.21.41"
  ],
  "bootstrap_expect": 1,
  "acl": {
    "enabled": true,
    "default_policy": "deny",
    "enable_token_persistence": true,
    "tokens": {
      "initial_management": "e95b599e-166e-7d80-08ad-aee76e7ddf19",
      "agent": "e95b599e-166e-7d80-08ad-aee76e7ddf19"
    }
  },
  "performance": {
    "raft_multiplier": 1
  },
  "auto_config": {
    "authorization": {
      "enabled": true,
      "static": {
        "jwt_validation_pub_keys": [
          "-----BEGIN PUBLIC KEY-----\nMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEjv+sWRoGN8SDkNuPxRoKpJ3YRj90\n+2g9VW9eBwNkxRM0womm13NqY/tTA9VYWTWFOV5JAhAA6mFGinNGdOkcLA==\n-----END PUBLIC KEY-----\n"
        ],
        "bound_issuer": "secint",
        "bound_audiences": [
          "consul-cluster-nomadder_1"
        ],
        "claim_mappings": {
          "sub": "node_name"
        },
        "claim_assertions": [
          "value.node_name == \"${node}\""
        ]
      }
    }
  },
  "node_meta": {
    "node_type": "server"
  },
  "tls": {
    "defaults": {
      "ca_file": "/usr/local/share/ca-certificates/cloudlocal/cluster-ca-bundle.pem",
      "cert_file": "/etc/opt/certs/consul/consul.pem",
      "key_file": "/etc/opt/certs/consul/consul-key.pem",
      "verify_incoming": true,
      "verify_outgoing": true
    },
    "internal_rpc": {
      "verify_server_hostname": true
    }
  }
}

nomad server

# Nomad agent configuration for node "master-01" (server role).
log_level  = "DEBUG"
name       = "master-01"
datacenter = "nomadder_1"
data_dir   = "/opt/deployments/core/nomad/data"

# Server mode is enabled; a single server is expected for bootstrap.
server {
  enabled          = true
  bootstrap_expect = 1
  encrypt          = "4PRfoE6Mj9dHTLpnzmYD1+THdlyAo2Ji4U6ewMumpAw="
}

# TLS is switched on for both the HTTP and RPC endpoints.
tls {
  http = true
  rpc  = true

  ca_file   = "/usr/local/share/ca-certificates/cloudlocal/cluster-ca-bundle.pem"
  cert_file = "/etc/opt/certs/nomad/nomad.pem"
  key_file  = "/etc/opt/certs/nomad/nomad-key.pem"

  verify_server_hostname = true
  verify_https_client    = false
}

# Consul integration: only a token is configured here; no address or TLS
# options are set in this block.
consul {
  token                 = "e95b599e-166e-7d80-08ad-aee76e7ddf19"
  allow_unauthenticated = false
}

# Web UI with links to the Consul and Vault UIs.
ui {
  enabled = true

  consul {
    ui_url = "http://10.21.21.41:8500/ui"
  }

  vault {
    ui_url = "https://10.21.21.41:8200/ui"
  }
}

Consul client

{
  "node_name": "worker-01",
  "datacenter": "nomadder_1",
  "data_dir": "/opt/deployments/core/consul/data",
  "bind_addr": "0.0.0.0",
  "advertise_addr": "10.21.21.42",
  "auto_config": {
    "enabled": true,
    "intro_token_file": "/opt/deployments/core/consul/data/tokens/jwt/worker-01.jwt",
    "server_addresses": [
      "10.21.21.41"
    ]
  },
  "node_meta": {
    "node_type": "worker"
  },
  "ports": {
    "grpc": 8502,
    "https": 8501
  },
  "connect": {
    "enabled": true
  },
  "tls": {
    "defaults": {
      "ca_file": "/usr/local/share/ca-certificates/cloudlocal/cluster-ca-bundle.pem",
      "cert_file": "/etc/opt/certs/consul/consul.pem",
      "key_file": "/etc/opt/certs/consul/consul-key.pem",
      "verify_incoming": false,
      "verify_outgoing": true
    },
    "internal_rpc": {
      "verify_server_hostname": true
    }
  }
}

Nomad client

# Nomad agent configuration for node "worker-01" (client role).
log_level  = "DEBUG"
name       = "worker-01"
datacenter = "nomadder_1"
data_dir   = "/opt/deployments/core/nomad/data"

# Client mode is enabled on this agent.
client {
  enabled = true
}

# TLS is switched on for both the HTTP and RPC endpoints.
tls {
  http = true
  rpc  = true

  ca_file   = "/usr/local/share/ca-certificates/cloudlocal/cluster-ca-bundle.pem"
  cert_file = "/etc/opt/certs/nomad/nomad.pem"
  key_file  = "/etc/opt/certs/nomad/nomad-key.pem"

  verify_server_hostname = true
  verify_https_client    = true
}

# Consul integration: only a token is configured here; no address or TLS
# options are set in this block.
consul {
  token                 = "e95b599e-166e-7d80-08ad-aee76e7ddf19"
  allow_unauthenticated = false
}

# Docker driver plugin with volumes enabled.
plugin "docker" {
  config {
    volumes {
      enabled = true
    }
  }
}
DerekStrickland commented 2 years ago

Hi @suikast42 . Thanks for sharing your experience. I'm glad that you ultimately figured out how to get things working. I'm sorry the documentation wasn't as clear as you hoped. I'll take a look and see if there is something that needs improvement.