submariner-io / submariner

Networking component for interconnecting Pods and Services across Kubernetes clusters.
https://submariner.io

Something wrong on rancher 2.2.1 (2.1.7 2.1.8) #14

Closed Negashev closed 5 years ago

Negashev commented 5 years ago

1) Start Rancher HA on RKE and add the catalog https://github.com/rancher/submariner-charts

2) Create the broker cluster:

addon_job_timeout: 30
authentication: 
  strategy: "x509"
bastion_host: 
  ssh_agent_auth: false
ignore_docker_version: true
# 
#   # Currently only nginx ingress provider is supported.
#   # To disable ingress controller, set `provider: none`
#   # To enable ingress on specific nodes, use the node_selector, eg:
#      provider: nginx
#      node_selector:
#        app: ingress
# 
ingress: 
  provider: "nginx"
kubernetes_version: "v1.13.5-rancher1-2"
monitoring: 
  provider: "metrics-server"
# 
#   # If you are using calico on AWS
# 
#      network:
#        plugin: calico
#        calico_network_provider:
#          cloud_provider: aws
# 
#   # To specify flannel interface
# 
#      network:
#        plugin: flannel
#        flannel_network_provider:
#          iface: eth1
# 
#   # To specify flannel interface for canal plugin
# 
#      network:
#        plugin: canal
#        canal_network_provider:
#          iface: eth1
# 
network: 
  options: 
    flannel_backend_type: "vxlan"
  plugin: "canal"
restore: 
  restore: false
# 
#      services:
#        kube-api:
#          service_cluster_ip_range: 10.43.0.0/16
#        kube-controller:
#          cluster_cidr: 10.42.0.0/16
#          service_cluster_ip_range: 10.43.0.0/16
#        kubelet:
#          cluster_domain: cluster.local
#          cluster_dns_server: 10.43.0.10
# 
services: 
  etcd: 
    backup_config: 
      enabled: true
      interval_hours: 12
      retention: 6
    creation: "12h"
    extra_args: 
      election-timeout: "5000"
      heartbeat-interval: "500"
    retention: "72h"
    snapshot: false
  kube-api: 
    always_pull_images: false
    pod_security_policy: false
    service_node_port_range: "30000-32767"
  kubelet: 
    fail_swap_on: false
ssh_agent_auth: false
# 
#   # Rancher Config
# 
docker_root_dir: "/var/lib/docker"
enable_cluster_alerting: false
enable_cluster_monitoring: false
enable_network_policy: false
local_cluster_auth_endpoint: 
  enabled: false
name: "test-submariner-broker"

3) Create the east cluster

4) Create the west cluster:

addon_job_timeout: 30
authentication: 
  strategy: "x509"
bastion_host: 
  ssh_agent_auth: false
dns: 
  provider: "kube-dns"
ignore_docker_version: true
# 
#   # Currently only nginx ingress provider is supported.
#   # To disable ingress controller, set `provider: none`
#   # To enable ingress on specific nodes, use the node_selector, eg:
#      provider: nginx
#      node_selector:
#        app: ingress
# 
ingress: 
  provider: "nginx"
kubernetes_version: "v1.13.5-rancher1-2"
monitoring: 
  provider: "metrics-server"
# 
#   # If you are using calico on AWS
# 
#      network:
#        plugin: calico
#        calico_network_provider:
#          cloud_provider: aws
# 
#   # To specify flannel interface
# 
#      network:
#        plugin: flannel
#        flannel_network_provider:
#          iface: eth1
# 
#   # To specify flannel interface for canal plugin
# 
#      network:
#        plugin: canal
#        canal_network_provider:
#          iface: eth1
# 
network: 
  options: 
    flannel_backend_type: "vxlan"
  plugin: "canal"
restore: 
  restore: false
# 
#      services:
#        kube-api:
#          service_cluster_ip_range: 10.43.0.0/16
#        kube-controller:
#          cluster_cidr: 10.42.0.0/16
#          service_cluster_ip_range: 10.43.0.0/16
#        kubelet:
#          cluster_domain: cluster.local
#          cluster_dns_server: 10.43.0.10
# 
services: 
  etcd: 
    backup_config: 
      enabled: true
      interval_hours: 12
      retention: 6
    creation: "12h"
    extra_args: 
      election-timeout: "5000"
      heartbeat-interval: "500"
    retention: "72h"
    snapshot: false
  kube-api: 
    always_pull_images: false
    pod_security_policy: false
    service_cluster_ip_range: "10.1.0.0/16"
    service_node_port_range: "30000-32767"
  kube-controller: 
    cluster_cidr: "10.0.0.0/16"
    service_cluster_ip_range: "10.1.0.0/16"
  kubelet: 
    cluster_dns_server: "10.1.0.10"
    cluster_domain: "west.local"
    fail_swap_on: false
ssh_agent_auth: false
# 
#   # Rancher Config
# 
docker_root_dir: "/var/lib/docker"
enable_cluster_alerting: false
enable_cluster_monitoring: false
enable_network_policy: false
local_cluster_auth_endpoint: 
  enabled: false
name: "test-submariner-west"

5) Test!

On west we have 2 nginx pods (10.0.0.5, 10.0.1.4); on east we have 2 nginx pods (10.98.1.5, 10.98.0.4).
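
For reference, a minimal sketch of the kind of test used here (the pod name "test" and the busybox image are assumptions; the IPs are the ones listed above):

# launch a throwaway client pod on the west cluster
kubectl run -it --rm test --image=busybox --restart=Never -- sh
# inside it, try to reach the east pods directly by IP
ping -c 3 10.98.1.5
wget -qO- http://10.98.0.4/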

Interesting things:

Conclusion:

The cross-cluster network works only from the machine on which the engine is running; as a result, only one node sees all the pods of the other cluster.

Negashev commented 5 years ago

Tested again with flannel on both west and east; nothing changed.

Oats87 commented 5 years ago

Are your nodes within the same subnet?

Negashev commented 5 years ago

@Oats87 Yes, all machines are under 10.6.x.x (OpenStack).

And physically, all 3 clusters are in one data center (10 Gb network).

Negashev commented 5 years ago

Reproduced on rancher/rancher:v2.1.8 (what am I doing wrong?)

Negashev commented 5 years ago

@Oats87 Okay, tried rancher/rancher:v2.1.7 and it reproduced there too :sob: :scream:. The three cluster configs:

First config (default CIDRs, presumably the broker):

addon_job_timeout: 30
authentication: 
  strategy: "x509"
bastion_host: 
  ssh_agent_auth: false
ignore_docker_version: true
ingress: 
  provider: "nginx"
kubernetes_version: "v1.13.4-rancher1-1"
monitoring: 
  provider: "metrics-server"
network: 
  options: 
    flannel_backend_type: "vxlan"
  plugin: "canal"
services: 
  etcd: 
    creation: "12h"
    extra_args: 
      election-timeout: "5000"
      heartbeat-interval: "500"
    retention: "72h"
    snapshot: true
  kube-api: 
    pod_security_policy: false
    service_node_port_range: "30000-32767"
  kubelet: 
    fail_swap_on: false
ssh_agent_auth: false
Second config (cluster1.local):

addon_job_timeout: 30
authentication: 
  strategy: "x509"
bastion_host: 
  ssh_agent_auth: false
ignore_docker_version: true
ingress: 
  provider: "nginx"
kubernetes_version: "v1.13.4-rancher1-1"
monitoring: 
  provider: "metrics-server"
network: 
  options: 
    flannel_backend_type: "vxlan"
  plugin: "canal"
services: 
  etcd: 
    creation: "12h"
    extra_args: 
      election-timeout: "5000"
      heartbeat-interval: "500"
    retention: "72h"
    snapshot: true
  kube-api: 
    pod_security_policy: false
    service_cluster_ip_range: "10.61.0.0/16"
    service_node_port_range: "30000-32767"
  kube-controller: 
    cluster_cidr: "10.51.0.0/16"
    service_cluster_ip_range: "10.61.0.0/16"
  kubelet: 
    cluster_dns_server: "10.61.0.10"
    cluster_domain: "cluster1.local"
    fail_swap_on: false
ssh_agent_auth: false
Third config (cluster2.local):

addon_job_timeout: 30
authentication: 
  strategy: "x509"
bastion_host: 
  ssh_agent_auth: false
ignore_docker_version: true
ingress: 
  provider: "nginx"
kubernetes_version: "v1.13.4-rancher1-1"
monitoring: 
  provider: "metrics-server"
network: 
  options: 
    flannel_backend_type: "vxlan"
  plugin: "canal"
services: 
  etcd: 
    creation: "12h"
    extra_args: 
      election-timeout: "5000"
      heartbeat-interval: "500"
    retention: "72h"
    snapshot: true
  kube-api: 
    pod_security_policy: false
    service_cluster_ip_range: "10.62.0.0/16"
    service_node_port_range: "30000-32767"
  kube-controller: 
    cluster_cidr: "10.52.0.0/16"
    service_cluster_ip_range: "10.62.0.0/16"
  kubelet: 
    cluster_dns_server: "10.62.0.10"
    cluster_domain: "cluster2.local"
    fail_swap_on: false
ssh_agent_auth: false
Oats87 commented 5 years ago

@Negashev can you check the routing table on the other nodes (those that are not the gateway host) to see if the routes were properly installed? You should see routing rules for the other cluster's service/cluster CIDRs.
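
For example, on a west node one would expect something like this (a sketch; 10.98.0.0/16 is inferred from the east pod IPs in this thread, and the via/dev values are placeholders):

# look for routes covering the other cluster's CIDRs
ip route show | grep 10.98
# expected on every node, not only on the gateway:
# 10.98.0.0/16 via <gateway-node-ip> dev eth0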

Negashev commented 5 years ago

@Oats87 You mean ip route on the machines? It shows a route with the CIDR of the other cluster only on one machine: the one where the engine first started.

Negashev commented 5 years ago

pod_on_cluster_1_machine_with_SUBmaster traceroute to pod_on_cluster_2:

/ # traceroute 10.98.0.14
traceroute to 10.98.0.14 (10.98.0.14), 30 hops max, 46 byte packets
 1  10.76.0.1 (10.76.0.1)  0.009 ms  0.007 ms  0.092 ms
 2  machine_with_SUBmaster_CLUSTER_2 (10.6.193.143)  0.554 ms  0.510 ms  0.403 ms
 3  10.98.0.14 (10.98.0.14)  0.273 ms  0.560 ms  0.159 ms

pod_on_cluster_1_machine_WITHOUT_SUBmaster:

/ # traceroute 10.98.0.14
traceroute to 10.98.0.14 (10.98.0.14), 30 hops max, 46 byte packets
 1  10.76.2.1 (10.76.2.1)  0.019 ms  0.010 ms  0.005 ms
 2  machine_with_SUBmaster_CLUSTER_1 (10.6.193.144)  0.520 ms  0.536 ms  0.396 ms
 3  *  *  *
 ...
11  *  *  *

Negashev commented 5 years ago

Reproduced on Rancher 2.2.1 with Hetzner Cloud, Ubuntu 16 and flannel.

Negashev commented 5 years ago

We played with tcpdump and ping and found a problem with the reply packets to ping:

the reply is lost when node 1 (cluster 2) sends it to node 2 (cluster 2); node 2 receives nothing.

[screenshot: Rancher (2)]
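
For reference, a sketch of the kind of capture that narrows down where the reply dies (the "any" pseudo-interface is an assumption; 10.0.0.5 is one of the west pod IPs above):

# run on the east gateway node, the other east node, and the west gateway,
# then ping from the west pod and watch where the echo-reply stops appearing
tcpdump -ni any icmp and host 10.0.0.5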

Negashev commented 5 years ago

Reproduced on Rancher 2.2.2, RKE, flannel, CentOS Linux 7 (3.10.0-957.1.3.el7.x86_64).

Oats87 commented 5 years ago

@Negashev Does the Hetzner Cloud enforce strict IP src/dst checks?

https://docs.aws.amazon.com/vpc/latest/userguide/VPC_NAT_Instance.html#EIP_Disable_SrcDestCheck
https://cloud.google.com/vpc/docs/using-routes#canipforward
https://docs.microsoft.com/en-us/azure/virtual-network/virtual-network-network-interface#enable-or-disable-ip-forwarding

for the three major US clouds.
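
For comparison, on AWS the equivalent fix is disabling the source/destination check per instance (the instance ID below is a placeholder):

# allow the node to forward traffic for addresses it does not own
aws ec2 modify-instance-attribute --instance-id i-0123456789abcdef0 --no-source-dest-check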

Negashev commented 5 years ago

@Oats87 Wow, hard question. I don't have extensive knowledge in this area, but I think not.

Negashev commented 5 years ago

@Oats87 We didn't use NAT on Hetzner Cloud, nor on our local cloud with the 10.6.x.x CIDR for machines.

Negashev commented 5 years ago

We turned off port-security in OpenStack and it helped (after rebooting the nodes).
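
For anyone hitting the same thing, a sketch of the OpenStack CLI steps (server and port IDs are placeholders; the port's security groups have to be cleared before port security can be disabled):

# find the Neutron port attached to the node
openstack port list --server <node-name>
# clear security groups and disable port security on that port
openstack port set --no-security-group --disable-port-security <port-id>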

Negashev commented 2 years ago

Still does not work on Hetzner (with RKE2).