openshift / openshift-ansible

Install and config an OpenShift 3.x cluster
https://try.openshift.com
Apache License 2.0

Docker registry on Glusterfs storage error #5308

Closed: Asgoret closed this issue 7 years ago

Asgoret commented 7 years ago

Description

Can't deploy the docker registry on GlusterFS storage.

Version
ansible 2.3.1.0
  config file = /etc/ansible/ansible.cfg
  configured module search path = Default w/o overrides
  python version = 2.7.5 (default, Nov  6 2016, 00:28:07) [GCC 4.8.5 20150623 (Red Hat 4.8.5-11)]

git describe:
openshift-ansible-3.7.0-0.125.0-2-g3384369

uname -a:
Linux openshift-%name% 3.10.0-514.el7.x86_64 #1 SMP Tue Nov 22 16:42:41 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux
Steps To Reproduce
  1. Edit /etc/ansible/inventory
    
    [OSEv3:children]
    masters
    nodes
    etcd
    glusterfs
    glusterfs_registry

[OSEv3:vars]
ansible_ssh_user=root
openshift_deployment_type=origin
containerized=false
osm_use_cockpit=true
openshift_storage_glusterfs_is_native=False
openshift_storage_glusterfs_heketi_url=10.5.135.185   <- master
openshift_hosted_registry_storage_kind=glusterfs

[masters]
openshift-master

[etcd]
openshift-master

[nodes]
openshift-master openshift_schedulable=false
openshift-node1 openshift_node_labels="{'region': 'primary', 'zone': 'firstzone'}"
openshift-node2 openshift_node_labels="{'region': 'primary', 'zone': 'secondzone'}"
openshift-gluster1 openshift_schedulable=true openshift_node_labels="{'region': 'infra'}"
openshift-gluster2 openshift_schedulable=true openshift_node_labels="{'region': 'infra'}"
openshift-gluster3 openshift_schedulable=true openshift_node_labels="{'region': 'infra'}"

[glusterfs]
openshift-gluster4 glusterfs_devices='[ "/dev/sdb" ]'
openshift-gluster5 glusterfs_devices='[ "/dev/sdb" ]'
openshift-gluster6 glusterfs_devices='[ "/dev/sdb" ]'

[glusterfs_registry]
openshift-gluster1 glusterfs_devices='[ "/dev/sdb" ]'
openshift-gluster2 glusterfs_devices='[ "/dev/sdb" ]'
openshift-gluster3 glusterfs_devices='[ "/dev/sdb" ]'

2. From /etc/ansible run:

ansible-playbook -i ./inventory /opt/env/openshift-ansible/playbooks/byo/config.yml


Expected Results

A working docker registry deployed on GlusterFS storage.

Observed Results

Error:

TASK [openshift_hosted : Wait for registry pods] ** FAILED - RETRYING: Wait for registry pods (60 retries left). ... FAILED - RETRYING: Wait for registry pods (1 retries left). fatal: [openshift-master]: FAILED! => {"attempts": 60, "changed": false, "failed": true, "results": {"cmd": "/usr/bin/oc get pod --selector=docker-registry=default -o json -n default", "results": [{"apiVersion": "v1", "items": [{"apiVersion": "v1", "kind": "Pod", "metadata": {"annotations": {"kubernetes.io/created-by": "{\"kind\":\"SerializedReference\",\"apiVersion\":\"v1\",\"reference\":{\"kind\":\"ReplicationController\",\"namespace\":\"default\",\"name\":\"docker-registry-1\",\"uid\":\"1a224e3d-8da1-11e7-9026-00505693371a\",\"apiVersion\":\"v1\",\"resourceVersion\":\"1867\"}}\n", "openshift.io/deployment-config.latest-version": "1", "openshift.io/deployment-config.name": "docker-registry", "openshift.io/deployment.name": "docker-registry-1", "openshift.io/scc": "hostnetwork"}, "creationTimestamp": "2017-08-30T16:35:40Z", "generateName": "docker-registry-1-", "labels": {"deployment": "docker-registry-1", "deploymentconfig": "docker-registry", "docker-registry": "default"}, "name": "docker-registry-1-9pks4", "namespace": "default", "ownerReferences": [{"apiVersion": "v1", "blockOwnerDeletion": true, "controller": true, "kind": "ReplicationController", "name": "docker-registry-1", "uid": "1a224e3d-8da1-11e7-9026-00505693371a"}], "resourceVersion": "1879", "selfLink": "/api/v1/namespaces/default/pods/docker-registry-1-9pks4", "uid": "42930ff7-8da1-11e7-9026-00505693371a"}, "spec": {"containers": [{"env": [{"name": "REGISTRY_HTTP_ADDR", "value": ":5000"}, {"name": "REGISTRY_HTTP_NET", "value": "tcp"}, {"name": "REGISTRY_HTTP_SECRET", "value": "BGzdoN8TjdXyko7FZJBQAWZ7lYeBKDYfyJOBhHhCkhs="}, {"name": "REGISTRY_MIDDLEWARE_REPOSITORY_OPENSHIFT_ENFORCEQUOTA", "value": "false"}, {"name": "OPENSHIFT_DEFAULT_REGISTRY", "value": "docker-registry.default.svc:5000"}, {"name": "REGISTRY_HTTP_TLS_KEY", "value": "/etc/secrets/registry.key"}, {"name": "REGISTRY_HTTP_TLS_CERTIFICATE", "value": "/etc/secrets/registry.crt"}], "image": "openshift/origin-docker-registry:v3.6.0", "imagePullPolicy": "IfNotPresent", "livenessProbe": {"failureThreshold": 3, "httpGet": {"path": "/healthz", "port": 5000, "scheme": "HTTPS"}, "initialDelaySeconds": 10, "periodSeconds": 10, "successThreshold": 1, "timeoutSeconds": 5}, "name": "registry", "ports": [{"containerPort": 5000, "protocol": "TCP"}], "readinessProbe": {"failureThreshold": 3, "httpGet": {"path": "/healthz", "port": 5000, "scheme": "HTTPS"}, "periodSeconds": 10, "successThreshold": 1, "timeoutSeconds": 5}, "resources": {"requests": {"cpu": "100m", "memory": "256Mi"}}, "securityContext": {"capabilities": {"drop": ["KILL", "MKNOD", "SETGID", "SETUID", "SYS_CHROOT"]}, "privileged": false, "runAsUser": 1000030000, "seLinuxOptions": {"level": "s0:c6,c0"}}, "terminationMessagePath": "/dev/termination-log", "terminationMessagePolicy": "File", "volumeMounts": [{"mountPath": "/registry", "name": "registry-storage"}, {"mountPath": "/etc/secrets", "name": "registry-certificates"}, {"mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", "name": "registry-token-j83qn", "readOnly": true}]}], "dnsPolicy": "ClusterFirst", "imagePullSecrets": [{"name": "registry-dockercfg-jpnq9"}], "nodeName": "openshift-gluster2", "nodeSelector": {"region": "infra"}, "restartPolicy": "Always", "schedulerName": "default-scheduler", "securityContext": {"fsGroup": 1000030000, "seLinuxOptions": {"level": "s0:c6,c0"}, 
"supplementalGroups": [1000030000]}, "serviceAccount": "registry", "serviceAccountName": "registry", "terminationGracePeriodSeconds": 30, "volumes": [{"name": "registry-storage", "persistentVolumeClaim": {"claimName": "registry-claim"}}, {"name": "registry-certificates", "secret": {"defaultMode": 420, "secretName": "registry-certificates"}}, {"name": "registry-token-j83qn", "secret": {"defaultMode": 420, "secretName": "registry-token-j83qn"}}]}, "status": {"conditions": [{"lastProbeTime": null, "lastTransitionTime": "2017-09-05T14:27:37Z", "status": "True", "type": "Initialized"}, {"lastProbeTime": null, "lastTransitionTime": "2017-09-05T14:27:37Z", "message": "containers with unready status: [registry]", "reason": "ContainersNotReady", "status": "False", "type": "Ready"}, {"lastProbeTime": null, "lastTransitionTime": "2017-08-30T16:35:40Z", "status": "True", "type": "PodScheduled"}], "containerStatuses": [{"image": "openshift/origin-docker-registry:v3.6.0", "imageID": "", "lastState": {}, "name": "registry", "ready": false, "restartCount": 0, "state": {"waiting": {"reason": "ContainerCreating"}}}], "hostIP": "10.5.135.170", "phase": "Pending", "qosClass": "Burstable", "startTime": "2017-09-05T14:27:37Z"}}, {"apiVersion": "v1", "kind": "Pod", "metadata": {"annotations": {"kubernetes.io/created-by": "{\"kind\":\"SerializedReference\",\"apiVersion\":\"v1\",\"reference\":{\"kind\":\"ReplicationController\",\"namespace\":\"default\",\"name\":\"docker-registry-1\",\"uid\":\"1a224e3d-8da1-11e7-9026-00505693371a\",\"apiVersion\":\"v1\",\"resourceVersion\":\"1867\"}}\n", "openshift.io/deployment-config.latest-version": "1", "openshift.io/deployment-config.name": "docker-registry", "openshift.io/deployment.name": "docker-registry-1", "openshift.io/scc": "hostnetwork"}, "creationTimestamp": "2017-08-30T16:35:40Z", "generateName": "docker-registry-1-", "labels": {"deployment": "docker-registry-1", "deploymentconfig": "docker-registry", "docker-registry": "default"}, "name": "docker-registry-1-ppzqk", "namespace": "default", "ownerReferences": [{"apiVersion": "v1", "blockOwnerDeletion": true, "controller": true, "kind": "ReplicationController", "name": "docker-registry-1", "uid": "1a224e3d-8da1-11e7-9026-00505693371a"}], "resourceVersion": "1881", "selfLink": "/api/v1/namespaces/default/pods/docker-registry-1-ppzqk", "uid": "42930c52-8da1-11e7-9026-00505693371a"}, "spec": {"containers": [{"env": [{"name": "REGISTRY_HTTP_ADDR", "value": ":5000"}, {"name": "REGISTRY_HTTP_NET", "value": "tcp"}, {"name": "REGISTRY_HTTP_SECRET", "value": "BGzdoN8TjdXyko7FZJBQAWZ7lYeBKDYfyJOBhHhCkhs="}, {"name": "REGISTRY_MIDDLEWARE_REPOSITORY_OPENSHIFT_ENFORCEQUOTA", "value": "false"}, {"name": "OPENSHIFT_DEFAULT_REGISTRY", "value": "docker-registry.default.svc:5000"}, {"name": "REGISTRY_HTTP_TLS_KEY", "value": "/etc/secrets/registry.key"}, {"name": "REGISTRY_HTTP_TLS_CERTIFICATE", "value": "/etc/secrets/registry.crt"}], "image": "openshift/origin-docker-registry:v3.6.0", "imagePullPolicy": "IfNotPresent", "livenessProbe": {"failureThreshold": 3, "httpGet": {"path": "/healthz", "port": 5000, "scheme": "HTTPS"}, "initialDelaySeconds": 10, "periodSeconds": 10, "successThreshold": 1, "timeoutSeconds": 5}, "name": "registry", "ports": [{"containerPort": 5000, "protocol": "TCP"}], "readinessProbe": {"failureThreshold": 3, "httpGet": {"path": "/healthz", "port": 5000, "scheme": "HTTPS"}, "periodSeconds": 10, "successThreshold": 1, "timeoutSeconds": 5}, "resources": {"requests": {"cpu": "100m", "memory": "256Mi"}}, 
"securityContext": {"capabilities": {"drop": ["KILL", "MKNOD", "SETGID", "SETUID", "SYS_CHROOT"]}, "privileged": false, "runAsUser": 1000030000, "seLinuxOptions": {"level": "s0:c6,c0"}}, "terminationMessagePath": "/dev/termination-log", "terminationMessagePolicy": "File", "volumeMounts": [{"mountPath": "/registry", "name": "registry-storage"}, {"mountPath": "/etc/secrets", "name": "registry-certificates"}, {"mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", "name": "registry-token-j83qn", "readOnly": true}]}], "dnsPolicy": "ClusterFirst", "imagePullSecrets": [{"name": "registry-dockercfg-jpnq9"}], "nodeName": "openshift-gluster3", "nodeSelector": {"region": "infra"}, "restartPolicy": "Always", "schedulerName": "default-scheduler", "securityContext": {"fsGroup": 1000030000, "seLinuxOptions": {"level": "s0:c6,c0"}, "supplementalGroups": [1000030000]}, "serviceAccount": "registry", "serviceAccountName": "registry", "terminationGracePeriodSeconds": 30, "volumes": [{"name": "registry-storage", "persistentVolumeClaim": {"claimName": "registry-claim"}}, {"name": "registry-certificates", "secret": {"defaultMode": 420, "secretName": "registry-certificates"}}, {"name": "registry-token-j83qn", "secret": {"defaultMode": 420, "secretName": "registry-token-j83qn"}}]}, "status": {"conditions": [{"lastProbeTime": null, "lastTransitionTime": "2017-09-05T14:29:59Z", "status": "True", "type": "Initialized"}, {"lastProbeTime": null, "lastTransitionTime": "2017-09-05T14:29:59Z", "message": "containers with unready status: [registry]", "reason": "ContainersNotReady", "status": "False", "type": "Ready"}, {"lastProbeTime": null, "lastTransitionTime": "2017-08-30T16:35:40Z", "status": "True", "type": "PodScheduled"}], "containerStatuses": [{"image": "openshift/origin-docker-registry:v3.6.0", "imageID": "", "lastState": {}, "name": "registry", "ready": false, "restartCount": 0, "state": {"waiting": {"reason": "ContainerCreating"}}}], "hostIP": "10.5.135.169", "phase": "Pending", "qosClass": "Burstable", "startTime": "2017-09-05T14:29:59Z"}}, {"apiVersion": "v1", "kind": "Pod", "metadata": {"annotations": {"kubernetes.io/created-by": "{\"kind\":\"SerializedReference\",\"apiVersion\":\"v1\",\"reference\":{\"kind\":\"ReplicationController\",\"namespace\":\"default\",\"name\":\"docker-registry-1\",\"uid\":\"1a224e3d-8da1-11e7-9026-00505693371a\",\"apiVersion\":\"v1\",\"resourceVersion\":\"1867\"}}\n", "openshift.io/deployment-config.latest-version": "1", "openshift.io/deployment-config.name": "docker-registry", "openshift.io/deployment.name": "docker-registry-1", "openshift.io/scc": "hostnetwork"}, "creationTimestamp": "2017-08-30T16:35:40Z", "generateName": "docker-registry-1-", "labels": {"deployment": "docker-registry-1", "deploymentconfig": "docker-registry", "docker-registry": "default"}, "name": "docker-registry-1-vtf5r", "namespace": "default", "ownerReferences": [{"apiVersion": "v1", "blockOwnerDeletion": true, "controller": true, "kind": "ReplicationController", "name": "docker-registry-1", "uid": "1a224e3d-8da1-11e7-9026-00505693371a"}], "resourceVersion": "1877", "selfLink": "/api/v1/namespaces/default/pods/docker-registry-1-vtf5r", "uid": "4292f440-8da1-11e7-9026-00505693371a"}, "spec": {"containers": [{"env": [{"name": "REGISTRY_HTTP_ADDR", "value": ":5000"}, {"name": "REGISTRY_HTTP_NET", "value": "tcp"}, {"name": "REGISTRY_HTTP_SECRET", "value": "BGzdoN8TjdXyko7FZJBQAWZ7lYeBKDYfyJOBhHhCkhs="}, {"name": "REGISTRY_MIDDLEWARE_REPOSITORY_OPENSHIFT_ENFORCEQUOTA", "value": "false"}, {"name": 
"OPENSHIFT_DEFAULT_REGISTRY", "value": "docker-registry.default.svc:5000"}, {"name": "REGISTRY_HTTP_TLS_KEY", "value": "/etc/secrets/registry.key"}, {"name": "REGISTRY_HTTP_TLS_CERTIFICATE", "value": "/etc/secrets/registry.crt"}], "image": "openshift/origin-docker-registry:v3.6.0", "imagePullPolicy": "IfNotPresent", "livenessProbe": {"failureThreshold": 3, "httpGet": {"path": "/healthz", "port": 5000, "scheme": "HTTPS"}, "initialDelaySeconds": 10, "periodSeconds": 10, "successThreshold": 1, "timeoutSeconds": 5}, "name": "registry", "ports": [{"containerPort": 5000, "protocol": "TCP"}], "readinessProbe": {"failureThreshold": 3, "httpGet": {"path": "/healthz", "port": 5000, "scheme": "HTTPS"}, "periodSeconds": 10, "successThreshold": 1, "timeoutSeconds": 5}, "resources": {"requests": {"cpu": "100m", "memory": "256Mi"}}, "securityContext": {"capabilities": {"drop": ["KILL", "MKNOD", "SETGID", "SETUID", "SYS_CHROOT"]}, "privileged": false, "runAsUser": 1000030000, "seLinuxOptions": {"level": "s0:c6,c0"}}, "terminationMessagePath": "/dev/termination-log", "terminationMessagePolicy": "File", "volumeMounts": [{"mountPath": "/registry", "name": "registry-storage"}, {"mountPath": "/etc/secrets", "name": "registry-certificates"}, {"mountPath": "/var/run/secrets/kubernetes.io/serviceaccount", "name": "registry-token-j83qn", "readOnly": true}]}], "dnsPolicy": "ClusterFirst", "imagePullSecrets": [{"name": "registry-dockercfg-jpnq9"}], "nodeName": "openshift-gluster1", "nodeSelector": {"region": "infra"}, "restartPolicy": "Always", "schedulerName": "default-scheduler", "securityContext": {"fsGroup": 1000030000, "seLinuxOptions": {"level": "s0:c6,c0"}, "supplementalGroups": [1000030000]}, "serviceAccount": "registry", "serviceAccountName": "registry", "terminationGracePeriodSeconds": 30, "volumes": [{"name": "registry-storage", "persistentVolumeClaim": {"claimName": "registry-claim"}}, {"name": "registry-certificates", "secret": {"defaultMode": 420, "secretName": "registry-certificates"}}, {"name": "registry-token-j83qn", "secret": {"defaultMode": 420, "secretName": "registry-token-j83qn"}}]}, "status": {"conditions": [{"lastProbeTime": null, "lastTransitionTime": "2017-09-05T14:25:11Z", "status": "True", "type": "Initialized"}, {"lastProbeTime": null, "lastTransitionTime": "2017-09-05T14:25:11Z", "message": "containers with unready status: [registry]", "reason": "ContainersNotReady", "status": "False", "type": "Ready"}, {"lastProbeTime": null, "lastTransitionTime": "2017-08-30T16:35:40Z", "status": "True", "type": "PodScheduled"}], "containerStatuses": [{"image": "openshift/origin-docker-registry:v3.6.0", "imageID": "", "lastState": {}, "name": "registry", "ready": false, "restartCount": 0, "state": {"waiting": {"reason": "ContainerCreating"}}}], "hostIP": "10.5.135.171", "phase": "Pending", "qosClass": "Burstable", "startTime": "2017-09-05T14:25:11Z"}}], "kind": "List", "metadata": {}, "resourceVersion": "", "selfLink": ""}], "returncode": 0}, "state": "list"} to retry, use: --limit @/opt/env/openshift-ansible/playbooks/byo/config.retry

PLAY RECAP ****
localhost : ok=13 changed=0 unreachable=0 failed=0
openshift-gluster1 : ok=158 changed=58 unreachable=0 failed=0
openshift-gluster2 : ok=158 changed=58 unreachable=0 failed=0
openshift-gluster3 : ok=158 changed=58 unreachable=0 failed=0
openshift-gluster4 : ok=35 changed=6 unreachable=0 failed=0
openshift-gluster5 : ok=35 changed=6 unreachable=0 failed=0
openshift-gluster6 : ok=35 changed=6 unreachable=0 failed=0
openshift-master : ok=518 changed=192 unreachable=0 failed=1
openshift-node1 : ok=160 changed=61 unreachable=0 failed=0
openshift-node2 : ok=160 changed=61 unreachable=0 failed=0

Failure summary:

1. Hosts:    openshift-master
   Play:     Create Hosted Resources
   Task:     Wait for registry pods
   Message:  Failed without returning a message.

Additional information:
Host: openshift-gluster1-6
Command: gluster volume list
Result:

No volumes present in cluster

Host: openshift-gluster1-3 (Registry)
Command: gluster peer status

Result:

Number of Peers: 2

Hostname: openshift-gluster2
Uuid: 7693ed1f-1074-4529-805f-8c96fac44cf6
State: Peer in Cluster (Connected)

Hostname: openshift-gluster3
Uuid: 70481524-e2c2-49e9-9b4f-7199086fd21c
State: Peer in Cluster (Connected)

Host: openshift-gluster4-6 (storage)
Command: gluster peer status
Result:
Number of Peers: 2

Hostname: openshift-gluster5
Uuid: f7965f67-35e5-40c6-91b1-a620e462f4b7
State: Peer in Cluster (Connected)

Hostname: openshift-gluster6
Uuid: 930042d4-310f-402c-96d8-f87fad6bf07b
State: Peer in Cluster (Connected)

Host: openshift-master
Command: oc get storageclass
Result:

NAME                TYPE
glusterfs-storage   kubernetes.io/glusterfs 

Command: oc get nodes
Result:

NAME                 STATUS                     AGE       VERSION
openshift-gluster1   Ready                      44m       v1.6.1+5115d708d7
openshift-gluster2   Ready                      44m       v1.6.1+5115d708d7
openshift-gluster3   Ready                      44m       v1.6.1+5115d708d7
openshift-master     Ready,SchedulingDisabled   44m       v1.6.1+5115d708d7
openshift-node1      Ready                      44m       v1.6.1+5115d708d7
openshift-node2      Ready                      44m       v1.6.1+5115d708d7

Command: oc get pods
Result:

NAME                       READY     STATUS    RESTARTS   AGE
docker-registry-1-deploy   0/1       Error     0          40m
router-1-4vt0w             1/1       Running   0          40m
router-1-9nf2f             1/1       Running   0          40m
router-1-ddgdx             1/1       Running   0          40m
sdodson commented 7 years ago

Can you please provide oc describe for the failing pods which should show why the registry is failing to deploy?

Asgoret commented 7 years ago

@sdodson Hi, sure.

Host: openshift-master
Command: oc describe pod/registry-pod
Result: Error from server (NotFound): pods "registry-pod" not found

Command: oc get all
Result:

NAME                 REVISION   DESIRED   CURRENT   TRIGGERED BY
dc/docker-registry   1          3         3         config
dc/router            1          3         3         config

NAME                   DESIRED   CURRENT   READY     AGE
rc/docker-registry-1   3         3         0         8m
rc/router-1            3         3         3         8m

NAME                     HOST/PORT                                                  PATH      SERVICES          PORT      TERMINATION   WILDCARD
routes/docker-registry   docker-registry-default.router.default.svc.cluster.local             docker-registry   <all>     passthrough   None

NAME                               CLUSTER-IP       EXTERNAL-IP   PORT(S)                   AGE
svc/docker-registry                172.30.192.1     <none>        5000/TCP                  8m
svc/glusterfs-registry-endpoints   172.30.66.45     <none>        1/TCP                     9m
svc/kubernetes                     172.30.0.1       <none>        443/TCP,53/UDP,53/TCP     17m
svc/router                         172.30.253.253   <none>        80/TCP,443/TCP,1936/TCP   8m

NAME                          READY     STATUS              RESTARTS   AGE
po/docker-registry-1-8jbz7    0/1       ContainerCreating   0          7m
po/docker-registry-1-deploy   1/1       Running             0          8m
po/docker-registry-1-hpnkh    0/1       ContainerCreating   0          7m
po/docker-registry-1-pqx1v    0/1       ContainerCreating   0          7m
po/router-1-0hq3l             1/1       Running             0          7m
po/router-1-33wfh             1/1       Running             0          7m
po/router-1-btm8j             1/1       Running             0          7m
sdodson commented 7 years ago

So, what about oc describe po/docker-registry-1-8jbz7, oc describe po/docker-registry-1-hpnkh, and oc describe po/docker-registry-1-pqx1v? It looks like it's progressing, just very slowly? It may have had to pull images.

sdodson commented 7 years ago

Also, oc logs po/docker-registry-1-deploy

Asgoret commented 7 years ago

@sdodson

[root@openshift-master ~]# oc describe po/docker-registry-1-8jbz7
Error from server (NotFound): pods "docker-registry-1-8jbz7" not found
[root@openshift-master ~]# oc describe po/docker-registry-1-hpnkh
Error from server (NotFound): pods "docker-registry-1-hpnkh" not found
[root@openshift-master ~]# oc describe po/docker-registry-1-pqx1v
Error from server (NotFound): pods "docker-registry-1-pqx1v" not found
[root@openshift-master ~]# oc logs po/docker-registry-1-deploy
--> Scaling docker-registry-1 to 3
--> Waiting up to 10m0s for pods in rc docker-registry-1 to become ready
error: update acceptor rejected docker-registry-1: pods for rc "docker-registry-1" took longer than 600 seconds to become ready
Asgoret commented 7 years ago

@sdodson It's three different nodes. As the documentation says, /dev/sdb has no partitions, LVM, or PVs. Docker also runs on these nodes, with its storage on /dev/sdc1.

jarrpa commented 7 years ago

@asgoret Please retry the deployment, make sure the pods exist, then do an oc describe po/<POD_NAME>. You should be getting back output if the pods still exist.

jarrpa commented 7 years ago

@Asgoret Also, I believe you'll need to provide glusterfs_ip for all your GlusterFS nodes in your inventory file. See here:

https://github.com/openshift/openshift-ansible/blob/master/inventory/byo/hosts.byo.glusterfs.external.example

Asgoret commented 7 years ago

@jarrpa Install status:

TASK [openshift_hosted : Wait for registry pods] **************************************************************************************************************************************************************
FAILED - RETRYING: Wait for registry pods (60 retries left).
...
FAILED - RETRYING: Wait for registry pods (28 retries left).

Host: openshift-master
Command: oc get all
Result:

NAME                 REVISION   DESIRED   CURRENT   TRIGGERED BY
dc/docker-registry   1          3         3         config
dc/router            1          3         3         config

NAME                   DESIRED   CURRENT   READY     AGE
rc/docker-registry-1   3         3         0         4m
rc/router-1            3         3         3         4m

NAME                     HOST/PORT                                                  PATH      SERVICES          PORT      TERMINATION   WILDCARD
routes/docker-registry   docker-registry-default.router.default.svc.cluster.local             docker-registry   <all>     passthrough   None

NAME                               CLUSTER-IP      EXTERNAL-IP   PORT(S)                   AGE
svc/docker-registry                172.30.53.245   <none>        5000/TCP                  4m
svc/glusterfs-registry-endpoints   172.30.45.69    <none>        1/TCP                     5m
svc/kubernetes                     172.30.0.1      <none>        443/TCP,53/UDP,53/TCP     13m
svc/router                         172.30.28.13    <none>        80/TCP,443/TCP,1936/TCP   4m

NAME                          READY     STATUS              RESTARTS   AGE
po/docker-registry-1-ctxd4    0/1       ContainerCreating   0          3m
po/docker-registry-1-deploy   1/1       Running             0          4m
po/docker-registry-1-hqp86    0/1       ContainerCreating   0          3m
po/docker-registry-1-zcqdv    0/1       ContainerCreating   0          3m
po/router-1-90167             1/1       Running             0          3m
po/router-1-99v0w             1/1       Running             0          3m
po/router-1-rdwbl             1/1       Running             0          3m

Command: oc logs po/docker-registry-1-deploy
Result:

--> Scaling docker-registry-1 to 3
--> Waiting up to 10m0s for pods in rc docker-registry-1 to become ready
Asgoret commented 7 years ago

@jarrpa

[root@openshift-master ~]# oc logs po/docker-registry-1-ctxd4
Error from server (BadRequest): container "registry" in pod "docker-registry-1-ctxd4" is waiting to start: ContainerCreating

[root@openshift-master ~]# oc logs po/docker-registry-1-hqp86  
Error from server (BadRequest): container "registry" in pod "docker-registry-1-hqp86" is waiting to start: ContainerCreating

[root@openshift-master ~]# oc logs po/docker-registry-1-zcqdv
Error from server (BadRequest): container "registry" in pod "docker-registry-1-zcqdv" is waiting to start: ContainerCreating
Asgoret commented 7 years ago

@jarrpa Command: oc describe po/docker-registry-1-3cwbs

Name:           docker-registry-1-3cwbs
Namespace:      default
Security Policy:    hostnetwork
Node:           openshift-gluster2/10.5.135.170
Start Time:     Tue, 05 Sep 2017 10:06:00 -0400
Labels:         deployment=docker-registry-1
            deploymentconfig=docker-registry
            docker-registry=default
Annotations:        kubernetes.io/created-by={"kind":"SerializedReference","apiVersion":"v1","reference":{"kind":"ReplicationController","namespace":"default","name":"docker-registry-1","uid":"29e567f0-8d9c-11e7-a569-005...
            openshift.io/deployment-config.latest-version=1
            openshift.io/deployment-config.name=docker-registry
            openshift.io/deployment.name=docker-registry-1
            openshift.io/scc=hostnetwork
Status:         Pending
IP:         
Controllers:        ReplicationController/docker-registry-1
Containers:
  registry:
    Container ID:   
    Image:      openshift/origin-docker-registry:v3.6.0
    Image ID:       
    Port:       5000/TCP
    State:      Waiting
      Reason:       ContainerCreating
    Ready:      False
    Restart Count:  0
    Requests:
      cpu:  100m
      memory:   256Mi
    Liveness:   http-get https://:5000/healthz delay=10s timeout=5s period=10s #success=1 #failure=3
    Readiness:  http-get https://:5000/healthz delay=0s timeout=5s period=10s #success=1 #failure=3
    Environment:
      REGISTRY_HTTP_ADDR:                   :5000
      REGISTRY_HTTP_NET:                    tcp
      REGISTRY_HTTP_SECRET:                 TirdSgP8crfsatEtfhKr88TpIg02rYIHkqREz7Qrzqo=
      REGISTRY_MIDDLEWARE_REPOSITORY_OPENSHIFT_ENFORCEQUOTA:    false
      OPENSHIFT_DEFAULT_REGISTRY:               docker-registry.default.svc:5000
      REGISTRY_HTTP_TLS_KEY:                    /etc/secrets/registry.key
      REGISTRY_HTTP_TLS_CERTIFICATE:                /etc/secrets/registry.crt
    Mounts:
      /etc/secrets from registry-certificates (rw)
      /registry from registry-storage (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from registry-token-6615n (ro)
Conditions:
  Type      Status
  Initialized   True 
  Ready     False 
  PodScheduled  True 
Volumes:
  registry-storage:
    Type:   PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)
    ClaimName:  registry-claim
    ReadOnly:   false
  registry-certificates:
    Type:   Secret (a volume populated by a Secret)
    SecretName: registry-certificates
    Optional:   false
  registry-token-6615n:
    Type:   Secret (a volume populated by a Secret)
    SecretName: registry-token-6615n
    Optional:   false
QoS Class:  Burstable
Node-Selectors: region=infra
Tolerations:    <none>
Events:
  FirstSeen LastSeen    Count   From                SubObjectPath   Type        Reason      Message
  --------- --------    -----   ----                -------------   --------    ------      -------
  3m        3m      1   default-scheduler               Normal      Scheduled   Successfully assigned docker-registry-1-3cwbs to openshift-gluster2
  <invalid> <invalid>   1   kubelet, openshift-gluster2         Warning     FailedMount MountVolume.SetUp failed for volume "kubernetes.io/glusterfs/5296ac74-8d9c-11e7-a569-00505693371a-registry-volume" (spec.Name: "registry-volume") pod "5296ac74-8d9c-11e7-a569-00505693371a" (UID: "5296ac74-8d9c-11e7-a569-00505693371a") with: glusterfs: mount failed: mount failed: exit status 1
Mounting command: mount
Mounting arguments: 10.5.135.169:glusterfs-registry-volume /var/lib/origin/openshift.local.volumes/pods/5296ac74-8d9c-11e7-a569-00505693371a/volumes/kubernetes.io~glusterfs/registry-volume glusterfs [log-level=ERROR log-file=/var/lib/origin/openshift.local.volumes/plugins/kubernetes.io/glusterfs/registry-volume/docker-registry-1-3cwbs-glusterfs.log backup-volfile-servers=10.5.135.169:10.5.135.170:10.5.135.171]
Output: Mount failed. Please check the log file for more details.

 the following error information was pulled from the glusterfs log to help diagnose this issue: 
[2017-09-05 14:06:07.888235] E [glusterfsd-mgmt.c:1779:mgmt_getspec_cbk] 0-glusterfs: failed to get the 'volume file' from server
[2017-09-05 14:06:07.888266] E [glusterfsd-mgmt.c:1879:mgmt_getspec_cbk] 0-mgmt: failed to fetch volume file (key:glusterfs-registry-volume)

  <invalid> <invalid>   1   kubelet, openshift-gluster2     Warning FailedMount MountVolume.SetUp failed for volume "kubernetes.io/glusterfs/5296ac74-8d9c-11e7-a569-00505693371a-registry-volume" (spec.Name: "registry-volume") pod "5296ac74-8d9c-11e7-a569-00505693371a" (UID: "5296ac74-8d9c-11e7-a569-00505693371a") with: glusterfs: mount failed: mount failed: exit status 1
Mounting command: mount
Mounting arguments: 10.5.135.169:glusterfs-registry-volume /var/lib/origin/openshift.local.volumes/pods/5296ac74-8d9c-11e7-a569-00505693371a/volumes/kubernetes.io~glusterfs/registry-volume glusterfs [backup-volfile-servers=10.5.135.169:10.5.135.170:10.5.135.171 log-level=ERROR log-file=/var/lib/origin/openshift.local.volumes/plugins/kubernetes.io/glusterfs/registry-volume/docker-registry-1-3cwbs-glusterfs.log]
Output: Mount failed. Please check the log file for more details.

 the following error information was pulled from the glusterfs log to help diagnose this issue: 
[2017-09-05 14:06:14.638527] E [glusterfsd-mgmt.c:1779:mgmt_getspec_cbk] 0-glusterfs: failed to get the 'volume file' from server
[2017-09-05 14:06:14.638572] E [glusterfsd-mgmt.c:1879:mgmt_getspec_cbk] 0-mgmt: failed to fetch volume file (key:glusterfs-registry-volume)

  <invalid> <invalid>   1   kubelet, openshift-gluster2     Warning FailedMount MountVolume.SetUp failed for volume "kubernetes.io/glusterfs/5296ac74-8d9c-11e7-a569-00505693371a-registry-volume" (spec.Name: "registry-volume") pod "5296ac74-8d9c-11e7-a569-00505693371a" (UID: "5296ac74-8d9c-11e7-a569-00505693371a") with: glusterfs: mount failed: mount failed: exit status 1
Mounting command: mount
Mounting arguments: 10.5.135.169:glusterfs-registry-volume /var/lib/origin/openshift.local.volumes/pods/5296ac74-8d9c-11e7-a569-00505693371a/volumes/kubernetes.io~glusterfs/registry-volume glusterfs [log-level=ERROR log-file=/var/lib/origin/openshift.local.volumes/plugins/kubernetes.io/glusterfs/registry-volume/docker-registry-1-3cwbs-glusterfs.log backup-volfile-servers=10.5.135.169:10.5.135.170:10.5.135.171]
Output: Mount failed. Please check the log file for more details.

 the following error information was pulled from the glusterfs log to help diagnose this issue: 
[2017-09-05 14:06:21.911414] E [glusterfsd-mgmt.c:1779:mgmt_getspec_cbk] 0-glusterfs: failed to get the 'volume file' from server
[2017-09-05 14:06:21.911446] E [glusterfsd-mgmt.c:1879:mgmt_getspec_cbk] 0-mgmt: failed to fetch volume file (key:glusterfs-registry-volume)

  <invalid> <invalid>   1   kubelet, openshift-gluster2     Warning FailedMount MountVolume.SetUp failed for volume "kubernetes.io/glusterfs/5296ac74-8d9c-11e7-a569-00505693371a-registry-volume" (spec.Name: "registry-volume") pod "5296ac74-8d9c-11e7-a569-00505693371a" (UID: "5296ac74-8d9c-11e7-a569-00505693371a") with: glusterfs: mount failed: mount failed: exit status 1
Mounting command: mount
Mounting arguments: 10.5.135.169:glusterfs-registry-volume /var/lib/origin/openshift.local.volumes/pods/5296ac74-8d9c-11e7-a569-00505693371a/volumes/kubernetes.io~glusterfs/registry-volume glusterfs [log-file=/var/lib/origin/openshift.local.volumes/plugins/kubernetes.io/glusterfs/registry-volume/docker-registry-1-3cwbs-glusterfs.log backup-volfile-servers=10.5.135.169:10.5.135.170:10.5.135.171 log-level=ERROR]
Output: Mount failed. Please check the log file for more details.

 the following error information was pulled from the glusterfs log to help diagnose this issue: 
[2017-09-05 14:06:30.080504] E [glusterfsd-mgmt.c:1779:mgmt_getspec_cbk] 0-glusterfs: failed to get the 'volume file' from server
[2017-09-05 14:06:30.080542] E [glusterfsd-mgmt.c:1879:mgmt_getspec_cbk] 0-mgmt: failed to fetch volume file (key:glusterfs-registry-volume)

  <invalid> <invalid>   1   kubelet, openshift-gluster2     Warning FailedMount MountVolume.SetUp failed for volume "kubernetes.io/glusterfs/5296ac74-8d9c-11e7-a569-00505693371a-registry-volume" (spec.Name: "registry-volume") pod "5296ac74-8d9c-11e7-a569-00505693371a" (UID: "5296ac74-8d9c-11e7-a569-00505693371a") with: glusterfs: mount failed: mount failed: exit status 1
Mounting command: mount
Mounting arguments: 10.5.135.169:glusterfs-registry-volume /var/lib/origin/openshift.local.volumes/pods/5296ac74-8d9c-11e7-a569-00505693371a/volumes/kubernetes.io~glusterfs/registry-volume glusterfs [log-level=ERROR log-file=/var/lib/origin/openshift.local.volumes/plugins/kubernetes.io/glusterfs/registry-volume/docker-registry-1-3cwbs-glusterfs.log backup-volfile-servers=10.5.135.169:10.5.135.170:10.5.135.171]
Output: Mount failed. Please check the log file for more details.

 the following error information was pulled from the glusterfs log to help diagnose this issue: 
[2017-09-05 14:06:40.228812] E [glusterfsd-mgmt.c:1779:mgmt_getspec_cbk] 0-glusterfs: failed to get the 'volume file' from server
[2017-09-05 14:06:40.228849] E [glusterfsd-mgmt.c:1879:mgmt_getspec_cbk] 0-mgmt: failed to fetch volume file (key:glusterfs-registry-volume)

  <invalid> <invalid>   1   kubelet, openshift-gluster2     Warning FailedMount MountVolume.SetUp failed for volume "kubernetes.io/glusterfs/5296ac74-8d9c-11e7-a569-00505693371a-registry-volume" (spec.Name: "registry-volume") pod "5296ac74-8d9c-11e7-a569-00505693371a" (UID: "5296ac74-8d9c-11e7-a569-00505693371a") with: glusterfs: mount failed: mount failed: exit status 1
Mounting command: mount
Mounting arguments: 10.5.135.169:glusterfs-registry-volume /var/lib/origin/openshift.local.volumes/pods/5296ac74-8d9c-11e7-a569-00505693371a/volumes/kubernetes.io~glusterfs/registry-volume glusterfs [log-file=/var/lib/origin/openshift.local.volumes/plugins/kubernetes.io/glusterfs/registry-volume/docker-registry-1-3cwbs-glusterfs.log backup-volfile-servers=10.5.135.169:10.5.135.170:10.5.135.171 log-level=ERROR]
Output: Mount failed. Please check the log file for more details.

 the following error information was pulled from the glusterfs log to help diagnose this issue: 
[2017-09-05 14:06:55.392314] E [glusterfsd-mgmt.c:1779:mgmt_getspec_cbk] 0-glusterfs: failed to get the 'volume file' from server
[2017-09-05 14:06:55.392353] E [glusterfsd-mgmt.c:1879:mgmt_getspec_cbk] 0-mgmt: failed to fetch volume file (key:glusterfs-registry-volume)

  <invalid> <invalid>   1   kubelet, openshift-gluster2     Warning FailedMount MountVolume.SetUp failed for volume "kubernetes.io/glusterfs/5296ac74-8d9c-11e7-a569-00505693371a-registry-volume" (spec.Name: "registry-volume") pod "5296ac74-8d9c-11e7-a569-00505693371a" (UID: "5296ac74-8d9c-11e7-a569-00505693371a") with: glusterfs: mount failed: mount failed: exit status 1
Mounting command: mount
Mounting arguments: 10.5.135.169:glusterfs-registry-volume /var/lib/origin/openshift.local.volumes/pods/5296ac74-8d9c-11e7-a569-00505693371a/volumes/kubernetes.io~glusterfs/registry-volume glusterfs [log-level=ERROR log-file=/var/lib/origin/openshift.local.volumes/plugins/kubernetes.io/glusterfs/registry-volume/docker-registry-1-3cwbs-glusterfs.log backup-volfile-servers=10.5.135.169:10.5.135.170:10.5.135.171]
Output: Mount failed. Please check the log file for more details.

 the following error information was pulled from the glusterfs log to help diagnose this issue: 
[2017-09-05 14:07:17.523063] E [glusterfsd-mgmt.c:1779:mgmt_getspec_cbk] 0-glusterfs: failed to get the 'volume file' from server
[2017-09-05 14:07:17.523099] E [glusterfsd-mgmt.c:1879:mgmt_getspec_cbk] 0-mgmt: failed to fetch volume file (key:glusterfs-registry-volume)

  <invalid> <invalid>   1   kubelet, openshift-gluster2     Warning FailedMount MountVolume.SetUp failed for volume "kubernetes.io/glusterfs/5296ac74-8d9c-11e7-a569-00505693371a-registry-volume" (spec.Name: "registry-volume") pod "5296ac74-8d9c-11e7-a569-00505693371a" (UID: "5296ac74-8d9c-11e7-a569-00505693371a") with: glusterfs: mount failed: mount failed: exit status 1
Mounting command: mount
Mounting arguments: 10.5.135.169:glusterfs-registry-volume /var/lib/origin/openshift.local.volumes/pods/5296ac74-8d9c-11e7-a569-00505693371a/volumes/kubernetes.io~glusterfs/registry-volume glusterfs [backup-volfile-servers=10.5.135.169:10.5.135.170:10.5.135.171 log-level=ERROR log-file=/var/lib/origin/openshift.local.volumes/plugins/kubernetes.io/glusterfs/registry-volume/docker-registry-1-3cwbs-glusterfs.log]
Output: Mount failed. Please check the log file for more details.

 the following error information was pulled from the glusterfs log to help diagnose this issue: 
[2017-09-05 14:07:56.661477] E [glusterfsd-mgmt.c:1779:mgmt_getspec_cbk] 0-glusterfs: failed to get the 'volume file' from server
[2017-09-05 14:07:56.661512] E [glusterfsd-mgmt.c:1879:mgmt_getspec_cbk] 0-mgmt: failed to fetch volume file (key:glusterfs-registry-volume)

  <invalid> <invalid>   1   kubelet, openshift-gluster2     Warning FailedMount Unable to mount volumes for pod "docker-registry-1-3cwbs_default(5296ac74-8d9c-11e7-a569-00505693371a)": timeout expired waiting for volumes to attach/mount for pod "default"/"docker-registry-1-3cwbs". list of unattached/unmounted volumes=[registry-storage]
  <invalid> <invalid>   1   kubelet, openshift-gluster2     Warning FailedSync  Error syncing pod
  <invalid> <invalid>   1   kubelet, openshift-gluster2     Warning FailedMount (combined from similar events): MountVolume.SetUp failed for volume "kubernetes.io/glusterfs/5296ac74-8d9c-11e7-a569-00505693371a-registry-volume" (spec.Name: "registry-volume") pod "5296ac74-8d9c-11e7-a569-00505693371a" (UID: "5296ac74-8d9c-11e7-a569-00505693371a") with: glusterfs: mount failed: mount failed: exit status 1
Mounting command: mount
Mounting arguments: 10.5.135.169:glusterfs-registry-volume /var/lib/origin/openshift.local.volumes/pods/5296ac74-8d9c-11e7-a569-00505693371a/volumes/kubernetes.io~glusterfs/registry-volume glusterfs [backup-volfile-servers=10.5.135.169:10.5.135.170:10.5.135.171 log-level=ERROR log-file=/var/lib/origin/openshift.local.volumes/plugins/kubernetes.io/glusterfs/registry-volume/docker-registry-1-3cwbs-glusterfs.log]
Output: Mount failed. Please check the log file for more details.

 the following error information was pulled from the glusterfs log to help diagnose this issue: 
[2017-09-05 14:09:07.753529] E [glusterfsd-mgmt.c:1779:mgmt_getspec_cbk] 0-glusterfs: failed to get the 'volume file' from server
[2017-09-05 14:09:07.753564] E [glusterfsd-mgmt.c:1879:mgmt_getspec_cbk] 0-mgmt: failed to fetch volume file (key:glusterfs-registry-volume)
jarrpa commented 7 years ago

Hmm... looks like the registry volume doesn't exist. You're using external GlusterFS nodes... can you check your ansible output and see if the registry volume creation (in the GlusterFS section) succeeded?

Asgoret commented 7 years ago

@jarrpa Which section or task do I need to look for?

jarrpa commented 7 years ago

"Create GlusterFS registry volume"

jarrpa commented 7 years ago

Also check the output of "gluster volume status" from one of the GlusterFS nodes to see if the volume is there.
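For example, directly on one of the glusterfs_registry nodes:

gluster volume status
gluster volume list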

Asgoret commented 7 years ago

@jarrpa

TASK [openshift_storage_glusterfs : Create GlusterFS registry volume] 
changed: [openshift-master]

It's strange, because:

Command: oc get storageclass

NAME                TYPE
glusterfs-storage   kubernetes.io/glusterfs  

But on the GlusterFS nodes:

Command: gluster volume status

No volumes present

Same on GlusterFS registry nodes.

Command: oc get pvc

NAME             STATUS    VOLUME            CAPACITY   ACCESSMODES   STORAGECLASS   AGE
registry-claim   Bound     registry-volume   5Gi        RWX                          9m
jarrpa commented 7 years ago

Well, the SC doesn't matter. The registry volume is statically provisioned. But the volume not being present explains why the registry pods won't start. Can you delete the registry dc, rerun the installer with "-vvv", and see what the output of the GlusterFS registry volume creation says?
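For reference, a minimal sketch of those steps, using the dc name and playbook path from earlier in this thread (run from /etc/ansible, as before):

oc delete dc docker-registry -n default
ansible-playbook -vvv -i ./inventory /opt/env/openshift-ansible/playbooks/byo/config.yml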

Asgoret commented 7 years ago

@jarrpa Are there any requirements for Gluster? I installed Gluster 3.10.

jarrpa commented 7 years ago

@Asgoret No, 3.10 should be fine.

Asgoret commented 7 years ago

@jarrpa

TASK [openshift_storage_glusterfs : Create GlusterFS registry volume] *****************************************************************************************************************************************
task path: /opt/env/openshift-ansible/roles/openshift_storage_glusterfs/tasks/glusterfs_registry.yml:79
Using module file /usr/lib/python2.7/site-packages/ansible/modules/commands/command.py
<openshift-master> ESTABLISH SSH CONNECTION FOR USER: root
<openshift-master> SSH: EXEC ssh -C -o ControlMaster=auto -o ControlPersist=60s -o KbdInteractiveAuthentication=no -o PreferredAuthentications=gssapi-with-mic,gssapi-keyex,hostbased,publickey -o PasswordAuthentication=no -o User=root -o ConnectTimeout=10 -o ControlPath=/root/.ansible/cp/a24ebae081 openshift-master '/bin/sh -c '"'"'echo ~ && sleep 0'"'"''
<openshift-master> (0, '/root\n', '')
<openshift-master> ESTABLISH SSH CONNECTION FOR USER: root
<openshift-master> SSH: EXEC ssh -C -o ControlMaster=auto -o ControlPersist=60s -o KbdInteractiveAuthentication=no -o PreferredAuthentications=gssapi-with-mic,gssapi-keyex,hostbased,publickey -o PasswordAuthentication=no -o User=root -o ConnectTimeout=10 -o ControlPath=/root/.ansible/cp/a24ebae081 openshift-master '/bin/sh -c '"'"'( umask 77 && mkdir -p "` echo /root/.ansible/tmp/ansible-tmp-1504114236.98-133705797052267 `" && echo ansible-tmp-1504114236.98-133705797052267="` echo /root/.ansible/tmp/ansible-tmp-1504114236.98-133705797052267 `" ) && sleep 0'"'"''
<openshift-master> (0, 'ansible-tmp-1504114236.98-133705797052267=/root/.ansible/tmp/ansible-tmp-1504114236.98-133705797052267\n', '')
<openshift-master> PUT /tmp/tmpOJakHF TO /root/.ansible/tmp/ansible-tmp-1504114236.98-133705797052267/command.py
<openshift-master> SSH: EXEC sftp -b - -C -o ControlMaster=auto -o ControlPersist=60s -o KbdInteractiveAuthentication=no -o PreferredAuthentications=gssapi-with-mic,gssapi-keyex,hostbased,publickey -o PasswordAuthentication=no -o User=root -o ConnectTimeout=10 -o ControlPath=/root/.ansible/cp/a24ebae081 '[openshift-master]'
<openshift-master> (0, 'sftp> put /tmp/tmpOJakHF /root/.ansible/tmp/ansible-tmp-1504114236.98-133705797052267/command.py\n', '')
<openshift-master> ESTABLISH SSH CONNECTION FOR USER: root
<openshift-master> SSH: EXEC ssh -C -o ControlMaster=auto -o ControlPersist=60s -o KbdInteractiveAuthentication=no -o PreferredAuthentications=gssapi-with-mic,gssapi-keyex,hostbased,publickey -o PasswordAuthentication=no -o User=root -o ConnectTimeout=10 -o ControlPath=/root/.ansible/cp/a24ebae081 openshift-master '/bin/sh -c '"'"'chmod u+x /root/.ansible/tmp/ansible-tmp-1504114236.98-133705797052267/ /root/.ansible/tmp/ansible-tmp-1504114236.98-133705797052267/command.py && sleep 0'"'"''
<openshift-master> (0, '', '')
<openshift-master> ESTABLISH SSH CONNECTION FOR USER: root
<openshift-master> SSH: EXEC ssh -C -o ControlMaster=auto -o ControlPersist=60s -o KbdInteractiveAuthentication=no -o PreferredAuthentications=gssapi-with-mic,gssapi-keyex,hostbased,publickey -o PasswordAuthentication=no -o User=root -o ConnectTimeout=10 -o ControlPath=/root/.ansible/cp/a24ebae081 -tt openshift-master '/bin/sh -c '"'"'/usr/bin/python /root/.ansible/tmp/ansible-tmp-1504114236.98-133705797052267/command.py; rm -rf "/root/.ansible/tmp/ansible-tmp-1504114236.98-133705797052267/" > /dev/null 2>&1 && sleep 0'"'"''
<openshift-master> (0, '\r\n{"changed": true, "end": "2017-08-30 11:46:29.884150", "stdout": "Name: glusterfs-registry-volume\\nSize: 5\\nVolume Id: ff0a40767b7acff097aca07016cb2250\\nCluster Id: 1be8332b8e0ca280f0aabbded41a5e1a\\nMount: 10.5.135.166:glusterfs-registry-volume\\nMount Options: backup-volfile-servers=10.5.135.167,10.5.135.168\\nDurability Type: replicate\\nDistributed+Replica: 3", "cmd": ["heketi-cli", "-s", "http://10.5.135.185:8080", "--user", "admin", "volume", "create", "--size=5", "--name=glusterfs-registry-volume"], "rc": 0, "start": "2017-08-30 11:46:28.799998", "stderr": "", "delta": "0:00:01.084152", "invocation": {"module_args": {"warn": true, "executable": null, "_uses_shell": false, "_raw_params": "heketi-cli -s http://10.5.135.185:8080 --user admin  volume create --size=5 --name=glusterfs-registry-volume", "removes": null, "creates": null, "chdir": null}}, "warnings": []}\r\n', 'Shared connection to openshift-master closed.\r\n')
changed: [openshift-master] => {
    "changed": true, 
    "cmd": [
        "heketi-cli", 
        "-s", 
        "http://10.5.135.185:8080", 
        "--user", 
        "admin", 
        "volume", 
        "create", 
        "--size=5", 
        "--name=glusterfs-registry-volume"
    ], 
    "delta": "0:00:01.084152", 
    "end": "2017-08-30 11:46:29.884150", 
    "invocation": {
        "module_args": {
            "_raw_params": "heketi-cli -s http://10.5.135.185:8080 --user admin  volume create --size=5 --name=glusterfs-registry-volume", 
            "_uses_shell": false, 
            "chdir": null, 
            "creates": null, 
            "executable": null, 
            "removes": null, 
            "warn": true
        }
    }, 
    "rc": 0, 
    "start": "2017-08-30 11:46:28.799998", 
    "stderr": "", 
    "stderr_lines": [], 
    "stdout": "Name: glusterfs-registry-volume\nSize: 5\nVolume Id: ff0a40767b7acff097aca07016cb2250\nCluster Id: 1be8332b8e0ca280f0aabbded41a5e1a\nMount: 10.5.135.166:glusterfs-registry-volume\nMount Options: backup-volfile-servers=10.5.135.167,10.5.135.168\nDurability Type: replicate\nDistributed+Replica: 3", 
    "stdout_lines": [
        "Name: glusterfs-registry-volume", 
        "Size: 5", 
        "Volume Id: ff0a40767b7acff097aca07016cb2250", 
        "Cluster Id: 1be8332b8e0ca280f0aabbded41a5e1a", 
        "Mount: 10.5.135.166:glusterfs-registry-volume", 
        "Mount Options: backup-volfile-servers=10.5.135.167,10.5.135.168", 
        "Durability Type: replicate", 
        "Distributed+Replica: 3"
    ]
}
Asgoret commented 7 years ago

@jarrpa Hmmm... I found an issue.

    "stdout_lines": [
        "Name: glusterfs-registry-volume", 
...
        "Mount: 10.5.135.166:glusterfs-registry-volume", 
        "Mount Options: backup-volfile-servers=10.5.135.167,10.5.135.168", 
        ...

The issue is the IP. It's the IP of the gluster storage cluster, not the gluster registry cluster.

jarrpa commented 7 years ago

Hmm... but the command succeeded. Is there still no volume created? Can you check the heketi logs (oc logs <HEKETI_POD>) to see if something went wrong?

Asgoret commented 7 years ago

@jarrpa No, the volume in the gluster registry cluster isn't created.

[root@openshift-gluster4 ~]# gluster volume list 
No volumes present in cluster

I will rerun the install and try the "kubectl" command. Are there any additional install settings for heketi?

[root@openshift-master ~]# heketi-cli volume list
Server must be provided
jarrpa commented 7 years ago

@Asgoret No no, kubectl was a typo, I meant oc, sorry. :) I want to see the heketi logs, so do oc logs <HEKETI_POD>.

To use heketi-cli, you need to do heketi-cli -s http://<HEKETI_ROUTE> --user admin --secret <HEKETI_SECRET>. HEKETI_ROUTE is the router address for heketi, which I think you can find with something like oc get route heketi-storage-route. HEKETI_SECRET is the heketi admin secret stored in heketi-storage-secret under key. You want to take the value of key and do echo "<KEY>" | base64 -d - to get the decoded secret.
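A rough sketch of that lookup sequence for a native (in-cluster) heketi; the route and secret names here follow the examples above and may differ on a given cluster:

oc get route heketi-storage-route             # the HOST/PORT column is HEKETI_ROUTE
oc get secret heketi-storage-secret -o yaml   # look for the "key" field
echo "<KEY>" | base64 -d -                    # decodes to HEKETI_SECRET
heketi-cli -s http://<HEKETI_ROUTE> --user admin --secret <HEKETI_SECRET> volume list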

Asgoret commented 7 years ago

@jarrpa Hmmm... I'm not sure I did everything right.

[root@openshift-master ~]# heketi-cli volume list
Id:ff0a40767b7acff097aca07016cb2250    Cluster:1be8332b8e0ca280f0aabbded41a5e1a    Name:glusterfs-registry-volume

[root@openshift-master ~]# oc get route heketi-storage-route
Error from server (NotFound): routes.route.openshift.io "heketi-storage-route" not found

About cluster:

[root@openshift-master ~]# heketi-cli cluster list
Clusters:
1be8332b8e0ca280f0aabbded41a5e1a
c8d5f1613269e0656b3e930381bc39b3

[root@openshift-master ~]# heketi-cli node list
Id:3d3d2d8af0bea5366d1884af3d19b961 Cluster:1be8332b8e0ca280f0aabbded41a5e1a
Id:99625227e3b0b47a288f9dae43b70b9e Cluster:1be8332b8e0ca280f0aabbded41a5e1a
Id:9a7eb6d5e6927531113503c3d8ff0d10 Cluster:1be8332b8e0ca280f0aabbded41a5e1a
Id:29152eda31cbba6d032ebf9e9d8dde70 Cluster:c8d5f1613269e0656b3e930381bc39b3
Id:406342159bf74cdb89c4723334ea308f Cluster:c8d5f1613269e0656b3e930381bc39b3
Id:b432ce4433dd5210bf9191b8adfebe94 Cluster:c8d5f1613269e0656b3e930381bc39b3

[root@openshift-master ~]# heketi-cli device info
Error: Device id missing

[root@openshift-master ~]# oc describe svc heketi-storage
Error from server (NotFound): services "heketi-storage" not found

[root@openshift-master ~]# oc describe route heketi-storage
Error from server (NotFound): routes.route.openshift.io "heketi-storage" not found

I think the heketi configuration is wrong or incomplete... The service creates the cluster and gets IDs for the nodes, but doesn't know how to reach the devices on those nodes... Are there any additional install settings for heketi? I only used the install command to install heketi\heketi-client\heketi-templates from the EPEL repository.

jarrpa commented 7 years ago

Are you not using openshift-ansible to deploy heketi?

How are you running heketi-cli without specifying the server or admin secret?

jarrpa commented 7 years ago

...OH WAIT. I forgot you are using an external GlusterFS cluster and heketi!! Sorry about that!

Okay, let's step back to here.

It looks like you have one heketi instance for both clusters, is that right? Can you do heketi-cli cluster info <ID> for each of your two clusters?

Can you paste your latest inventory file so I can see what the IPs look like?

Asgoret commented 7 years ago

@jarrpa Yes, you're right (one heketi master).

[root@openshift-master ~]# heketi-cli cluster list
Clusters:
1be8332b8e0ca280f0aabbded41a5e1a
c8d5f1613269e0656b3e930381bc39b3

[root@openshift-master ~]# heketi-cli cluster info 1be8332b8e0ca280f0aabbded41a5e1a
Cluster id: 1be8332b8e0ca280f0aabbded41a5e1a
Nodes:
3d3d2d8af0bea5366d1884af3d19b961
99625227e3b0b47a288f9dae43b70b9e
9a7eb6d5e6927531113503c3d8ff0d10
Volumes:
ff0a40767b7acff097aca07016cb2250

[root@openshift-master ~]# heketi-cli cluster info c8d5f1613269e0656b3e930381bc39b3
Cluster id: c8d5f1613269e0656b3e930381bc39b3
Nodes:
29152eda31cbba6d032ebf9e9d8dde70
406342159bf74cdb89c4723334ea308f
b432ce4433dd5210bf9191b8adfebe94
Volumes:

From this information I conclude that the first ID is the storage cluster and the second is the registry cluster.

And inventory:

[OSEv3:children]
masters
nodes
etcd
glusterfs_registry
glusterfs

[OSEv3:vars]
ansible_ssh_user=root
openshift_deployment_type=origin
containerized=false
osm_use_cockpit=true
openshift_hosted_registry_storage_kind=glusterfs
openshift_storage_glusterfs_is_native=False
openshift_storage_glusterfs_heketi_url=10.5.135.185

[masters]
openshift-master

[etcd]
openshift-master

[nodes]
openshift-master openshift_schedulable=False
openshift-node1 openshift_node_labels="{'region': 'primary', 'zone': 'firstzone'}"
openshift-node2 openshift_node_labels="{'region': 'primary', 'zone': 'secondzone'}"
openshift-gluster1 openshift_schedulable=True openshift_node_labels="{'region': 'infra'}"
openshift-gluster2 openshift_schedulable=True openshift_node_labels="{'region': 'infra'}"
openshift-gluster3 openshift_schedulable=True openshift_node_labels="{'region': 'infra'}"

[glusterfs_registry]
openshift-gluster1 glusterfs_ip=10.5.135.171 glusterfs_devices='["/dev/sdb"]'
openshift-gluster2 glusterfs_ip=10.5.135.170 glusterfs_devices='["/dev/sdb"]'
openshift-gluster3 glusterfs_ip=10.5.135.169 glusterfs_devices='["/dev/sdb"]'

[glusterfs]
openshift-gluster4 glusterfs_ip=10.5.135.168 glusterfs_devices='["/dev/sdb"]'
openshift-gluster5 glusterfs_ip=10.5.135.167 glusterfs_devices='["/dev/sdb"]'
openshift-gluster6 glusterfs_ip=10.5.135.166 glusterfs_devices='["/dev/sdb"]'

Heketi is installed on the master node, as a service via yum.

jarrpa commented 7 years ago

@Asgoret Okay. Try deleting the existing volume, then run the following:

heketi-cli -s http://10.5.135.185:8080 --user admin  volume create --size=5 --name=glusterfs-registry-volume --clusters=1be8332b8e0ca280f0aabbded41a5e1a

Then re-run the openshift-ansible installer.
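The delete step itself isn't spelled out above; a minimal sketch, using the heketi URL and the volume ID reported earlier by heketi-cli volume list (verify the ID before deleting):

heketi-cli -s http://10.5.135.185:8080 --user admin volume delete ff0a40767b7acff097aca07016cb2250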

Asgoret commented 7 years ago

@jarrpa I apologize for the slow response.

[root@openshift-master ~]# heketi-cli -s http://10.5.135.185:8080 --user admin  volume create --size=5 --name=glusterfs-registry-volume --clusters=1be8332b8e0ca280f0aabbded41a5e1a
Error: Name glusterfs-registry-volume is already in use in all available clusters

[root@openshift-master ~]# heketi-cli volume list
Id:ff0a40767b7acff097aca07016cb2250    Cluster:1be8332b8e0ca280f0aabbded41a5e1a    Name:glusterfs-registry-volume

Could this be because the install process is trying to deploy docker-registry on a node where I don't have docker? I mean, in my inventory:

openshift_storage_glusterfs_is_native=False

So when I run the install process, ansible tries to deploy docker-registry on the storage nodes, not on the registry nodes.

Asgoret commented 7 years ago

@jarrpa OK, I reverted the VMs and re-ran. This is the description of the docker-registry volume created by the ansible install:

[root@openshift-master ~]# heketi-cli volume info 74c6b49fae321a50e2197533a6f1090e
Name: glusterfs-registry-volume
Size: 5
Volume Id: 74c6b49fae321a50e2197533a6f1090e
Cluster Id: ab6868d937b8684d484f131be828ed57
Mount: 10.5.135.169:glusterfs-registry-volume
Mount Options: backup-volfile-servers=10.5.135.170,10.5.135.171
Durability Type: replicate
Distributed+Replica: 3

This is the description of the docker-registry volume that I created:

[root@openshift-master ~]# heketi-cli -s http://10.5.135.185:8080 --user admin  volume create --size=5 --name=glusterfs-registry-volume --clusters=ab6868d937b8684d484f131be828ed57
Name: glusterfs-registry-volume
Size: 5
Volume Id: df5744a151cd8a0eac3e9c7e7e586ab0
Cluster Id: ab6868d937b8684d484f131be828ed57
Mount: 10.5.135.169:glusterfs-registry-volume
Mount Options: backup-volfile-servers=10.5.135.170,10.5.135.171
Durability Type: replicate
Distributed+Replica: 3

Should I re-run the install, or use the retry command from the log? So the command would look like this:

ansible-playbook -vvv -i --limit @/opt/env/openshift-ansible/playbooks/byo/config.retry ./inventory /opt/env/openshift-ansible/playbooks/byo/config.yml

Correct?

jarrpa commented 7 years ago

I think I messed up slightly, sorry. Our goal is to manually create the volume on the cluster that corresponds to the glusterfs-registry cluster. So run heketi-cli cluster info to determine which cluster has the IP addresses of the glusterfs-registry nodes, then run the volume create command using the cluster ID of that cluster. THEN rerun the playbook. Your command should be right.
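A condensed sketch of that sequence, reusing the heketi URL from this thread; the IDs are placeholders to be read from the actual output, and node info (not used earlier here) shows each node's addresses:

heketi-cli -s http://10.5.135.185:8080 --user admin cluster list
heketi-cli -s http://10.5.135.185:8080 --user admin cluster info <CLUSTER_ID>
heketi-cli -s http://10.5.135.185:8080 --user admin node info <NODE_ID>
heketi-cli -s http://10.5.135.185:8080 --user admin volume create --size=5 --name=glusterfs-registry-volume --clusters=<REGISTRY_CLUSTER_ID>
ansible-playbook -i ./inventory /opt/env/openshift-ansible/playbooks/byo/config.yml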

Asgoret commented 7 years ago

@jarrpa

ansible-playbook -vvv -i  ./inventory /opt/env/openshift-ansible/playbooks/byo/config.yml --limit @/opt/env/openshift-ansible/playbooks/byo/config.retry

In file "output.txt" contents output log. output.txt

[root@openshift-master ~]# oc get all
NAME                 REVISION   DESIRED   CURRENT   TRIGGERED BY
dc/docker-registry   1          3         0         config
dc/router            1          3         3         config

NAME                   DESIRED   CURRENT   READY     AGE
rc/docker-registry-1   0         0         0         3d
rc/router-1            3         3         3         3d

NAME                     HOST/PORT                                                  PATH      SERVICES          PORT      TERMINATION   WILDCARD
routes/docker-registry   docker-registry-default.router.default.svc.cluster.local             docker-registry   <all>     passthrough   None

NAME                               CLUSTER-IP       EXTERNAL-IP   PORT(S)                   AGE
svc/docker-registry                172.30.84.177    <none>        5000/TCP                  3d
svc/glusterfs-registry-endpoints   172.30.233.182   <none>        1/TCP                     3d
svc/kubernetes                     172.30.0.1       <none>        443/TCP,53/UDP,53/TCP     3d
svc/router                         172.30.138.78    <none>        80/TCP,443/TCP,1936/TCP   3d

NAME                          READY     STATUS    RESTARTS   AGE
po/docker-registry-1-deploy   0/1       Error     0          3d
po/router-1-2ldhg             1/1       Running   0          3d
po/router-1-5t797             1/1       Running   0          3d
po/router-1-wj066             1/1       Running   0          3d
jarrpa commented 7 years ago

Try it again without the --limit.

jarrpa commented 7 years ago

@Asgoret Also run the following commands before re-trying the playbooks:

oc delete dc docker-registry
oc delete rc docker-registry-1
oc delete po docker-registry-1-deploy
Asgoret commented 7 years ago

@jarrpa Before the install:

[root@openshift-master ~]# oc delete dc docker-registry
deploymentconfig "docker-registry" deleted
[root@openshift-master ~]# oc delete rc docker-registry-1
Error from server (NotFound): replicationcontrollers "docker-registry-1" not found
[root@openshift-master ~]# oc delete po docker-registry-1-deploy
Error from server (NotFound): pods "docker-registry-1-deploy" not found
[root@openshift-master ~]# oc get all
NAME        REVISION   DESIRED   CURRENT   TRIGGERED BY
dc/router   1          1         1         config

NAME          DESIRED   CURRENT   READY     AGE
rc/router-1   1         1         1         3h

NAME                     HOST/PORT                                                  PATH      SERVICES          PORT      TERMINATION   WILDCARD
routes/docker-registry   docker-registry-default.router.default.svc.cluster.local             docker-registry   <all>     passthrough   None

NAME                               CLUSTER-IP       EXTERNAL-IP   PORT(S)                   AGE
svc/docker-registry                172.30.24.240    <none>        5000/TCP                  3h
svc/glusterfs-registry-endpoints   172.30.253.160   <none>        1/TCP                     3h
svc/kubernetes                     172.30.0.1       <none>        443/TCP,53/UDP,53/TCP     3h
svc/router                         172.30.162.149   <none>        80/TCP,443/TCP,1936/TCP   3h

NAME                READY     STATUS    RESTARTS   AGE
po/router-1-v801h   1/1       Running   0          3h

It doesn't work :(

[root@openshift-master ~]# oc get all
NAME                 REVISION   DESIRED   CURRENT   TRIGGERED BY
dc/docker-registry   1          1         0         config
dc/router            2          1         1         config

NAME                   DESIRED   CURRENT   READY     AGE
rc/docker-registry-1   0         0         0         17m
rc/router-1            0         0         0         3h
rc/router-2            1         1         1         18m

NAME                     HOST/PORT                                                  PATH      SERVICES          PORT      TERMINATION   WILDCARD
routes/docker-registry   docker-registry-default.router.default.svc.cluster.local             docker-registry   <all>     passthrough   None

NAME                               CLUSTER-IP       EXTERNAL-IP   PORT(S)                   AGE
svc/docker-registry                172.30.24.240    <none>        5000/TCP                  3h
svc/glusterfs-registry-endpoints   172.30.27.136    <none>        1/TCP                     19m
svc/kubernetes                     172.30.0.1       <none>        443/TCP,53/UDP,53/TCP     3h
svc/router                         172.30.162.149   <none>        80/TCP,443/TCP,1936/TCP   3h

NAME                          READY     STATUS    RESTARTS   AGE
po/docker-registry-1-deploy   0/1       Error     0          17m
po/router-2-v2lz9             1/1       Running   0          18m

Log from output:

TASK [openshift_storage_glusterfs : Create GlusterFS registry volume] *************************************************************************
task path: /opt/env/openshift-ansible/roles/openshift_storage_glusterfs/tasks/glusterfs_registry.yml:79
skipping: [openshift-master] => {
    "changed": false, 
    "skip_reason": "Conditional result was False", 
    "skipped": true
}
jarrpa commented 7 years ago

@Asgoret Yes, it should be skipped since you manually created the volume. What's the status of the PV and PVC for the registry volume?

Asgoret commented 7 years ago

@jarrpa The "docker-registry" volume was deployed to the wrong cluster.

[root@openshift-master ~]# heketi-cli topology info
Cluster Id: 20302085243253aa6554cd1a644d4c66

    Volumes:

    Name: glusterfs-registry-volume
....
 Nodes:

    Node Id: 10d6908d10a18f8d7ff1551e09117f01
    Management Hostname: openshift-gluster5
    Storage Hostname: 10.5.135.167

    Node Id: 179392a3fd5cccc27655e35a513fbaf4
    Management Hostname: openshift-gluster4
    Storage Hostname: 10.5.135.168

    Node Id: 8d7f9ace55758e75c67a6f007163f7a3
    Management Hostname: openshift-gluster6
    Storage Hostname: 10.5.135.166

BUT openshift-gluster4-6 are the persistent storage nodes, not the docker-registry storage nodes.

So I deleted the docker-registry volume from the wrong cluster, re-created it on the right cluster, and re-ran the install.

Description of the docker-registry pod:

[root@openshift-master ~]# oc describe po/docker-registry-1-rq3j5 
Name:           docker-registry-1-rq3j5
Namespace:      default
Security Policy:    hostnetwork
Node:           openshift-gluster1/10.5.135.171
Start Time:     Tue, 05 Sep 2017 14:15:45 -0400
Labels:         deployment=docker-registry-1
            deploymentconfig=docker-registry
            docker-registry=default
Annotations:        kubernetes.io/created-by={"kind":"SerializedReference","apiVersion":"v1","reference":{"kind":"ReplicationController","namespace":"default","name":"docker-registry-1","uid":"7ccef581-8dc1-11e7-869f-005...
            openshift.io/deployment-config.latest-version=1
            openshift.io/deployment-config.name=docker-registry
            openshift.io/deployment.name=docker-registry-1
            openshift.io/scc=hostnetwork
Status:         Pending
IP:         
Controllers:        ReplicationController/docker-registry-1
Containers:
  registry:
    Container ID:   
    Image:      openshift/origin-docker-registry:v3.6.0
    Image ID:       
    Port:       5000/TCP
    State:      Waiting
      Reason:       ContainerCreating
    Ready:      False
    Restart Count:  0
    Requests:
      cpu:  100m
      memory:   256Mi
    Liveness:   http-get https://:5000/healthz delay=10s timeout=5s period=10s #success=1 #failure=3
    Readiness:  http-get https://:5000/healthz delay=0s timeout=5s period=10s #success=1 #failure=3
    Environment:
      REGISTRY_HTTP_ADDR:                   :5000
      REGISTRY_HTTP_NET:                    tcp
      REGISTRY_HTTP_SECRET:                 jF28jXYKEUU92N/nS1E3sEm4hCiK/m5Buz6mvVG+rrI=
      REGISTRY_MIDDLEWARE_REPOSITORY_OPENSHIFT_ENFORCEQUOTA:    false
      OPENSHIFT_DEFAULT_REGISTRY:               docker-registry.default.svc:5000
      REGISTRY_HTTP_TLS_KEY:                    /etc/secrets/registry.key
      REGISTRY_HTTP_TLS_CERTIFICATE:                /etc/secrets/registry.crt
    Mounts:
      /etc/secrets from registry-certificates (rw)
      /registry from registry-storage (rw)
      /var/run/secrets/kubernetes.io/serviceaccount from registry-token-lf14z (ro)
Conditions:
  Type      Status
  Initialized   True 
  Ready     False 
  PodScheduled  True 
Volumes:
  registry-storage:
    Type:   PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)
    ClaimName:  registry-claim
    ReadOnly:   false
  registry-certificates:
    Type:   Secret (a volume populated by a Secret)
    SecretName: registry-certificates
    Optional:   false
  registry-token-lf14z:
    Type:   Secret (a volume populated by a Secret)
    SecretName: registry-token-lf14z
    Optional:   false
QoS Class:  Burstable
Node-Selectors: region=infra
Tolerations:    <none>
Events:
  FirstSeen LastSeen    Count   From                SubObjectPath   Type        Reason      Message
  --------- --------    -----   ----                -------------   --------    ------      -------
  1m        1m      1   default-scheduler               Normal      Scheduled   Successfully assigned docker-registry-1-rq3j5 to openshift-gluster1
  <invalid> <invalid>   1   kubelet, openshift-gluster1         Warning     FailedMount MountVolume.SetUp failed for volume "kubernetes.io/glusterfs/7f1dd57e-8dc1-11e7-869f-00505693371a-registry-volume" (spec.Name: "registry-volume") pod "7f1dd57e-8dc1-11e7-869f-00505693371a" (UID: "7f1dd57e-8dc1-11e7-869f-00505693371a") with: glusterfs: mount failed: mount failed: exit status 1
Mounting command: mount
Mounting arguments: 10.5.135.169:glusterfs-registry-volume /var/lib/origin/openshift.local.volumes/pods/7f1dd57e-8dc1-11e7-869f-00505693371a/volumes/kubernetes.io~glusterfs/registry-volume glusterfs [log-level=ERROR log-file=/var/lib/origin/openshift.local.volumes/plugins/kubernetes.io/glusterfs/registry-volume/docker-registry-1-rq3j5-glusterfs.log backup-volfile-servers=10.5.135.169:10.5.135.170:10.5.135.171]
Output: Mount failed. Please check the log file for more details.

 the following error information was pulled from the glusterfs log to help diagnose this issue: 
[2017-09-05 18:15:45.994781] E [glusterfsd-mgmt.c:1779:mgmt_getspec_cbk] 0-glusterfs: failed to get the 'volume file' from server
[2017-09-05 18:15:45.994850] E [glusterfsd-mgmt.c:1879:mgmt_getspec_cbk] 0-mgmt: failed to fetch volume file (key:glusterfs-registry-volume)
....

Hmmm... maybe I configured the glusterfs-registry cluster incorrectly?

Asgoret commented 7 years ago

@jarrpa In your example the docker-registry storage is native to node0-2. In my configuration both the persistent storage and the docker-registry storage are external. The first GlusterFS cluster is the persistent storage; the second GlusterFS cluster is the registry storage.

Can I configure external docker-registry storage and external persistent storage on different GlusterFS clusters?

jarrpa commented 7 years ago

@Asgoret You're starting to lose me a bit, please be more consistent with your terminology. :)

First, make sure you created the volume on the cluster that had the node IP addresses of the glusterfs_registry cluster. Second, make sure the glusterfs-registry-endpoints Endpoints have those same IP addresses listed.
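A quick sketch of how to verify both points (the grep pattern is just a convenience for picking out the relevant fields):

```
# 1. The registry volume should live on the cluster whose storage hostnames are the glusterfs_registry node IPs
heketi-cli -s http://10.5.135.185:8080 --user admin topology info | grep -E "Cluster Id|Name: glusterfs-registry-volume|Storage Hostname"

# 2. The Endpoints object should list those same IPs
oc get endpoints glusterfs-registry-endpoints -n default
```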

Asgoret commented 7 years ago

@jarrpa OK :) Everything is correct.

[root@openshift-master ~]# heketi-cli topology info
Cluster Id: 7c4dd1c18b6e9a357404bf86e5c442a5
    Volumes:

    Name: glusterfs-registry-volume
    Size: 5
    Id: 880a60fa56f80b05be3c63316d7ec21f
    Cluster Id: 7c4dd1c18b6e9a357404bf86e5c442a5
    Mount: 10.5.135.169:glusterfs-registry-volume
    Mount Options: backup-volfile-servers=10.5.135.170,10.5.135.171
    Durability Type: replicate
    Replica: 3
    Snapshot: Disabled
Nodes:
    Node Id: 5a5905a448514c28d3ae4bc200b823df
    State: online
    Management Hostname: openshift-gluster2
    Storage Hostname: 10.5.135.170

    Node Id: 9add049d9f0258a019479503c15756e3
    State: online
    Management Hostname: openshift-gluster1
    Storage Hostname: 10.5.135.171

    Node Id: d1aad8533a6f40653d7a222e3b7ee691
    State: online
    Management Hostname: openshift-gluster3
    Storage Hostname: 10.5.135.169

[root@openshift-master ~]# oc get endpoints
NAME                           ENDPOINTS                                               AGE
docker-registry                <none>                                                  5h
glusterfs-registry-endpoints   10.5.135.169:1,10.5.135.170:1,10.5.135.171:1            53m
kubernetes                     10.5.135.185:8443,10.5.135.185:8053,10.5.135.185:8053   5h
router                         10.5.135.171:443,10.5.135.171:1936,10.5.135.171:80      5h
jarrpa commented 7 years ago

@Asgoret And you have a PVC bound to the PV of the GlusterFS volume?

Asgoret commented 7 years ago

@jarrpa

I'm a little confused... on which node do I need to see this information? And which command do you mean?

jarrpa commented 7 years ago

@Asgoret For a GlusterFS volume to be mounted by a pod, normally it needs to be represented by a PersistentVolume which is then bound to a PersistentVolumeClaim. It's that PVC that the registry pods are trying to mount. So there should be some entry with the word glusterfs in the output of both oc get pv and oc get pvc --all-namespaces. These should have been automatically created by the openshift-ansible installer. If you can find them, please post the oc describe output for both resources. If not, we've hit our problem. :)
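A sketch of those checks; registry-volume and registry-claim are the names the openshift-ansible installer typically uses for the registry PV and PVC, so treat them as assumptions on other setups:

```
# Look for a GlusterFS-backed PV and the PVC bound to it
oc get pv
oc get pvc --all-namespaces

# If present, inspect both resources
oc describe pv registry-volume
oc describe pvc registry-claim -n default
```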

Asgoret commented 7 years ago

@jarrpa

[root@openshift-master ~]# oc get pv
NAME              CAPACITY   ACCESSMODES   RECLAIMPOLICY   STATUS    CLAIM                    STORAGECLASS   REASON    AGE
registry-volume   5Gi        RWX           Retain          Bound     default/registry-claim                            6h
[root@openshift-master ~]# oc get pvc --all-namespaces
NAMESPACE   NAME             STATUS    VOLUME            CAPACITY   ACCESSMODES   STORAGECLASS   AGE
default     registry-claim   Bound     registry-volume   5Gi        RWX                          6h

And describes:

[root@openshift-master ~]# oc describe pv
Name:       registry-volume
Labels:     <none>
Annotations:    pv.kubernetes.io/bound-by-controller=yes
StorageClass:   
Status:     Bound
Claim:      default/registry-claim
Reclaim Policy: Retain
Access Modes:   RWX
Capacity:   5Gi
Message:    
Source:
    Type:       Glusterfs (a Glusterfs mount on the host that shares a pod's lifetime)
    EndpointsName:  glusterfs-registry-endpoints
    Path:       glusterfs-registry-volume
    ReadOnly:       false
Events:         <none>

[root@openshift-master ~]# oc describe pvc
Name:       registry-claim
Namespace:  default
StorageClass:   
Status:     Bound
Volume:     registry-volume
Labels:     <none>
Annotations:    pv.kubernetes.io/bind-completed=yes
        pv.kubernetes.io/bound-by-controller=yes
Capacity:   5Gi
Access Modes:   RWX
Events:     <none>
Asgoret commented 7 years ago

@jarrpa You are an amazing person, thank you for your patience!

jarrpa commented 7 years ago

@Asgoret No problem! My apologies that I haven't been able to help sort this out faster. :)

Hmm... try running mount -t glusterfs -o log-level=DEBUG,backup-volfile-servers=10.5.135.169:10.5.135.170:10.5.135.171 10.5.135.169:glusterfs-registry-volume /mnt on one of the nodes designated for the registry pods. If it fails, check the log files in /var/log/glusterfs for any signs of trying to mount that volume and see if there's anything useful (we're looking for error statements).
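As a concrete sketch of that check, run the following on one of the registry nodes; the /var/log/glusterfs/mnt.log path is an assumption based on the fuse client naming its log after the mount point:

```
# Attempt the same mount the registry pod would perform
mount -t glusterfs -o log-level=DEBUG,backup-volfile-servers=10.5.135.169:10.5.135.170:10.5.135.171 10.5.135.169:glusterfs-registry-volume /mnt

# On failure, look for error-level entries about the volume in the client log
grep " E " /var/log/glusterfs/mnt.log

# Clean up if the mount succeeded
umount /mnt
```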

Asgoret commented 7 years ago

@jarrpa It's okay, I understand that my architecture is non-standard.

[root@openshift-gluster1 ~]# mount -t glusterfs -o log-level=DEBUG,backup-volfile-servers=10.5.135.170:10.5.135.171 10.5.135.169:glusterfs-registry-volume /mnt
Mount failed. Please check the log file for more details.

From log /var/log/glusterfs/mnt.log:

...
[2017-09-06 13:48:43.552866] E [glusterfsd-mgmt.c:1779:mgmt_getspec_cbk] 0-glusterfs: failed to get the 'volume file' from server
[2017-09-06 13:48:43.552884] E [glusterfsd-mgmt.c:1879:mgmt_getspec_cbk] 0-mgmt: failed to fetch volume file (key:glusterfs-registry-volume)
...

Full log: mnt.txt

jarrpa commented 7 years ago

@Asgoret What's the output of gluster volume info on 10.5.135.169?

Asgoret commented 7 years ago

@jarrpa

[root@openshift-gluster3 ~]# gluster volume info
No volumes present

From the other nodes in the docker-registry cluster:

[root@openshift-gluster1 ~]# gluster volume info
No volumes present

[root@openshift-gluster2 ~]# gluster volume info
No volumes present