Open lightmans2 opened 3 years ago
What stands out to me here is "Unable to access the configuration object", which probably means your iSCSI gateway can't access the RADOS object that stores the IGW state configuration. Do you have the admin keyring in /etc/ceph/ceph.client.admin.keyring?
Hi,
You can see here that I can run lspools on the broken gateway, and I have already checked the keyring:
root@cd88-ceph-rgw-01:/etc/ceph# cat ceph.client.admin.keyring
[client.admin]
key = AQAm/xxxxxxxxxxxxxxxxxxxxxxxxxxxxx== :-)
caps mds = "allow *"
caps mgr = "allow *"
caps mon = "allow *"
caps osd = "allow *"
root@cd88-ceph-rgw-01:/etc/ceph# rados lspools
device_health_metrics
rbd
.rgw.root
default.rgw.log
default.rgw.control
default.rgw.meta
hot-storage
cold-storage
test
And here I checked for a lock on gateway.conf... but everything looks good.
root@cd88-ceph-rgw-01:~# rados -p rbd lock list gateway.conf
{"objname":"gateway.conf","locks":[{"name":"lock"}]}root@cd88-ceph-rgw-01:~# rados -p rbd lock info gateway.conf lock
{"name":"lock","type":"exclusive","tag":"","lockers":[]}root@cd88-ceph-rgw-01:~#
is the rbd-target-api daemon running?
i see you have it on a non-default port 5001, is that open between your iscsi gateways
Hi,
I have already tried several times to reset the failed-service counter and restart the rbd-target-api service, but after three attempts it crashes again. I can see in the journalctl logs that it loads gateway.conf and then has a problem with the initiator CHAP passwords...
> i see you have it on a non-default port 5001, is that open between your iscsi gateways

Yes, there is no firewall such as ufw or firewalld running, and the network switches are open without any firewall, proxy, or similar.
Jul 29 22:15:47 cd88-ceph-rgw-01 rbd-target-api[91804]: Could not load gateway: Unable to decode password for iqn.1998-01.com.vmware:cd-133-1-4-4a715184. CHAP error: Problems decoding the encrypted password
Jul 29 22:15:47 cd88-ceph-rgw-01 rbd-target-api[91804]: Encryption/decryption failed.
● rbd-target-api.service - Ceph iscsi target configuration API
Loaded: loaded (/lib/systemd/system/rbd-target-api.service; enabled; vendor preset: enabled)
Active: failed (Result: exit-code) since Thu 2021-07-29 22:15:47 CEST; 39min ago
Process: 91804 ExecStart=/usr/bin/rbd-target-api (code=exited, status=16)
Main PID: 91804 (code=exited, status=16)
Jul 29 22:15:47 cd88-ceph-rgw-01 systemd[1]: rbd-target-api.service: Scheduled restart job, restart counter is at 3.
Jul 29 22:15:47 cd88-ceph-rgw-01 systemd[1]: Stopped Ceph iscsi target configuration API.
Jul 29 22:15:47 cd88-ceph-rgw-01 systemd[1]: rbd-target-api.service: Start request repeated too quickly.
Jul 29 22:15:47 cd88-ceph-rgw-01 systemd[1]: rbd-target-api.service: Failed with result 'exit-code'.
Jul 29 22:15:47 cd88-ceph-rgw-01 systemd[1]: Failed to start Ceph iscsi target configuration API.
what does /var/log/rbd-target-api/rbd-target-api.log say ? I wouldnt expect much of anything with gwcli to work if rbd-target-api isnt running.
what does /var/log/rbd-target-api/rbd-target-api.log say ? I wouldnt expect much of anything with gwcli to work if rbd-target-api isnt running.
2021-07-29 22:15:47,389 INFO [target.py:395:load_config()] - No available network portal for target with iqn of 'iqn.2003-01.com.ceph.iscsi-gw'
2021-07-29 22:15:47,390 INFO [target.py:404:load_config()] - (Gateway.load_config) successfully loaded existing target definition
2021-07-29 22:15:47,631 INFO [gateway.py:359:delete()] - Active Ceph iSCSI gateway configuration removed
2021-07-29 22:15:47,631 CRITICAL [rbd-target-api:2792:halt()] - Could not load gateway: Unable to decode password for iqn.1998-01.com.vmware:cd-133-1-4-4a715184. CHAP error: Problems decoding the encrypted password
root@cd88-ceph-rgw-01:~#
please add debug = true to iscsi-gateway.cfg as well
please add debug = true to iscsi-gateway.cfg as well
[config]
# Name of the Ceph storage cluster. A suitable Ceph configuration file allowing
# access to the Ceph storage cluster from the gateway node is required, if not
# colocated on an OSD node.
cluster_name = ceph
# Place a copy of the ceph cluster's admin keyring in the gateway's /etc/ceph
# directory and reference the filename here
gateway_keyring = ceph.client.admin.keyring
pool = rbd
debug = true
# API settings.
# The API supports a number of options that allow you to tailor it to your
# local environment. If you want to run the API under https, you will need to
# create cert/key files that are compatible for each iSCSI gateway node, that is
# not locked to a specific node. SSL cert and key files *must* be called
# 'iscsi-gateway.crt' and 'iscsi-gateway.key' and placed in the '/etc/ceph/' directory
# on *each* gateway node. With the SSL files in place, you can use 'api_secure = true'
# to switch to https mode.
# To support the API, the bare minimum settings are:
api_secure = false
# Additional API configuration options are as follows, defaults shown.
api_user = admin
api_password = admin
api_port = 5001
trusted_ip_list = 10.50.50.20,10.50.50.21
/var/log/rbd-target-api/rbd-target-api.log
2021-07-29 23:06:24,956 INFO [target.py:395:load_config()] - No available network portal for target with iqn of 'iqn.2003-01.com.ceph.iscsi-gw'
2021-07-29 23:06:24,956 INFO [target.py:404:load_config()] - (Gateway.load_config) successfully loaded existing target definition
2021-07-29 23:06:25,205 INFO [gateway.py:359:delete()] - Active Ceph iSCSI gateway configuration removed
2021-07-29 23:06:25,206 CRITICAL [rbd-target-api:2792:halt()] - Could not load gateway: Unable to decode password for iqn.1998-01.com.vmware:cd-133-1-4-4a715184. CHAP error: Problems decoding the encrypted password
journalctl log
Jul 29 23:06:24 cd88-ceph-rgw-01 rbd-target-api[102165]: (Gateway.load_config) successfully loaded existing target definition
Jul 29 23:06:24 cd88-ceph-rgw-01 rbd-target-api[102165]: No available network portal for target with iqn of 'iqn.2003-01.com.ceph.iscsi-gw'
Jul 29 23:06:24 cd88-ceph-rgw-01 rbd-target-api[102165]: Removing iSCSI target from LIO
Jul 29 23:06:24 cd88-ceph-rgw-01 rbd-target-api[102165]: Could not load gateway: Unable to decode password for iqn.1998-01.com.vmware:cd-133-1-4-4a7>
Jul 29 23:06:24 cd88-ceph-rgw-01 rbd-target-api[102165]: Could not define clients: Unable to decode password for iqn.1998-01.com.vmware:cd-133-1-4-4>
Jul 29 23:06:24 cd88-ceph-rgw-01 rbd-target-api[102165]: iqn.2003-01.com.ceph.iscsi-gw - Processing client configuration
Jul 29 23:06:24 cd88-ceph-rgw-01 rbd-target-api[102165]: Setup group ao for rbd.disk_4 on tpg 1 (state 0, owner True, failover type 1)
Jul 29 23:06:24 cd88-ceph-rgw-01 rbd-target-api[102165]: (LUN.add_dev_to_lio) Successfully added rbd/disk_4 to LIO
Jul 29 23:06:22 cd88-ceph-rgw-01 systemd[1]: Stopped Ceph iscsi target configuration API.
Jul 29 23:06:22 cd88-ceph-rgw-01 systemd[1]: rbd-target-api.service: Scheduled restart job, restart counter is at 2.
Jul 29 23:06:22 cd88-ceph-rgw-01 systemd[1]: rbd-target-api.service: Failed with result 'exit-code'.
Jul 29 23:06:22 cd88-ceph-rgw-01 systemd[1]: rbd-target-api.service: Main process exited, code=exited, status=16/n/a
Jul 29 23:06:22 cd88-ceph-rgw-01 rbd-target-api[101564]: Encryption/decryption failed.
Jul 29 23:06:22 cd88-ceph-rgw-01 rbd-target-api[101564]: Could not load gateway: Unable to decode password for iqn.1998-01.com.vmware:cd-133-1-4-4a7>
Jul 29 23:06:22 cd88-ceph-rgw-01 rbd-target-api[101564]: Active Ceph iSCSI gateway configuration removed
FYI
I already removed both gateways and the configuration and added them again. The problem occurs after adding the third or fourth iSCSI initiator for the ESXi hosts with a CHAP username and password... it's very strange...
I have already done that twice... two days ago it was the other gateway host, cd133-ceph-rgw-01, that gave me this error.
Wondering if the json of the rados config object is somehow malformed, could you post its contents?
Can you give me the command to download it? :-)
root@cd88-ceph-rgw-01:~# rados -p rbd get gateway.conf /tmp/gateway.conf
root@cd88-ceph-rgw-01:~# cat /tmp/gateway.conf
{
"created": "2021/07/27 14:07:02",
"discovery_auth": {
"mutual_password": "",
"mutual_password_encryption_enabled": false,
"mutual_username": "",
"password": "",
"password_encryption_enabled": false,
"username": ""
},
"disks": {
"rbd/disk_1": {
"allocating_host": "cd133-ceph-rgw-01",
"backstore": "user:rbd",
"backstore_object_name": "rbd.disk_1",
"controls": {
"max_data_area_mb": 128
},
"created": "2021/07/28 23:01:28",
"image": "disk_1",
"owner": "cd133-ceph-rgw-01",
"pool": "rbd",
"pool_id": 2,
"updated": "2021/07/28 23:10:58",
"wwn": "6beb2cdd-6d5b-4121-803a-c6c29933f67f"
},
"rbd/disk_2": {
"allocating_host": "cd133-ceph-rgw-01",
"backstore": "user:rbd",
"backstore_object_name": "rbd.disk_2",
"controls": {
"max_data_area_mb": 128
},
"created": "2021/07/28 23:01:30",
"image": "disk_2",
"owner": "cd133-ceph-rgw-01",
"pool": "rbd",
"pool_id": 2,
"updated": "2021/07/29 11:00:27",
"wwn": "9974ec18-c5a7-4908-9533-a73690582bd3"
},
"rbd/disk_3": {
"allocating_host": "cd133-ceph-rgw-01",
"backstore": "user:rbd",
"backstore_object_name": "rbd.disk_3",
"controls": {
"max_data_area_mb": 128
},
"created": "2021/07/28 23:01:31",
"image": "disk_3",
"owner": "cd133-ceph-rgw-01",
"pool": "rbd",
"pool_id": 2,
"updated": "2021/07/28 23:11:00",
"wwn": "4e5465cd-5c2f-485f-a3ad-731efc4091fd"
},
"rbd/disk_4": {
"allocating_host": "cd133-ceph-rgw-01",
"backstore": "user:rbd",
"backstore_object_name": "rbd.disk_4",
"controls": {
"max_data_area_mb": 128
},
"created": "2021/07/28 23:01:32",
"image": "disk_4",
"owner": "cd133-ceph-rgw-01",
"pool": "rbd",
"pool_id": 2,
"updated": "2021/07/29 11:00:27",
"wwn": "5aaf02b0-db8b-4092-8d90-5229ecf33fd5"
}
},
"epoch": 275,
"gateways": {
"cd133-ceph-rgw-01": {
"active_luns": 4,
"created": "2021/07/28 23:00:25",
"updated": "2021/07/29 11:00:27"
},
"cd88-ceph-rgw-01": {
"active_luns": 0,
"created": "2021/07/29 11:02:42",
"updated": "2021/07/29 11:02:42"
}
},
"targets": {
"iqn.2003-01.com.ceph.iscsi-gw": {
"acl_enabled": true,
"auth": {
"mutual_password": "",
"mutual_password_encryption_enabled": true,
"mutual_username": "",
"password": "",
"password_encryption_enabled": true,
"username": ""
},
"clients": {
"iqn.1998-01.com.vmware:cd-133-1-4-4a715184": {
"auth": {
"mutual_password": "",
"mutual_password_encryption_enabled": false,
"mutual_username": "",
"password": "MASi3i6jXkYlnuqlqs7jwByKjNLJk03tUsVk5d7+cEiL3OzqiFK5kSKp9CwmzntPQQXWfXEWEfvR6RZLMFJ6s/1uJ/c8PhYXQRMhT39ocPTeNcBaGEzVu76VXwFlhLL/MPhl6BqzlevxajWHyonySL9I+1U25VJH0EFru82jBeM/d8/gPAm69Czc/ixNFrKcwTmY47sGy/qORZh3VsZrWY2ifZ37k/q9kH5T3BJR/jZX/vPR1bqT+fommlQtZ2dFO/dVKpJOz6PqTWlhbP9PjtxzPsssRtXKpGL1tJS8PS035+wf1LVTXabyhqwatmWs/vCO9pvMpnzEAY7i3lY7cA==",
"password_encryption_enabled": true,
"username": "administrator"
},
"group_name": "",
"luns": {
"rbd/disk_1": {
"lun_id": 0
},
"rbd/disk_2": {
"lun_id": 1
},
"rbd/disk_3": {
"lun_id": 2
},
"rbd/disk_4": {
"lun_id": 3
}
}
},
"iqn.1998-01.com.vmware:esx-088-2-0-53595db0": {
"auth": {
"mutual_password": "",
"mutual_password_encryption_enabled": false,
"mutual_username": "",
"password": "FGSDKsWLA2PPNniSw1aAoWUZs4RCOs1sS5f5VgZ4DyQ97LF0SiyhvkFyNOvsTURNO6rJjMg1tmkT9laVPfRwdt/O2yAwDHqQIU1wb7n58YYekUHt6hC3Gdwz0NkogQuezR8tEiAqLUcwhUxpppyg+fKzXxzEIammWQQof69DPnjMte/pBFz04KomhZqNFyO4btXL6mEE/PkvjuZcBESGbZSHc87tEqYeuoHlAo2xx5P7HItOCOWgsAm2t+CfbJM+hk0IX/IMChN0NxOWqF8SsqFUtMw/kCZV6YAuzDXpM4LoWamAoavfO7zCMy2bjWLZZrkZ+7UbdkBTX+dDo2ZrBw==",
"password_encryption_enabled": true,
"username": "administrator"
},
"group_name": "",
"luns": {
"rbd/disk_1": {
"lun_id": 0
},
"rbd/disk_2": {
"lun_id": 1
},
"rbd/disk_3": {
"lun_id": 2
},
"rbd/disk_4": {
"lun_id": 3
}
}
},
"iqn.1998-01.com.vmware:esx-088-3-3-74094904": {
"auth": {
"mutual_password": "",
"mutual_password_encryption_enabled": false,
"mutual_username": "",
"password": "RjOXKDEatgT7ZXDfYWOF3pAPg/OYhjnGlYnxBEU7EvVbm/oH5cshXejwlfMHGqQPgAOAuw/No4ahgKz14Br4S6ioGsqgT9KIpqk9a5W6wKT0M/YCcKFouJQEnPMUMXkFjsh9SbKrx1dArzEqS5nOmaaYq3UCRJ0ZX9G6CTkitBb6A40yY+eICh8xaoWjU0okycxOZpBvcHpIpFcmVViOaHPkd2GRZoj0gpZmJJzOFUw5bxUdwalhYwaard4h/blCdiHcqO1cKaIJLANzzILuYId+27NGuUNThF7oeTW3wVe1zYakNBnqHd554E7GfZoEHdysZXIDbSczhwRAIz1xog==",
"password_encryption_enabled": true,
"username": "administrator"
},
"group_name": "",
"luns": {
"rbd/disk_1": {
"lun_id": 0
},
"rbd/disk_2": {
"lun_id": 1
},
"rbd/disk_3": {
"lun_id": 2
},
"rbd/disk_4": {
"lun_id": 3
}
}
},
"iqn.1998-01.com.vmware:esx-133-1-3-708cbbe8": {
"auth": {
"mutual_password": "",
"mutual_password_encryption_enabled": false,
"mutual_username": "",
"password": "ezjkk7Q2mWMXY1HnTsOjETg8VnqJR42jCpcxOidp466kKrJCQeONJl5ePqvpXtuIuB/OeSQbwwxV1itS7IB2tf9JgWjBgcvV/azFglbLjvwRaQW5CsxKt8lEGl45fF6/7zvuVuUWYW4n61tuHh143JTiPK6rDLbsRN+RA5AYcxnOhpnT7R43Rv54EDwL1MLej9HTYYF/7Z3dH8mUIw03L65c2ApCOX6oXp8ymhKbYQ8IhgwZhXUgwUyRt62BNxaq9uSF2kSB/CFVw99/I8PgMlYGhSqK3yWOyuRAD5HsJyPbxqqKtqsO0y5apVbOCbcvoSnJT/ui4F8G4rk9uHt2sw==",
"password_encryption_enabled": true,
"username": "administrator"
},
"group_name": "",
"luns": {
"rbd/disk_1": {
"lun_id": 0
},
"rbd/disk_2": {
"lun_id": 1
},
"rbd/disk_3": {
"lun_id": 2
},
"rbd/disk_4": {
"lun_id": 3
}
}
},
"iqn.1998-01.com.vmware:esx-133-1-5-40a79d79": {
"auth": {
"mutual_password": "",
"mutual_password_encryption_enabled": false,
"mutual_username": "",
"password": "T1nVzKyS1AtE/Pixfy1pEjm511t47CRpWDcHLlxKxMpoOl6NPAa6vuqA3b6B65B/NdsjKfmv3NBX0obIPAaFSyfXrRenmBvzL3fz1CM/rsZgn9xRnvWn8T47T4W30H8PZ3QLlk7tdiAbjYJPh0m+aTN6UsNi2Mi3LP+6XGjaolMD38m4uDEWt8yYryIfrMGNEWSoWCwJBH8OEWgQ8vlsr9Z5VVgM2ZSptqhKOwTqVYqzr4XZ1AxeJ1FUy32WFLH6sCHrVzbhsqEqrtI2te/nQ2eZOh8kwTdLVseraMlpuu2RjmMnQskmf2VMe+M5ZLn2/HY+fanXmiw4PmvPlAg9qA==",
"password_encryption_enabled": true,
"username": "administrator"
},
"group_name": "",
"luns": {
"rbd/disk_1": {
"lun_id": 0
},
"rbd/disk_2": {
"lun_id": 1
},
"rbd/disk_3": {
"lun_id": 2
},
"rbd/disk_4": {
"lun_id": 3
}
}
},
"iqn.1998-01.com.vmware:esx-133-1-6-3da3dcdf": {
"auth": {
"mutual_password": "",
"mutual_password_encryption_enabled": false,
"mutual_username": "",
"password": "vTkd5nD4XS6gBeXk96EtUOL67nDJWM6JqqiShwE0U6+C3zrkWZVODm7gcgBUAiQt8Pz9rroT9ucRmj2o+JgY6B91TuL5fMDGizJcUYI81djsgEoBTHlACfy7Fdb98XE6BEnfq16iVMZCFxNMQjSE+8qvI6NQPQZpFNsN+mO2tGlSpZqf5/eGY4hs2OWhkYMNtowt01UInvATLte4sImDX/tnfHSwRlaqYHcJJ8//ySaNGLrhG9i2fWTfdb+S2s4TYou9m2rLf6uQV57rEXQcbntrhEulTi0xL4/Qix73UPvpHI+iVhrTHnbZ1iEUEtkTzDxb/pMsaTSFIlu2y8mS7A==",
"password_encryption_enabled": true,
"username": "administrator"
},
"group_name": "",
"luns": {
"rbd/disk_1": {
"lun_id": 0
},
"rbd/disk_2": {
"lun_id": 1
},
"rbd/disk_3": {
"lun_id": 2
},
"rbd/disk_4": {
"lun_id": 3
}
}
}
},
"controls": {},
"created": "2021/07/28 22:59:37",
"disks": {
"rbd/disk_1": {
"lun_id": 0
},
"rbd/disk_2": {
"lun_id": 1
},
"rbd/disk_3": {
"lun_id": 2
},
"rbd/disk_4": {
"lun_id": 3
}
},
"groups": {},
"ip_list": [
"10.50.50.31",
"10.50.50.30"
],
"portals": {
"cd133-ceph-rgw-01": {
"gateway_ip_list": [
"10.50.50.31",
"10.50.50.30"
],
"inactive_portal_ips": [
"10.50.50.30"
],
"portal_ip_addresses": [
"10.50.50.31"
],
"tpgs": 2
},
"cd88-ceph-rgw-01": {
"gateway_ip_list": [
"10.50.50.31",
"10.50.50.30"
],
"inactive_portal_ips": [
"10.50.50.31"
],
"portal_ip_addresses": [
"10.50.50.30"
],
"tpgs": 2
}
},
"updated": "2021/07/29 11:02:42"
}
},
"updated": "2021/07/29 11:02:42",
"version": 11
rados --pool rbd get gateway.conf /tmp/gateway.conf.original
rados --pool rbd get gateway.conf /tmp/gateway.conf.original
Yep, thanks :-) I had already found it.
Maybe there is a bug in the script, or maybe it's a special character in the generated password of the initiator iqn.1998-01.com.vmware:cd-133-1-4-4a715184?
is the password encrypted with openssl?
"password": "vTkd5nD4XS6gBeXk96EtUOL67nDJWM6JqqiShwE0U6+C3zrkWZVODm7gcgBUAiQt8Pz9rroT9ucRmj2o+JgY6B91TuL5fMDGizJcUYI81djsgEoBTHlACfy7Fdb98XE6BEnfq16iVMZCFxNMQjSE+8qvI6NQPQZpFNsN+mO2tGlSpZqf5/eGY4hs2OWhkYMNtowt01UInvATLte4sImDX/tnfHSwRlaqYHcJJ8//ySaNGLrhG9i2fWTfdb+S2s4TYou9m2rLf6uQV57rEXQcbntrhEulTi0xL4/Qix73UPvpHI+iVhrTHnbZ1iEUEtkTzDxb/pMsaTSFIlu2y8mS7A==",
FYI
The username and the password are the same on all initiators...
# auth username=administrator password=1q2w3e4r5t6z7u
Any ideas or suggestions? I think we have a bug or something similar here.
Hello together
I have a strange problem on one of my two gateways. On the second gateway the rbd-target-api service does not start because there is a problem with the password encryption/decryption.
I have already removed the node from gwcli, rebooted the machine, and re-registered it in gwcli, but the problem still exists.
Any idea how to fix this, or what the cause is?
thx in advance
here some infos:
OS: Ubuntu 20.04.2 ceph-iscsi 3.5
root@cd88-ceph-rgw-01:~# uname -r
5.4.0-80-generic