[root@xnode-02 ~]# lvmlockctl -i
VG cluster-vg lock_type=sanlock ekIrXF-E90O-jbNf-N0PJ-OkSj-5c38-yECI0Q
LS sanlock lvm_cluster-vg
LK VG un ver 59
LK LV sh 8esgI4-76Kl-lKmz-FPJb-liJI-bMeh-Yty1Ln
LK LV sh psXVNp-amo6-cYrr-pb7p-S3Ba-5cnf-5J7DhP
LK LV ex Fl2jpM-swLS-vRSy-B7Hw-kpbP-mzFk-mLPBdS
LK LV sh ydSYeV-wefa-JrYq-8Mi1-IshD-Y1Eb-WsqPC6
LK LV ex JpMLPd-Oilm-Y2rP-wivl-8FyE-RXK7-E784zN
LK LV ex veg4qE-FSyf-Szw0-EpUb-62NA-ne9d-O4QFjt
LK LV sh jniCrd-P0zL-4E28-lAQP-5FwY-87F0-kpnuDw
LK LV sh 4QAyY2-plcW-fUrW-Gqd9-gR0H-CzbN-RmU5CD
LK LV ex vkGvTS-89DI-5pH3-3TnL-sUSJ-kzXy-7qsbhq
LK LV sh mUWlmK-20li-DlAf-YL5b-9uJ5-1PmY-cQMP6m
LK LV ex rUlZiV-OdpP-2wFY-I1gD-fj4Q-SXs0-7xfWho
LK LV sh 1odInM-XD6d-6sfi-I3eZ-U85s-KED9-SzE67L
LK LV ex TS6DNR-ZsxO-fr2K-CMW1-7cD3-93iI-vG9TOp
LK LV ex VViiYK-7Psg-1wNz-jPuQ-ItqF-L1Uy-7TI8HX
LK LV sh N8yKxY-WdMD-kf0u-yxLN-F5RC-5xs3-hIgkxP
LK LV sh LwQY13-sVp4-xHHc-QMvN-zBGS-u6Z6-Fg74Ol
LK LV ex SY0Zdu-P88U-81ho-BS9m-gMt6-28AD-YwzK2j
LK LV sh aYzkuW-Bszv-Gf8X-kvCY-JSdK-T2KE-uP4tPI
LK LV ex nhH18Z-9nWt-gw8a-iEjB-mvJW-3PTu-gbFhkT
LK LV ex K1QARr-YN1w-zCrv-RBel-auMC-U2cD-wv081z
LK LV sh lsp8Bw-uzbi-zwQY-5pzD-3o2I-Tg7N-fEW6ja
LK LV sh 3k4Lw0-BTKZ-AUz9-0eF5-mEDZ-qil4-UHEqUy
LK LV ex gjr6LA-ziJs-ENTV-fVxC-PMpj-zOaU-UzINjA
LK LV sh QwHtCp-BpFm-tpMC-usyv-5Tue-Fmg4-wcwKOt
LK LV ex B7pbLE-yy2p-mKbR-ccrN-qMRV-eyVA-rgzmEm
LK LV ex B4olL3-XYy3-cFHt-eN9O-E49o-KIVd-4X2no5
LK LV ex 6UtzRd-kD7Y-7kEv-Tzdt-5ZUz-d2WF-tfwjOw
[root@xnode-02 ~]# lvm lvscan
Skipping global lock: lockspace not found or started
ACTIVE '/dev/cluster-vg/imagecache_c5771408-5ad6-4e70-8e91-b92add4de4e3' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/imagecache_d6c5b885-5408-436e-8b50-545334a8ec91' [<3.12 GiB] inherit
ACTIVE '/dev/cluster-vg/b562c479-f800-4008-8734-12aa4b218c40' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/imagecache_db54b618-2714-444c-869a-56086b1575bf' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/8d00c69c-1aa4-4f39-807d-1b07304bce31' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/snap_3cb5f5e1-52a2-4403-8764-3e6f5cd9af41' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/9cfc1cdb-52e8-46bf-8592-92d43d146bb3' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/imagecache_708a4e5d-2fe4-4291-86e8-d8de9b4f5297' [4.68 GiB] inherit
ACTIVE '/dev/cluster-vg/a5d79858-5b68-4730-8732-48f37055bcae' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/imagecache_79c6ab74-f588-4c83-84b4-cc34ac70f8a3' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/1dae556a-0fa1-4e7b-8a05-1f4f3026dca3' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/c9904b74-a729-41ae-8d41-dd5c3af410cf' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/3aa501a2-ddd4-47cb-8aa1-606db1144d25' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/2d485819-9516-4ab6-88bb-d00aec21da2e' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/bdc7474b-1ea7-4e0e-881d-795a91b7af87' [30.01 GiB] inherit
ACTIVE '/dev/cluster-vg/imagecache_e843ada2-e25b-463a-8acc-496008605045' [30.01 GiB] inherit
ACTIVE '/dev/cluster-vg/a2341ad5-6d65-4477-8e1b-7dec51065e5d' [30.01 GiB] inherit
ACTIVE '/dev/cluster-vg/08443518-068f-47f5-842c-afaafb101620' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/snap_0fcb5ed1-97e0-4457-8d0a-a77d6300df43' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/snap_f356aa94-0de6-4ff8-85cc-e7970bea0933' [100.01 GiB] inherit
ACTIVE '/dev/cluster-vg/imagecache_abf62112-bd76-4a5a-8e6f-eb49ab1f5d28' [4.22 GiB] inherit
ACTIVE '/dev/cluster-vg/imagecache_a832cfbe-308c-411b-89dd-f3442299e752' [5.25 GiB] inherit
ACTIVE '/dev/cluster-vg/190064fa-58be-4584-84e9-23dd8fef08ca' [40.01 GiB] inherit
ACTIVE '/dev/cluster-vg/627ba72c-b56b-42a5-8c6d-d7c48e0e4104' [100.01 GiB] inherit
ACTIVE '/dev/cluster-vg/32a641de-ca49-4100-86d2-049f75cbca33' [100.01 GiB] inherit
ACTIVE '/dev/cluster-vg/e76bf36b-fee8-43e3-8874-a7c55c9894da' [50.01 GiB] inherit
ACTIVE '/dev/cluster-vg/a4de438e-7b90-476e-8313-f83ff9abe735' [100.01 GiB] inherit
ACTIVE '/dev/openeuler_xnode-03/swap' [8.00 GiB] inherit
ACTIVE '/dev/openeuler_xnode-03/home' [300.00 GiB] inherit
ACTIVE '/dev/openeuler_xnode-03/root' [580.00 GiB] inherit
问题描述/What happened:
根据文档《Shared LVM Storage With Lvmlockd》创建了一个 4 节点的集群用于测试,节点编号为 A、B、C、D:
所有节点均按照文档修改了配置(4 台机器 host_id 不同)
新建了若干虚拟机,较为均衡的分布运行在4台宿主机上。
宿主机物理重启后,先启动的机器(假设为 B)会获取所有 LV 并将其设置为锁定状态,导致其他宿主机无法获取这些 LV 的锁,虚拟机启动失败。
查看 B 机器的锁状态,发现共享(shared)磁盘都被挂载了,且持有的是 ex 排他锁。
通过 lsblk 查看各机器磁盘挂载
机器B
其他机器(A、C、D)因无法获取虚机的磁盘,只挂载了镜像缓存
其他 A、C、D 机器启动后,通过
kubectl logs -f default-host-w2nvl -n onecloud
指令查看,均报类似以下错误,导致虚拟机无法启动。

环境/Environment:
OS (e.g.
cat /etc/os-release
): NAME="openEuler" VERSION="22.03 (LTS-SP3)" ID="openEuler" VERSION_ID="22.03" PRETTY_NAME="openEuler 22.03 (LTS-SP3)" ANSI_COLOR="0;31"
Kernel (e.g.
uname -a
): Linux xnode-01.icc.local 5.10.0-224.0.0.127.oe2203sp3.x86_64 #1 SMP Wed Aug 21 14:51:23 CST 2024 x86_64 x86_64 x86_64 GNU/Linux
Host: (e.g.
dmidecode | egrep -i 'manufacturer|product' |sort -u
) [root@xnode-01 ~]# dmidecode | egrep -i 'manufacturer|product' |sort -u
Manufacturer: Intel(R) Corporation
Manufacturer: Samsung
Manufacturer: XFUSION
Memory Subsystem Controller Manufacturer ID: Unknown
Memory Subsystem Controller Product ID: Unknown
Module Manufacturer ID: Bank 1, Hex 0xCE
Module Product ID: Unknown
Product Name: 2288H V6
Product Name: BC13MBSBH
Service Version (e.g.
kubectl exec -n onecloud $(kubectl get pods -n onecloud | grep climc | awk '{print $1}') -- climc version-list
):