shakenfist / shakenfist

Old man shakes fist at cloud
Apache License 2.0
32 stars 12 forks source link

Race creating a file in the image cache? #1311

Open mikalstill opened 2 years ago

mikalstill commented 2 years ago

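This looks like a real bug to me, from a recent CI run. qemu-img fails to lock the image cache file while converting a blob into it, which suggests another process was working on the same cache entry at the same time: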

shakenfist_ci.tests.test_cloudinit.TestCloudInit.test_cloudinit_no_tracebacks
-----------------------------------------------------------------------------

Captured traceback:
~~~~~~~~~~~~~~~~~~~
    Traceback (most recent call last):

      File "/home/debian/shakenfist/deploy/shakenfist_ci/tests/test_cloudinit.py", line 93, in test_cloudinit_no_tracebacks
    self._await_instance_ready(inst['uuid'])

      File "/home/debian/shakenfist/deploy/shakenfist_ci/base.py", line 127, in _await_instance_ready
    return self._await_instance_event(

      File "/home/debian/shakenfist/deploy/shakenfist_ci/base.py", line 144, in _await_instance_event
    raise StartException(

    shakenfist_ci.base.StartException: Instance 94490fd0-0e2c-4aba-bc9f-acc81dd51656 failed to start (marked as error state, {'uuid': '94490fd0-0e2c-4aba-bc9f-acc81dd51656', 'cpus': 2, 'disk_spec': [{'base': 'sf://upload/system/ubuntu-2004', 'blob_uuid': '330e7f5f-9109-4876-b73f-35b517d0294b', 'size': 8, 'type': 'disk'}], 'memory': 2048, 'name': 'notracebacks', 'namespace': 'ci-cloudinit-lnedhqhj', 'ssh_key': None, 'state': 'error', 'user_data': None, 'video': {'memory': 16384, 'model': 'cirrus'}, 'uefi': False, 'configdrive': 'openstack-disk', 'nvram_template': None, 'secure_boot': False, 'machine_type': 'pc', 'side_channels': ['sf-agent'], 'version': 7, 'error_message': 'Failed queue task: Unexpected error while running command.\nCommand: ionice -c 2 -n 7 qemu-img convert -t none -o cluster_size=2048K -O qcow2 /srv/shakenfist/blobs/330e7f5f-9109-4876-b73f-35b517d0294b /srv/shakenfist/image_cache/330e7f5f-9109-4876-b73f-35b517d0294b.qcow2\nExit code: 1\nStdout: \'\'\nStderr: \'qemu-img: /srv/shakenfist/image_cache/330e7f5f-9109-4876-b73f-35b517d0294b.qcow2: error while converting qcow2: Failed to get shared "resize" lock\\nIs another process using the image [/srv/shakenfist/image_cache/330e7f5f-9109-4876-b73f-35b517d0294b.qcow2]?\\n\'', 'console_port': None, 'node': 'sf-5', 'power_state': 'initial', 'vdi_port': None, 'interfaces': [{'uuid': 'a5159cb1-6243-49d0-bc2a-81011fdc46f1', 'network_uuid': 'b5bc2c90-3644-4963-8364-97c36f238891', 'instance_uuid': '94490fd0-0e2c-4aba-bc9f-acc81dd51656', 'macaddr': '02:00:00:26:ac:fd', 'ipv4': '192.168.242.144', 'order': 0, 'model': 'virtio', 'state': 'deleted', 'version': 2, 'floating': None}, {'uuid': '6aca99bb-2f2c-456c-8ae2-4fb6c691558f', 'network_uuid': '4573a82c-fd6e-4ca4-b0e9-bd54b6086eb1', 'instance_uuid': '94490fd0-0e2c-4aba-bc9f-acc81dd51656', 'macaddr': '02:00:00:73:22:aa', 'ipv4': None, 'order': 1, 'model': 'virtio', 'state': 'deleted', 'version': 2, 'floating': None}], 'disks': []})

mikalstill commented 2 years ago

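Similarly, a second test failed on the same qemu-img convert step, this time unable to get the "write" lock on the image cache file: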

shakenfist_ci.tests.test_boot.TestBoot.test_boot_no_network
-----------------------------------------------------------

Captured traceback:
~~~~~~~~~~~~~~~~~~~
    Traceback (most recent call last):

      File "/home/debian/shakenfist/deploy/shakenfist_ci/tests/test_boot.py", line 33, in test_boot_no_network
    self._await_instance_ready(inst['uuid'])

      File "/home/debian/shakenfist/deploy/shakenfist_ci/base.py", line 127, in _await_instance_ready
    return self._await_instance_event(

      File "/home/debian/shakenfist/deploy/shakenfist_ci/base.py", line 144, in _await_instance_event
    raise StartException(

    shakenfist_ci.base.StartException: Instance b5c7e005-380d-48d9-acbd-9199bd30c70b failed to start (marked as error state, {'uuid': 'b5c7e005-380d-48d9-acbd-9199bd30c70b', 'cpus': 1, 'disk_spec': [{'base': 'sf://upload/system/debian-11', 'blob_uuid': '37c76155-8519-4c41-8457-132e45d23b1a', 'size': 8, 'type': 'disk'}], 'memory': 1024, 'name': 'test-boot-no-network', 'namespace': 'ci-cirros-vkjtfjli', 'ssh_key': None, 'state': 'error', 'user_data': None, 'video': {'memory': 16384, 'model': 'cirrus'}, 'uefi': False, 'configdrive': 'openstack-disk', 'nvram_template': None, 'secure_boot': False, 'machine_type': 'pc', 'side_channels': ['sf-agent'], 'version': 7, 'error_message': 'Failed queue task: Unexpected error while running command.\nCommand: ionice -c 2 -n 7 qemu-img convert -t none -o cluster_size=2048K -O qcow2 /srv/shakenfist/blobs/37c76155-8519-4c41-8457-132e45d23b1a /srv/shakenfist/image_cache/37c76155-8519-4c41-8457-132e45d23b1a.qcow2\nExit code: 1\nStdout: \'\'\nStderr: \'qemu-img: /srv/shakenfist/image_cache/37c76155-8519-4c41-8457-132e45d23b1a.qcow2: error while converting qcow2: Failed to get "write" lock\\nIs another process using the image [/srv/shakenfist/image_cache/37c76155-8519-4c41-8457-132e45d23b1a.qcow2]?\\n\'', 'console_port': None, 'node': 'sf-4', 'power_state': 'initial', 'vdi_port': None, 'interfaces': [], 'disks': []})