red-hat-storage / ocs-ci

https://ocs-ci.readthedocs.io/en/latest/
MIT License
109 stars 165 forks source link

PVC UI tests failing with "concurrent.futures._base.TimeoutError" #7861

Open sidhant-agrawal opened 1 year ago

sidhant-agrawal commented 1 year ago

Tests in tests/ui/test_pvc_ui.py are failing with the error below:

concurrent.futures._base.TimeoutError

Stack Trace from one of the failed tests:

self = <test_pvc_ui.TestPvcUserInterface object at 0x7f05c16709d0>
project_factory = <function project_factory_fixture.<locals>.factory at 0x7f05cc89fdc0>
teardown_factory = <function teardown_factory_fixture.<locals>.factory at 0x7f05cc89f670>
setup_ui_class = <selenium.webdriver.chrome.webdriver.WebDriver (session="7fa4d49344d01a6d191570cb1819aa2c")>
sc_name = 'ocs-storagecluster-ceph-rbd', access_mode = 'ReadWriteMany'
pvc_size = '3', vol_mode = 'Block'

    @tier1
    @pytest.mark.parametrize(
        argnames=["sc_name", "access_mode", "pvc_size", "vol_mode"],
        argvalues=[
            pytest.param(
                "ocs-storagecluster-cephfs",
                "ReadWriteMany",
                "2",
                "Filesystem",
            ),
            pytest.param(
                "ocs-storagecluster-ceph-rbd",
                "ReadWriteMany",
                "3",
                "Block",
            ),
            pytest.param(
                "ocs-storagecluster-cephfs",
                "ReadWriteOnce",
                "10",
                "Filesystem",
            ),
            pytest.param(
                *["ocs-storagecluster-ceph-rbd", "ReadWriteOnce", "11", "Block"],
                marks=[skipif_ocs_version("<4.7")],
            ),
            pytest.param(
                "ocs-storagecluster-ceph-rbd",
                "ReadWriteOnce",
                "13",
                "Filesystem",
            ),
        ],
    )
    def test_create_resize_delete_pvc(
        self,
        project_factory,
        teardown_factory,
        setup_ui_class,
        sc_name,
        access_mode,
        pvc_size,
        vol_mode,
    ):
        """
        Test create, resize and delete pvc via UI

        """
        # Creating a test project via CLI
        pro_obj = project_factory()
        project_name = pro_obj.namespace

        pvc_ui_obj = PvcUI()

        # Creating PVC via UI
        pvc_name = create_unique_resource_name("test", "pvc")

        if config.DEPLOYMENT["external_mode"]:
            if sc_name == constants.CEPHFILESYSTEM_SC:
                sc_name = constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_CEPHFS
            elif sc_name == constants.CEPHBLOCKPOOL_SC:
                sc_name = constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD

        pvc_ui_obj.create_pvc_ui(
            project_name, sc_name, pvc_name, access_mode, pvc_size, vol_mode
        )

        pvc_objs = get_all_pvc_objs(namespace=project_name)
        pvc = [pvc_obj for pvc_obj in pvc_objs if pvc_obj.name == pvc_name]

        assert pvc[0].size == int(pvc_size), (
            f"size error| expected size:{pvc_size} \n "
            f"actual size:{str(pvc[0].size)}"
        )

        assert pvc[0].get_pvc_access_mode == access_mode, (
            f"access mode error| expected access mode:{access_mode} "
            f"\n actual access mode:{pvc[0].get_pvc_access_mode}"
        )

        assert pvc[0].backed_sc == sc_name, (
            f"storage class error| expected storage class:{sc_name} "
            f"\n actual storage class:{pvc[0].backed_sc}"
        )

        assert pvc[0].get_pvc_vol_mode == vol_mode, (
            f"volume mode error| expected volume mode:{vol_mode} "
            f"\n actual volume mode:{pvc[0].get_pvc_vol_mode}"
        )

        # Verifying PVC via UI
        logger.info("Verifying PVC Details via UI")
        pvc_ui_obj.verify_pvc_ui(
            pvc_size=pvc_size,
            access_mode=access_mode,
            vol_mode=vol_mode,
            sc_name=sc_name,
            pvc_name=pvc_name,
            project_name=project_name,
        )
        logger.info("PVC Details Verified via UI..!!")

        # Creating Pod via CLI
        logger.info("Creating Pod")
        if sc_name in constants.DEFAULT_STORAGECLASS_RBD:
            interface_type = constants.CEPHBLOCKPOOL
        elif sc_name in constants.DEFAULT_EXTERNAL_MODE_STORAGECLASS_RBD:
            interface_type = constants.CEPHBLOCKPOOL
        else:
            interface_type = constants.CEPHFILESYSTEM

        new_pod = helpers.create_pod(
            interface_type=interface_type,
            pvc_name=pvc_name,
            namespace=project_name,
            raw_block_pv=vol_mode == constants.VOLUME_MODE_BLOCK,
        )

        logger.info(f"Waiting for Pod: state= {constants.STATUS_RUNNING}")
        wait_for_resource_state(
            resource=new_pod, state=constants.STATUS_RUNNING, timeout=120
        )

        # Calling the Teardown Factory Method to make sure Pod is deleted
        teardown_factory(new_pod)

        # Expanding the PVC
        logger.info("Pvc Resizing")
        new_size = int(pvc_size) + 3
        pvc_ui_obj.pvc_resize_ui(
            pvc_name=pvc_name, new_size=new_size, project_name=project_name
        )

        assert new_size > int(
            pvc_size
        ), f"New size of the PVC cannot be less than existing size: new size is {new_size})"

        ocp_version = get_ocp_version()
        self.pvc_loc = locators[ocp_version]["pvc"]

        # Verifying PVC expansion
        logger.info("Verifying PVC resize")
        expected_capacity = f"{new_size} GiB"
        pvc_resize = pvc_ui_obj.verify_pvc_resize_ui(
            project_name=project_name,
            pvc_name=pvc_name,
            expected_capacity=expected_capacity,
        )

        assert pvc_resize, "PVC resize failed"
        logger.info(
            "Pvc resize verified..!!"
            f"New Capacity after PVC resize is {expected_capacity}"
        )

        # Running FIO
        logger.info("Execute FIO on a Pod")
        if vol_mode == constants.VOLUME_MODE_BLOCK:
            storage_type = constants.WORKLOAD_STORAGE_TYPE_BLOCK
        else:
            storage_type = constants.WORKLOAD_STORAGE_TYPE_FS

        new_pod.run_io(storage_type, size=(new_size - 1), invalidate=0, rate="1000m")

>       get_fio_rw_iops(new_pod)

tests/ui/test_pvc_ui.py:194: 
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 
ocs_ci/ocs/resources/pod.py:1265: in get_fio_rw_iops
    fio_result = pod_obj.get_fio_results(120)
ocs_ci/ocs/resources/pod.py:144: in get_fio_results
    result = self.fio_thread.result(timeout)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

self = None, timeout = 120

    def result(self, timeout=None):
        """Return the result of the call that the future represents.

        Args:
            timeout: The number of seconds to wait for the result if the future
                isn't done. If None, then there is no limit on the wait time.

        Returns:
            The result of the call that the future represents.

        Raises:
            CancelledError: If the future was cancelled.
            TimeoutError: If the future didn't finish executing before the given
                timeout.
            Exception: If the call raised then that exception will be raised.
        """
        try:
            with self._condition:
                if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
                    raise CancelledError()
                elif self._state == FINISHED:
                    return self.__get_result()

                self._condition.wait(timeout)

                if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:
                    raise CancelledError()
                elif self._state == FINISHED:
                    return self.__get_result()
                else:
>                   raise TimeoutError()
E                   concurrent.futures._base.TimeoutError

/usr/lib64/python3.8/concurrent/futures/_base.py:446: TimeoutError

Failure observed in RUN ID: 1687156471 RP launch URL: https://url.corp.redhat.com/03a8ce0

sidhant-agrawal commented 1 year ago

A few other RUN IDs where a similar issue was observed: 1687174376 1687160565 1686857538

sidhant-agrawal commented 10 months ago

PRs #8315 and #8538 have been raised to revert back to the default timeout value. Waiting for those PRs to get merged, after which we will re-test to check whether this issue is still reproducible.

github-actions[bot] commented 6 months ago

This issue has been automatically marked as stale because it has not had recent activity. It will be closed in 30 days if no further activity occurs.

github-actions[bot] commented 5 months ago

This issue has been automatically closed due to inactivity. Please re-open if this still requires investigation.

sidhant-agrawal commented 1 week ago

Issue Reproduced: https://url.corp.redhat.com/83f1b29