resource = <ocs_ci.ocs.resources.pvc.PVC object at 0x7fe67726f2b0>
state = 'Bound', timeout = 18000
def wait_for_resource_state(resource, state, timeout=60):
"""
Wait for a resource to get to a given status
Args:
resource (OCS obj): The resource object
state (str): The status to wait for
timeout (int): Time in seconds to wait
Raises:
ResourceWrongStatusException: In case the resource hasn't
reached the desired state
"""
if check_cluster_is_compact():
timeout = 180
if (
resource.name == constants.DEFAULT_STORAGECLASS_CEPHFS
or resource.name == constants.DEFAULT_STORAGECLASS_RBD
):
logger.info("Attempt to default default Secret or StorageClass")
return
try:
resource.ocp.wait_for_resource(
condition=state, resource_name=resource.name, timeout=timeout
)
def wait_for_resource(
    self,
    condition,
    resource_name="",
    column="STATUS",
    selector=None,
    resource_count=0,
    timeout=60,
    sleep=3,
    dont_allow_other_resources=False,
    error_condition=None,
):
    """
    Wait for a resource to reach to a desired condition

    Args:
        condition (str): The desired state the resource that is sampled
            from 'oc get <kind> <resource_name>' command
        resource_name (str): The name of the resource to wait
            for (e.g.my-pv1)
        column (str): The name of the column to compare with
        selector (str): The resource selector to search with.
            Example: 'app=rook-ceph-mds'
        resource_count (int): How many resources expected to be
        timeout (int): Time in seconds to wait
        sleep (int): Sampling time in seconds
        dont_allow_other_resources (bool): If True it will not allow other
            resources in different state. For example you are waiting for 2
            resources and there are currently 3 (2 in running state,
            1 in ContainerCreating) the function will continue to next
            iteration to wait for only 2 resources in running state and no
            other exists.
        error_condition (str): State of the resource that is sampled
            from 'oc get <kind> <resource_name>' command, which makes this
            method to fail immediately without waiting for a timeout. This
            is optional and makes sense only when there is a well defined
            unrecoverable state of the resource(s) which is not expected to
            be part of a workflow under test, and at the same time, the
            timeout itself is large.

    Returns:
        bool: True in case all resources reached desired condition,
            False otherwise

    Raises:
        ValueError: If ``condition`` equals ``error_condition`` (the call
            would be ambiguous — the success state cannot also be the
            failure state).
        ResourceWrongStatusException: If ``error_condition`` is set and a
            sampled resource reaches that state.
        TimeoutExpiredError: Re-raised when the sampler exhausts
            ``timeout`` without all resources reaching ``condition``.
    """
    # Guard against an ambiguous call: waiting for a state that is also
    # declared as the immediate-failure state can never succeed sensibly.
    if condition == error_condition:
        # when this fails, this method is used in a wrong way
        raise ValueError(
            f"Condition '{condition}' we are waiting for must be different"
            f" from error condition '{error_condition}'"
            " which describes unexpected error state."
        )
    log.info(
        (
            f"Waiting for a resource(s) of kind {self._kind}"
            f" identified by name '{resource_name}'"
            f" using selector {selector}"
            f" at column name {column}"
            f" to reach desired condition {condition}"
        )
    )
    # Fall back to the instance-level name/selector when the caller did
    # not pass explicit ones.
    resource_name = resource_name if resource_name else self.resource_name
    selector = selector if selector else self.selector
    # actual status of the resource we are waiting for, setting it to None
    # now prevents UnboundLocalError raised when waiting timeouts
    actual_status = None
    try:
        # TimeoutSampler repeatedly calls self.get(resource_name, True,
        # selector) until `timeout` elapses, sleeping `sleep` between
        # samples; it raises TimeoutExpiredError on expiry.
        for sample in TimeoutSampler(
            timeout, sleep, self.get, resource_name, True, selector
        ):
            # Only 1 resource expected to be returned
            if resource_name:
                # Spread the remaining budget over get_resource retries;
                # the `else` arm guards against sleep == 0.
                retry = int(timeout / sleep if sleep else timeout / 1)
                status = self.get_resource(
                    resource_name,
                    column,
                    retry=retry,
                    wait=sleep,
                )
                if status == condition:
                    log.info(
                        f"status of {resource_name} at {column}"
                        " reached condition!"
                    )
                    return True
                log.info(
                    (
                        f"status of {resource_name} at column {column} was {status},"
                        f" but we were waiting for {condition}"
                    )
                )
                # Remembered so the timeout handler below can report the
                # last observed state.
                actual_status = status
                # Fail fast on a declared unrecoverable state instead of
                # burning the whole timeout.
                if error_condition is not None and status == error_condition:
                    raise ResourceWrongStatusException(
                        resource_name,
                        column=column,
                        expected=condition,
                        got=status,
                    )
            # More than 1 resources returned
            elif sample.get("kind") == "List":
                in_condition = []
                in_condition_len = 0
                # For the List case the reported status is the list of
                # per-item statuses gathered this iteration.
                actual_status = []
                sample = sample["items"]
                sample_len = len(sample)
                for item in sample:
                    try:
                        item_name = item.get("metadata").get("name")
                        status = self.get_resource(item_name, column)
                        actual_status.append(status)
                        if status == condition:
                            in_condition.append(item)
                            in_condition_len = len(in_condition)
                        if (
                            error_condition is not None
                            and status == error_condition
                        ):
                            raise ResourceWrongStatusException(
                                item_name,
                                column=column,
                                expected=condition,
                                got=status,
                            )
                    except CommandFailed as ex:
                        # A single item failing to report its status is
                        # tolerated; the item simply does not count as
                        # being in condition this iteration.
                        log.info(
                            f"Failed to get status of resource: {item_name} at column {column}, "
                            f"Error: {ex}"
                        )
                if resource_count:
                    if in_condition_len == resource_count:
                        log.info(
                            f"{in_condition_len} resources already "
                            f"reached condition!"
                        )
                        # Enough items match, but extra items exist in
                        # other states — keep sampling if the caller
                        # demanded an exact population.
                        if (
                            dont_allow_other_resources
                            and sample_len != in_condition_len
                        ):
                            log.info(
                                f"There are {sample_len} resources in "
                                f"total. Continue to waiting as "
                                f"you don't allow other resources!"
                            )
                            continue
                        return True
                # No explicit count requested: succeed once every listed
                # item matches the condition.
                elif len(sample) == len(in_condition):
                    return True
                # preparing logging message with expected number of
                # resource items we are waiting for
                if resource_count > 0:
                    exp_num_str = f"all {resource_count}"
                else:
                    exp_num_str = "all"
                log.info(
                    (
                        f"status of {resource_name} at column {column} - item(s) were {actual_status},"
                        f" but we were waiting"
                        f" for {exp_num_str} of them to be {condition}"
                    )
                )
    except TimeoutExpiredError as ex:
        log.error(f"timeout expired: {ex}")
        # run `oc describe` on the resources we were waiting for to provide
        # evidence so that we can understand what was wrong
        output = self.describe(resource_name, selector=selector)
        log.warning(
            "Description of the resource(s) we were waiting for:\n%s", output
        )
        log.error(
            (
                f"Wait for {self._kind} resource {resource_name} at column {column}"
                f" to reach desired condition {condition} failed,"
                f" last actual status was {actual_status}"
            )
        )
        raise (ex)
def wait_for_resource(
self,
condition,
resource_name="",
column="STATUS",
selector=None,
resource_count=0,
timeout=60,
sleep=3,
dont_allow_other_resources=False,
error_condition=None,
):
"""
Wait for a resource to reach to a desired condition
Args:
condition (str): The desired state the resource that is sampled
from 'oc get <kind> <resource_name>' command
resource_name (str): The name of the resource to wait
for (e.g.my-pv1)
column (str): The name of the column to compare with
selector (str): The resource selector to search with.
Example: 'app=rook-ceph-mds'
resource_count (int): How many resources expected to be
timeout (int): Time in seconds to wait
sleep (int): Sampling time in seconds
dont_allow_other_resources (bool): If True it will not allow other
resources in different state. For example you are waiting for 2
resources and there are currently 3 (2 in running state,
1 in ContainerCreating) the function will continue to next
iteration to wait for only 2 resources in running state and no
other exists.
error_condition (str): State of the resource that is sampled
from 'oc get <kind> <resource_name>' command, which makes this
method to fail immediately without waiting for a timeout. This
is optional and makes sense only when there is a well defined
unrecoverable state of the resource(s) which is not expected to
be part of a workflow under test, and at the same time, the
timeout itself is large.
Returns:
bool: True in case all resources reached desired condition,
False otherwise
"""
if condition == error_condition:
# when this fails, this method is used in a wrong way
raise ValueError(
f"Condition '{condition}' we are waiting for must be different"
f" from error condition '{error_condition}'"
" which describes unexpected error state."
)
log.info(
(
f"Waiting for a resource(s) of kind {self._kind}"
f" identified by name '{resource_name}'"
f" using selector {selector}"
f" at column name {column}"
f" to reach desired condition {condition}"
)
)
resource_name = resource_name if resource_name else self.resource_name
selector = selector if selector else self.selector
# actual status of the resource we are waiting for, setting it to None
# now prevents UnboundLocalError raised when waiting timeouts
actual_status = None
try:
for sample in TimeoutSampler(
timeout, sleep, self.get, resource_name, True, selector
):
ocs_ci/ocs/ocp.py:686:
self = <ocs_ci.utility.utils.TimeoutSampler object at 0x7fe64f36cc40>
def iter(self):
if self.start_time is None:
self.start_time = time.time()
while True:
self.last_sample_time = time.time()
if self.timeout <= (self.last_sample_time - self.start_time):
@pytest.mark.parametrize(
argnames=["interface", "copies", "timeout"],
argvalues=[
pytest.param(
[constants.CEPHBLOCKPOOL, 7, 1800],
marks=pytest.mark.polarion_id("OCS-2673"),
),
pytest.param(
[constants.CEPHFILESYSTEM, 7, 1800],
marks=[
pytest.mark.polarion_id("OCS-2674"),
pytest.mark.bugzilla("2101874"),
],
),
],
)
def test_pvc_clone_performance_multiple_files(
self,
secret_factory,
interface,
copies,
timeout,
):
"""
Test assign nodeName to a pod using RWX pvc
Each kernel (unzipped) is 892M and 61694 files
The test creates a pvc and a pods, writes kernel files multiplied by number of copies
The test creates number of clones samples, calculates creation and deletion times for each one the clones
and calculates the average creation and average deletion times
"""
# Initialize some variables
self.interface = interface
self.timeout = timeout
self.pvc_size = "100"
if self.dev_mode:
self.pvc_size = "10"
copies = 1
self.results_path = get_full_test_logs_path(cname=self)
# Initialize the results doc file.
full_results = self.init_full_results(
ClonesResultsAnalyse(
self.uuid,
self.crd_data,
self.full_log_path,
"test_pvc_clone_performance_multiple_files",
)
)
files_written = ""
data_written = ""
test_start_time = self.get_time()
# Create a new pool and SC only for RBD; for CephFS use the default
if self.interface == constants.CEPHBLOCKPOOL:
# Creating new pool to run the test on it
self.create_new_pool_and_sc(secret_factory)
else:
self.sc_obj = ocs.OCS(
kind="StorageCluster",
metadata={
"namespace": self.namespace,
"name": Interfaces_info[self.interface]["sc"],
},
)
self.pool_name = "ocs-storagecluster-cephfilesystem"
# Create a PVC
self.create_pvc_and_wait_for_bound()
# Create a POD
self.create_pod_and_wait_for_completion(
command=["/opt/multiple_files.sh"],
command_args=[f"{copies}", "/mnt"],
)
# Get the number of files and total written data from the pod
for line in self.pod_object.ocp.get_logs(name=self.pod_object.name).split("\n"):
if "Number Of Files" in line:
files_written = line.split(" ")[-1]
if "Total Data" in line:
data_written = line.split(" ")[-1]
logger.info("Getting the amount of data written to the PVC")
logger.info(f"The amount of written data is {data_written}")
logger.info(
f"For {self.interface} - The number of files written to the pod is {int(files_written):,}"
)
# increasing the timeout since clone creation time is longer than pod attach time
self.timeout = 18000
# taking the time, so parsing the provision log will be faster.
start_time = self.get_time("csi")
tests/e2e/performance/csi_tests/test_pvc_clone_performance.py:249: in create_and_delete_clones
helpers.wait_for_resource_state(
resource = <ocs_ci.ocs.resources.pvc.PVC object at 0x7fe67726f2b0>
state = 'Bound', timeout = 18000
def wait_for_resource_state(resource, state, timeout=60):
"""
Wait for a resource to get to a given status
Args:
resource (OCS obj): The resource object
state (str): The status to wait for
timeout (int): Time in seconds to wait
Raises:
ResourceWrongStatusException: In case the resource hasn't
reached the desired state
"""
if check_cluster_is_compact():
timeout = 180
if (
resource.name == constants.DEFAULT_STORAGECLASS_CEPHFS
or resource.name == constants.DEFAULT_STORAGECLASS_RBD
):
logger.info("Attempt to default default Secret or StorageClass")
return
try:
resource.ocp.wait_for_resource(
condition=state, resource_name=resource.name, timeout=timeout
)
except TimeoutExpiredError:
logger.error(f"{resource.kind} {resource.name} failed to reach {state}")
resource.reload()
raise ResourceWrongStatusException(resource.name, resource.describe())
E ocs_ci.ocs.exceptions.ResourceWrongStatusException: Resource clone-pas-test-1 describe output: Name: clone-pas-test-1
E Namespace: namespace-pas-test-058960c607af4789baa76
E StorageClass: ocs-storagecluster-cephfs
E Status: Pending
E Volume:
E Labels:
E Annotations: volume.beta.kubernetes.io/storage-provisioner: openshift-storage.cephfs.csi.ceph.com
E volume.kubernetes.io/storage-provisioner: openshift-storage.cephfs.csi.ceph.com
E Finalizers: [kubernetes.io/pvc-protection]
E Capacity:
E Access Modes:
E VolumeMode: Filesystem
E DataSource:
E Kind: PersistentVolumeClaim
E Name: pvc-pas-test
E Used By:
E Events:
E Type Reason Age From Message
E ---- ------ ---- ---- -------
E Normal ExternalProvisioning 4m56s (x1211 over 5h) persistentvolume-controller Waiting for a volume to be created either by the external provisioner 'openshift-storage.cephfs.csi.ceph.com' or manually by the system administrator. If volume creation is delayed, please verify that the provisioner is running and correctly registered.
E Normal Provisioning 4m1s (x87 over 5h) openshift-storage.cephfs.csi.ceph.com_csi-cephfsplugin-provisioner-7569b959fd-t9xzg_8bb4b643-6f55-48f6-97f0-c34ab048e33c External provisioner is provisioning volume for claim "namespace-pas-test-058960c607af4789baa76/clone-pas-test-1"
Test case is failing — re-run the test and determine whether this is a product bug (open a BZ) or a test bug (submit a PR with a fix).
https://reportportal-ocs4.apps.ocp-c1.prod.psi.redhat.com/ui/#ocs/launches/all/17989/883997/884036/log?item1Params=page.sort%3Dstatus%252CASC
resource = <ocs_ci.ocs.resources.pvc.PVC object at 0x7fe67726f2b0> state = 'Bound', timeout = 18000
def wait_for_resource_state(resource, state, timeout=60): """ Wait for a resource to get to a given status
ocs_ci/helpers/helpers.py:117:
self = <ocs_ci.ocs.ocp.OCP object at 0x7fe6e8628160>, condition = 'Bound' resource_name = 'clone-pas-test-1', column = 'STATUS', selector = None resource_count = 0, timeout = 18000, sleep = 3 dont_allow_other_resources = False, error_condition = None
def wait_for_resource( self, condition, resource_name="", column="STATUS", selector=None, resource_count=0, timeout=60, sleep=3, dont_allow_other_resources=False, error_condition=None, ): """ Wait for a resource to reach to a desired condition
ocs_ci/ocs/ocp.py:795:
self = <ocs_ci.ocs.ocp.OCP object at 0x7fe6e8628160>, condition = 'Bound' resource_name = 'clone-pas-test-1', column = 'STATUS', selector = None resource_count = 0, timeout = 18000, sleep = 3 dont_allow_other_resources = False, error_condition = None
def wait_for_resource( self, condition, resource_name="", column="STATUS", selector=None, resource_count=0, timeout=60, sleep=3, dont_allow_other_resources=False, error_condition=None, ): """ Wait for a resource to reach to a desired condition
ocs_ci/ocs/ocp.py:686:
self = <ocs_ci.utility.utils.TimeoutSampler object at 0x7fe64f36cc40>
def iter(self): if self.start_time is None: self.start_time = time.time() while True: self.last_sample_time = time.time() if self.timeout <= (self.last_sample_time - self.start_time):
E ocs_ci.ocs.exceptions.TimeoutExpiredError: Timed out after 18000s running get("clone-pas-test-1", True, None)
ocs_ci/utility/utils.py:1288: TimeoutExpiredError
During handling of the above exception, another exception occurred:
self = <test_pvc_clone_performance.TestPVCClonePerformance object at 0x7fe6d1ef55e0> secret_factory = <function secret_factory_fixture..factory at 0x7fe60e874a60> interface = 'CephFileSystem', copies = 7, timeout = 1800
@pytest.mark.parametrize( argnames=["interface", "copies", "timeout"], argvalues=[ pytest.param( [constants.CEPHBLOCKPOOL, 7, 1800], marks=pytest.mark.polarion_id("OCS-2673"), ), pytest.param( [constants.CEPHFILESYSTEM, 7, 1800], marks=[ pytest.mark.polarion_id("OCS-2674"), pytest.mark.bugzilla("2101874"), ], ), ], ) def test_pvc_clone_performance_multiple_files( self, secret_factory, interface, copies, timeout, ): """ Test assign nodeName to a pod using RWX pvc Each kernel (unzipped) is 892M and 61694 files The test creates a pvc and a pods, writes kernel files multiplied by number of copies The test creates number of clones samples, calculates creation and deletion times for each one the clones and calculates the average creation and average deletion times """
clones_list = self.create_and_delete_clones()
tests/e2e/performance/csi_tests/test_pvc_clone_performance.py:504:
tests/e2e/performance/csi_tests/test_pvc_clone_performance.py:249: in create_and_delete_clones helpers.wait_for_resource_state(
resource = <ocs_ci.ocs.resources.pvc.PVC object at 0x7fe67726f2b0> state = 'Bound', timeout = 18000
def wait_for_resource_state(resource, state, timeout=60): """ Wait for a resource to get to a given status
E ocs_ci.ocs.exceptions.ResourceWrongStatusException: Resource clone-pas-test-1 describe output: Name: clone-pas-test-1 E Namespace: namespace-pas-test-058960c607af4789baa76 E StorageClass: ocs-storagecluster-cephfs E Status: Pending E Volume: E Labels:
E Annotations: volume.beta.kubernetes.io/storage-provisioner: openshift-storage.cephfs.csi.ceph.com E volume.kubernetes.io/storage-provisioner: openshift-storage.cephfs.csi.ceph.com E Finalizers: [kubernetes.io/pvc-protection] E Capacity: E Access Modes: E VolumeMode: Filesystem E DataSource: E Kind: PersistentVolumeClaim E Name: pvc-pas-test E Used By:
E Events: E Type Reason Age From Message E ---- ------ ---- ---- ------- E Normal ExternalProvisioning 4m56s (x1211 over 5h) persistentvolume-controller Waiting for a volume to be created either by the external provisioner 'openshift-storage.cephfs.csi.ceph.com' or manually by the system administrator. If volume creation is delayed, please verify that the provisioner is running and correctly registered. E Normal Provisioning 4m1s (x87 over 5h) openshift-storage.cephfs.csi.ceph.com_csi-cephfsplugin-provisioner-7569b959fd-t9xzg_8bb4b643-6f55-48f6-97f0-c34ab048e33c External provisioner is provisioning volume for claim "namespace-pas-test-058960c607af4789baa76/clone-pas-test-1"
ocs_ci/helpers/helpers.py:123: ResourceWrongStatusException
2024-01-13 07:25:10