red-hat-storage / ocs-ci

https://ocs-ci.readthedocs.io/en/latest/
MIT License
109 stars 166 forks source link

test_bulk_pod_attach_performance[CephBlockPool-120] is failing in 4.15 #9204

Closed ypersky1980 closed 7 months ago

ypersky1980 commented 8 months ago

The test case is failing. Re-run the test and determine whether this is a product bug (open a BZ) or a test bug (submit a PR with a fix).

https://reportportal-ocs4.apps.ocp-c1.prod.psi.redhat.com/ui/#ocs/launches/all/17989/883997/884000/log?item1Params=page.sort%3Dstatus%252CASC

self = <test_bulk_pod_attachtime_performance.TestBulkPodAttachPerformance object at 0x7fe706829820>

def setup(self): """ Setting up test parameters """ log.info("Starting the test setup")

super(TestBulkPodAttachPerformance, self).setup()

tests/e2e/performance/csi_tests/test_bulk_pod_attachtime_performance.py:43:

ocs_ci/ocs/perftests.py:99: in setup self.get_node_info(node_type="master") ocs_ci/ocs/perftests.py:261: in get_node_info self.environment[f"{node_type}_nodes_memory"] = oc_cmd.exec_oc_debug_cmd( ocs_ci/ocs/ocp.py:226: in exec_oc_debug_cmd self.exec_oc_cmd(command=debug_cmd, out_yaml_format=False, timeout=timeout) ocs_ci/ocs/ocp.py:178: in exec_oc_cmd out = run_cmd( ocs_ci/utility/utils.py:484: in run_cmd completed_process = exec_cmd(

cmd = ['oc', '--kubeconfig', '/home/jenkins/current-cluster-dir/openshift-cluster-dir/auth/kubeconfig', '-n', 'openshift-storage', 'debug', ...] secrets = None, timeout = 300, ignore_error = False, threading_lock = None silent = False, use_shell = False cluster_config = <ocs_ci.framework.MultiClusterConfig object at 0x7fe719f43640> kwargs = {} masked_cmd = 'oc -n openshift-storage debug nodes/control-plane-0 --to-namespace=openshift-storage -- chroot /host /bin/bash -c "free | grep Mem | awk '{print $2}' || echo 'CMD FAILED'; "' kubepath = '/home/jenkins/current-cluster-dir/openshift-cluster-dir/auth/kubeconfig' completed_process = CompletedProcess(args=['oc', '--kubeconfig', '/home/jenkins/current-cluster-dir/openshift-cluster-dir/auth/kubeconfig'...nTo use host binaries, run chroot /host\n\nRemoving debug pod ...\nerror: non-zero exit code from debug container\n') masked_stdout = '16367348\n' masked_stderr = 'Starting pod/control-plane-0-debug-g6jj4 ...\nTo use host binaries, run chroot /host\n\nRemoving debug pod ...\nerror: non-zero exit code from debug container\n'

def exec_cmd(
    cmd,
    secrets=None,
    timeout=600,
    ignore_error=False,
    threading_lock=None,
    silent=False,
    use_shell=False,
    cluster_config=None,
    **kwargs,
):
    """
    Run an arbitrary command locally

    If the command is grep and matching pattern is not found, then this function
    returns "command terminated with exit code 1" in stderr.

    Args:
        cmd (str or list): command to run. A string is split with
            ``shlex.split`` unless ``shell`` is truthy in ``kwargs``.
        secrets (list): A list of secrets to be masked with asterisks
            in the logged command, stdout and stderr.
        timeout (int): Timeout for the command, defaults to 600 seconds.
        ignore_error (bool): True if ignore non zero return code and do not
            raise the exception.
        threading_lock (threading.RLock): threading.RLock object that is used
            for handling concurrent oc commands. It is held only while the
            ``oc`` subprocess runs and is always released, even when the
            subprocess call itself raises.
        silent (bool): If True will silent errors from the server, default false
        use_shell (bool): If True will pass the cmd without splitting.
            NOTE(review): this flag is not read anywhere in this function;
            splitting is controlled by ``shell`` in ``kwargs`` - confirm
            whether callers rely on it.
        cluster_config (MultiClusterConfig): In case of multicluster environment this object
                will be non-null
        **kwargs: Passed through unchanged to ``subprocess.run``.

    Raises:
        CommandFailed: In case the command execution fails (non-zero return
            code, ``ignore_error`` is False and it is not the "grep found
            nothing" case).
        subprocess.TimeoutExpired: Propagated from ``subprocess.run`` when
            the command does not finish within ``timeout`` seconds.

    Returns:
        (CompletedProcess) A CompletedProcess object of the command that was executed
        CompletedProcess attributes:
        args: The list or str args passed to run().
        returncode (int): The exit code of the process, negative for signals.
        stdout   (bytes): The captured standard output.
        stderr   (bytes): The captured standard error.

    """
    masked_cmd = mask_secrets(cmd, secrets)
    log.info(f"Executing command: {masked_cmd}")
    if isinstance(cmd, str) and not kwargs.get("shell"):
        cmd = shlex.split(cmd)
    if cluster_config and cmd[0] == "oc" and "--kubeconfig" not in cmd:
        # Point oc at the kubeconfig of the cluster this call targets.
        kubepath = cluster_config.RUN["kubeconfig"]
        cmd = list_insert_at_position(cmd, 1, ["--kubeconfig"])
        cmd = list_insert_at_position(cmd, 2, [kubepath])

    # Only oc invocations are serialized through the caller-supplied lock.
    hold_lock = bool(threading_lock) and cmd[0] == "oc"
    if hold_lock:
        threading_lock.acquire()
    try:
        completed_process = subprocess.run(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            stdin=subprocess.PIPE,
            timeout=timeout,
            **kwargs,
        )
    finally:
        # Release even if subprocess.run raised (e.g. TimeoutExpired);
        # otherwise every other thread waiting to run oc would block forever.
        if hold_lock:
            threading_lock.release()

    masked_stdout = mask_secrets(completed_process.stdout.decode(), secrets)
    if len(completed_process.stdout) > 0:
        log.debug(f"Command stdout: {masked_stdout}")
    else:
        log.debug("Command stdout is empty")

    masked_stderr = mask_secrets(completed_process.stderr.decode(), secrets)
    if len(completed_process.stderr) > 0:
        if not silent:
            log.warning(f"Command stderr: {masked_stderr}")
    else:
        log.debug("Command stderr is empty")
    log.debug(f"Command return code: {completed_process.returncode}")
    if completed_process.returncode and not ignore_error:
        masked_stderr = bin_xml_escape(filter_out_emojis(masked_stderr))
        if (
            "grep" in masked_cmd
            and b"command terminated with exit code 1" in completed_process.stderr
        ):
            # grep exits 1 when nothing matches; treat that as "no results",
            # not as a command failure.
            log.info(f"No results found for grep command: {masked_cmd}")
        else:
            raise CommandFailed(
                f"Error during execution of command: {masked_cmd}."
                f"\nError is {masked_stderr}"
            )
    return completed_process

E ocs_ci.ocs.exceptions.CommandFailed: Error during execution of command: oc -n openshift-storage debug nodes/control-plane-0 --to-namespace=openshift-storage -- chroot /host /bin/bash -c "free | grep Mem | awk '{print $2}' || echo 'CMD FAILED'; ". E Error is Starting pod/control-plane-0-debug-g6jj4 ... E To use host binaries, run chroot /host E E Removing debug pod ... E error: non-zero exit code from debug container

ocs_ci/utility/utils.py:664: CommandFailed

2024-01-11 23:37:57

ypersky1980 commented 7 months ago

Closing the issue, since this test has recently passed on 3 different platforms: IBM Cloud 4.15 and 4.14, and VMware LSO 4.14.