red-hat-storage / ocs-ci

https://ocs-ci.readthedocs.io/en/latest/
MIT License

fio tests are failing on 4.13 with error: Error from server (BadRequest): container "container-00" in pod "1024312848-debug" is waiting to start: ContainerCreating #9609

Closed pintojoy closed 1 month ago

pintojoy commented 5 months ago

def setup(self):
    """
    Setting up test parameters
    """
    log.info("Starting the test setup")
    self.benchmark_name = "FIO"
    self.client_pod_name = "fio-client"

    super(TestFIOBenchmark, self).setup()

tests/e2e/performance/io_workload/test_fio_benchmark.py:147:

ocs_ci/ocs/perftests.py:99: in setup
    self.get_node_info(node_type="worker")
ocs_ci/ocs/perftests.py:256: in get_node_info
    self.environment[f"{node_type}_nodes_cpu_num"] = oc_cmd.exec_oc_debug_cmd(
ocs_ci/ocs/ocp.py:226: in exec_oc_debug_cmd
    self.exec_oc_cmd(command=debug_cmd, out_yaml_format=False, timeout=timeout)
ocs_ci/ocs/ocp.py:178: in exec_oc_cmd
    out = run_cmd(
ocs_ci/utility/utils.py:482: in run_cmd
    completed_process = exec_cmd(

cmd = ['oc', '--kubeconfig', '/home/jenkins/current-cluster-dir/openshift-cluster-dir/auth/kubeconfig', '-n', 'openshift-storage', 'debug', ...]
secrets = None, timeout = 300, ignore_error = False, threading_lock = None
silent = False, use_shell = False
cluster_config = <ocs_ci.framework.MultiClusterConfig object at 0x7f4b3a9e1220>
kwargs = {}
masked_cmd = 'oc -n openshift-storage debug nodes/10.243.128.48 --to-namespace=openshift-storage -- chroot /host /bin/bash -c "lscpu | grep '^CPU(s):' | awk '{print $NF}' || echo 'CMD FAILED'; "'
kubepath = '/home/jenkins/current-cluster-dir/openshift-cluster-dir/auth/kubeconfig'
completed_process = CompletedProcess(args=['oc', '--kubeconfig', '/home/jenkins/current-cluster-dir/openshift-cluster-dir/auth/kubeconfig'...from server (BadRequest): container "container-00" in pod "1024312848-debug" is waiting to start: ContainerCreating\n')
masked_stdout = ''
masked_stderr = 'Starting pod/1024312848-debug ...\nTo use host binaries, run chroot /host\n\nRemoving debug pod ...\nError from server (BadRequest): container "container-00" in pod "1024312848-debug" is waiting to start: ContainerCreating\n'

def exec_cmd(
    cmd,
    secrets=None,
    timeout=600,
    ignore_error=False,
    threading_lock=None,
    silent=False,
    use_shell=False,
    cluster_config=None,
    **kwargs,
):
"""
Run an arbitrary command locally

If the command is grep and matching pattern is not found, then this function
returns "command terminated with exit code 1" in stderr.

Args:
    cmd (str): command to run
    secrets (list): A list of secrets to be masked with asterisks
        This kwarg is popped in order to not interfere with
        subprocess.run(``**kwargs``)
    timeout (int): Timeout for the command, defaults to 600 seconds.
    ignore_error (bool): True if ignore non zero return code and do not
        raise the exception.
    threading_lock (threading.Lock): threading.Lock object that is used
        for handling concurrent oc commands
    silent (bool): If True, errors from the server are not logged; defaults to False
    use_shell (bool): If True will pass the cmd without splitting
    cluster_config (MultiClusterConfig): In case of multicluster environment this object
            will be non-null

Raises:
    CommandFailed: In case the command execution fails

Returns:
    (CompletedProcess) A CompletedProcess object of the command that was executed
    CompletedProcess attributes:
    args: The list or str args passed to run().
    returncode (str): The exit code of the process, negative for signals.
    stdout     (str): The standard output (None if not captured).
    stderr     (str): The standard error (None if not captured).

"""
masked_cmd = mask_secrets(cmd, secrets)
log.info(f"Executing command: {masked_cmd}")
if isinstance(cmd, str) and not kwargs.get("shell"):
    cmd = shlex.split(cmd)
if cluster_config and cmd[0] == "oc" and "--kubeconfig" not in cmd:
    kubepath = cluster_config.RUN["kubeconfig"]
    cmd = list_insert_at_position(cmd, 1, ["--kubeconfig"])
    cmd = list_insert_at_position(cmd, 2, [kubepath])
if threading_lock and cmd[0] == "oc":
    threading_lock.acquire()
completed_process = subprocess.run(
    cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    stdin=subprocess.PIPE,
    timeout=timeout,
    **kwargs,
)
if threading_lock and cmd[0] == "oc":
    threading_lock.release()
masked_stdout = mask_secrets(completed_process.stdout.decode(), secrets)
if len(completed_process.stdout) > 0:
    log.debug(f"Command stdout: {masked_stdout}")
else:
    log.debug("Command stdout is empty")

masked_stderr = mask_secrets(completed_process.stderr.decode(), secrets)
if len(completed_process.stderr) > 0:
    if not silent:
        log.warning(f"Command stderr: {masked_stderr}")
else:
    log.debug("Command stderr is empty")
log.debug(f"Command return code: {completed_process.returncode}")
if completed_process.returncode and not ignore_error:
    masked_stderr = bin_xml_escape(filter_out_emojis(masked_stderr))
    if (
        "grep" in masked_cmd
        and b"command terminated with exit code 1" in completed_process.stderr
    ):
        log.info(f"No results found for grep command: {masked_cmd}")
    else:
        raise CommandFailed(
            f"Error during execution of command: {masked_cmd}."
            f"\nError is {masked_stderr}"
        )

E ocs_ci.ocs.exceptions.CommandFailed: Error during execution of command: oc -n openshift-storage debug nodes/10.243.128.48 --to-namespace=openshift-storage -- chroot /host /bin/bash -c "lscpu | grep '^CPU(s):' | awk '{print $NF}' || echo 'CMD FAILED'; ".
E Error is Starting pod/1024312848-debug ...
E To use host binaries, run chroot /host
E
E Removing debug pod ...
E Error from server (BadRequest): container "container-00" in pod "1024312848-debug" is waiting to start: ContainerCreating
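
For context, the step that fails is the worker-node CPU query that the performance test setup runs through a transient oc debug pod (the masked_cmd shown above). A minimal, self-contained sketch of that call, with an illustrative helper name and the node address taken from the log rather than from the ocs-ci code, looks roughly like this:

# Illustrative sketch only (assumed helper name), showing what the failing
# setup step does: read a worker node's CPU count through an `oc debug` pod.
import subprocess

def get_worker_cpu_count(node_name, namespace="openshift-storage", timeout=300):
    cmd = [
        "oc", "-n", namespace, "debug", f"nodes/{node_name}",
        f"--to-namespace={namespace}", "--",
        "chroot", "/host", "/bin/bash", "-c",
        "lscpu | grep '^CPU(s):' | awk '{print $NF}' || echo 'CMD FAILED'",
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    # While the debug container is still starting, the API server answers with
    # "BadRequest ... waiting to start: ContainerCreating", which is exactly
    # the failure captured in this traceback.
    if result.returncode != 0:
        raise RuntimeError(result.stderr)
    return int(result.stdout.strip())

Running the same oc debug ... lscpu command by hand against the affected worker is a quick way to check whether the debug pod reaches Running within the 300-second timeout on this 4.13 cluster.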
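The error itself points at a race rather than a broken command: oc debug creates the debug pod and immediately tries to exec into it, and here container-00 is still in ContainerCreating when the exec is attempted. One possible mitigation, sketched below as a plain retry loop (the wrapper is hypothetical and not an existing ocs-ci helper), is to retry the debug command while the error indicates the container is still starting:

import time

def retry_while_container_creating(run_once, attempts=5, delay=30):
    # run_once: zero-argument callable that raises on failure, e.g.
    # lambda: get_worker_cpu_count("10.243.128.48") from the sketch above.
    last_err = None
    for _ in range(attempts):
        try:
            return run_once()
        except Exception as err:
            # Only keep retrying for the "waiting to start" race; any other
            # failure is re-raised immediately.
            if "ContainerCreating" not in str(err):
                raise
            last_err = err
            time.sleep(delay)
    raise last_err

Something along these lines (or simply a longer timeout for the node-info debug pods) could make the FIO benchmark setup tolerant of worker nodes that are slow to start the debug container.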

github-actions[bot] commented 2 months ago

This issue has been automatically marked as stale because it has not had recent activity. It will be closed in 30 days if no further activity occurs.

github-actions[bot] commented 1 month ago

This issue has been automatically closed due to inactivity. Please re-open if this still requires investigation.