red-hat-storage / ocs-ci

https://ocs-ci.readthedocs.io/en/latest/

test_run_jenkins_node_reboot[master-2-15] fails on Vsphere UPI with error 'Error is Error from server (ServiceUnavailable): the server is currently unable to handle the request (get builds.build.openshift.io jax-rs-build-1)' #8996

Open · nagendra202 opened this issue 12 months ago

nagendra202 commented 12 months ago

Message: ocs_ci.ocs.exceptions.CommandFailed: Error during execution of command: oc --kubeconfig /home/jenkins/current-cluster-dir/openshift-cluster-dir/auth/kubeconfig -n myjenkins-1 get Build jax-rs-build-1 -n myjenkins-1 -o yaml. Error is Error from server (ServiceUnavailable): the server is currently unable to handle the request (get builds.build.openshift.io jax-rs-build-1) Type: None
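The ServiceUnavailable response typically shows up while the rebooted master is coming back and the API server behind the load balancer is briefly unreachable, so the failure looks transient rather than a Jenkins/build problem. Below is a minimal sketch of retrying the same oc get until the control plane answers again; the kubeconfig path, namespace and build name are copied from the log above, while the retry helper itself is hypothetical and not part of ocs-ci:

import subprocess
import time

# Values copied from the failing command above; the retry loop itself is only
# an illustration, not ocs-ci code.
KUBECONFIG = "/home/jenkins/current-cluster-dir/openshift-cluster-dir/auth/kubeconfig"
CMD = [
    "oc", "--kubeconfig", KUBECONFIG,
    "-n", "myjenkins-1",
    "get", "Build", "jax-rs-build-1", "-o", "yaml",
]

def get_build_with_retry(cmd=CMD, attempts=10, delay=30):
    """Retry the 'oc get' while the API server returns ServiceUnavailable."""
    for attempt in range(1, attempts + 1):
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode == 0:
            return result.stdout
        if "ServiceUnavailable" not in result.stderr:
            raise RuntimeError(result.stderr)  # a different, non-transient error
        print(f"attempt {attempt}: API server unavailable, retrying in {delay}s")
        time.sleep(delay)
    raise TimeoutError("API server did not recover within the retry window")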

Text:
self = <tests.e2e.workloads.app.jenkins.test_jenkins_node_reboot.TestJenkinsNodeReboot object at 0x7f71c17b7100>
jenkins = <ocs_ci.ocs.jenkins.Jenkins object at 0x7f724e616ee0>
nodes = <ocs_ci.ocs.platform_nodes.VMWareUPINodes object at 0x7f724e616fa0>
node_type = 'master', num_projects = 2, num_of_builds = 15

@pytest.mark.parametrize(
    argnames=["node_type", "num_projects", "num_of_builds"],
    argvalues=[
        pytest.param(
            *[MASTER_MACHINE, 2, 15], marks=pytest.mark.polarion_id("OCS-2202")
        ),
        pytest.param(
            *[WORKER_MACHINE, 2, 15], marks=pytest.mark.polarion_id("OCS-2178")
        ),
    ],
)
@pytest.mark.usefixtures(jenkins_setup.__name__)
def test_run_jenkins_node_reboot(
    self, jenkins, nodes, node_type, num_projects, num_of_builds
):
"""
Test Node Reboot jenkins
"""
# Init number of projects
jenkins.number_projects = num_projects

# Create app jenkins
jenkins.create_app_jenkins()

# Create jenkins pvc
jenkins.create_jenkins_pvc()

# Create jenkins build config
jenkins.create_jenkins_build_config()

# Wait jenkins deploy pod reach to completed state
jenkins.wait_for_jenkins_deploy_status(status=STATUS_COMPLETED)

# Get relevant node
nodes_reboot = jenkins.get_node_name_where_jenkins_pod_not_hosted(
    node_type=node_type, num_of_nodes=1
)

# Init number of builds per project
jenkins.number_builds_per_project = num_of_builds

# Start Builds
jenkins.start_build()

if len(nodes_reboot) > 0:
    # Restart Node
    nodes.restart_nodes(get_node_objs(nodes_reboot))
else:
    log.info("No node was reboot")

# Wait build reach 'Complete' state

jenkins.wait_for_build_to_complete()

tests/e2e/workloads/app/jenkins/test_jenkins_node_reboot.py:99:

ocs_ci/ocs/jenkins.py:150: in wait_for_build_to_complete
    jenkins_builds = self.get_builds_sorted_by_number(project=project)
ocs_ci/ocs/jenkins.py:185: in get_builds_sorted_by_number
    jenkins_builds_unsorted = self.get_builds_obj(namespace=project)
ocs_ci/ocs/jenkins.py:222: in get_builds_obj
    ocp_dict = ocp_obj.get(resource_name=build_name)
ocs_ci/ocs/ocp.py:291: in get
    return self.exec_oc_cmd(
ocs_ci/ocs/ocp.py:178: in exec_oc_cmd
    out = run_cmd(
ocs_ci/utility/utils.py:481: in run_cmd
    completed_process = exec_cmd(

cmd = ['oc', '--kubeconfig', '/home/jenkins/current-cluster-dir/openshift-cluster-dir/auth/kubeconfig', '-n', 'myjenkins-1', 'get', ...]
secrets = None, timeout = 600, ignore_error = False, threading_lock = None
silent = False, use_shell = False
cluster_config = <ocs_ci.framework.MultiClusterConfig object at 0x7f726111e6a0>
kwargs = {}
masked_cmd = 'oc --kubeconfig /home/jenkins/current-cluster-dir/openshift-cluster-dir/auth/kubeconfig -n myjenkins-1 get Build jax-rs-build-1 -n myjenkins-1 -o yaml'
completed_process = CompletedProcess(args=['oc', '--kubeconfig', '/home/jenkins/current-cluster-dir/openshift-cluster-dir/auth/kubeconfig'...ceUnavailable): the server is currently unable to handle the request (get builds.build.openshift.io jax-rs-build-1)\n')
masked_stdout = ''
masked_stderr = 'Error from server (ServiceUnavailable): the server is currently unable to handle the request (get builds.build.openshift.io jax-rs-build-1)\n'

def exec_cmd(
    cmd,
    secrets=None,
    timeout=600,
    ignore_error=False,
    threading_lock=None,
    silent=False,
    use_shell=False,
    cluster_config=None,
    **kwargs,
):
"""
Run an arbitrary command locally

If the command is grep and matching pattern is not found, then this function
returns "command terminated with exit code 1" in stderr.

Args:
    cmd (str): command to run
    secrets (list): A list of secrets to be masked with asterisks
        This kwarg is popped in order to not interfere with
        subprocess.run(``**kwargs``)
    timeout (int): Timeout for the command, defaults to 600 seconds.
    ignore_error (bool): True if ignore non zero return code and do not
        raise the exception.
    threading_lock (threading.Lock): threading.Lock object that is used
        for handling concurrent oc commands
    silent (bool): If True will silent errors from the server, default false
    use_shell (bool): If True will pass the cmd without splitting
    cluster_config (MultiClusterConfig): In case of multicluster environment this object
            will be non-null

Raises:
    CommandFailed: In case the command execution fails

Returns:
    (CompletedProcess) A CompletedProcess object of the command that was executed
    CompletedProcess attributes:
    args: The list or str args passed to run().
    returncode (str): The exit code of the process, negative for signals.
    stdout     (str): The standard output (None if not captured).
    stderr     (str): The standard error (None if not captured).

"""
masked_cmd = mask_secrets(cmd, secrets)
log.info(f"Executing command: {masked_cmd}")
if isinstance(cmd, str) and not kwargs.get("shell"):
    cmd = shlex.split(cmd)
if cluster_config and cmd[0] == "oc" and "--kubeconfig" not in cmd:
    kubepath = cluster_config.RUN["kubeconfig"]
    cmd = list_insert_at_position(cmd, 1, ["--kubeconfig"])
    cmd = list_insert_at_position(cmd, 2, [kubepath])
if threading_lock and cmd[0] == "oc":
    threading_lock.acquire()
completed_process = subprocess.run(
    cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    stdin=subprocess.PIPE,
    timeout=timeout,
    **kwargs,
)
if threading_lock and cmd[0] == "oc":
    threading_lock.release()
masked_stdout = mask_secrets(completed_process.stdout.decode(), secrets)
if len(completed_process.stdout) > 0:
    log.debug(f"Command stdout: {masked_stdout}")
else:
    log.debug("Command stdout is empty")

masked_stderr = mask_secrets(completed_process.stderr.decode(), secrets)
if len(completed_process.stderr) > 0:
    if not silent:
        log.warning(f"Command stderr: {masked_stderr}")
else:
    log.debug("Command stderr is empty")
log.debug(f"Command return code: {completed_process.returncode}")
if completed_process.returncode and not ignore_error:
    masked_stderr = filter_out_emojis(masked_stderr)
    if (
        "grep" in masked_cmd
        and b"command terminated with exit code 1" in completed_process.stderr
    ):
        log.info(f"No results found for grep command: {masked_cmd}")
    else:
        raise CommandFailed(
            f"Error during execution of command: {masked_cmd}."
            f"\nError is {masked_stderr}"
        )

E   ocs_ci.ocs.exceptions.CommandFailed: Error during execution of command: oc --kubeconfig /home/jenkins/current-cluster-dir/openshift-cluster-dir/auth/kubeconfig -n myjenkins-1 get Build jax-rs-build-1 -n myjenkins-1 -o yaml.
E   Error is Error from server (ServiceUnavailable): the server is currently unable to handle the request (get builds.build.openshift.io jax-rs-build-1)

ocs_ci/utility/utils.py:661: CommandFailed
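For reference, a minimal usage sketch of exec_cmd based on the docstring quoted above; the command is the one from this failure, and ignore_error=True / silent=True are used only to show how a caller could inspect the ServiceUnavailable stderr without CommandFailed being raised (illustrative, not a proposed fix):

from ocs_ci.utility.utils import exec_cmd

# Illustrative call only: tolerate a non-zero return code so the transient
# ServiceUnavailable answer can be inspected instead of raising CommandFailed.
result = exec_cmd(
    "oc -n myjenkins-1 get Build jax-rs-build-1 -o yaml",
    timeout=600,
    ignore_error=True,
    silent=True,
)
if result.returncode != 0:
    print(f"get Build failed: {result.stderr.decode()}")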

RP: https://reportportal-ocs4.apps.ocp-c1.prod.psi.redhat.com/ui/#ocs/launches/557/17069/828575/828603/828605/log?logParams=history%3D619487%26page.page%3D1
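One way the test itself could be hardened against this window (sketch only, the helper name is hypothetical and not in ocs-ci): after restarting a master node, wait until the API server answers again before polling builds, for example by probing the /readyz endpoint through oc:

import time

from ocs_ci.utility.utils import exec_cmd

def wait_for_api_server(timeout=900, interval=30):
    """Hypothetical helper: block until 'oc get --raw /readyz' succeeds again."""
    end = time.time() + timeout
    while time.time() < end:
        result = exec_cmd("oc get --raw /readyz", ignore_error=True, silent=True)
        if result.returncode == 0:
            return
        time.sleep(interval)
    raise TimeoutError("kube-apiserver did not become ready in time")

# In test_run_jenkins_node_reboot, after nodes.restart_nodes(...) on a master:
#     wait_for_api_server()
#     jenkins.wait_for_build_to_complete()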

github-actions[bot] commented 8 months ago

This issue has been automatically marked as stale because it has not had recent activity. It will be closed in 30 days if no further activity occurs.

PrasadDesala commented 6 months ago

@nagendra202 Did you observe this issue in recent runs?

github-actions[bot] commented 3 months ago

This issue has been automatically marked as stale because it has not had recent activity. It will be closed in 30 days if no further activity occurs.

github-actions[bot] commented 3 weeks ago

This issue has been automatically marked as stale because it has not had recent activity. It will be closed in 30 days if no further activity occurs.