Status: Closed. vkathole closed this issue 2 months ago.
self = <test_monitor_recovery.TestMonitorRecovery object at 0x7fc13293aa30>
dc_pod_factory = <function dc_pod_factory.<locals>.factory at 0x7fc132b6aaf0>
mcg_obj = <ocs_ci.ocs.resources.mcg.MCG object at 0x7fc132b47ee0>
bucket_factory = <function bucket_factory_fixture.<locals>._create_buckets at 0x7fc13389b1f0>
def test_monitor_recovery(
self,
dc_pod_factory,
mcg_obj,
bucket_factory,
):
"""
Verifies Monitor recovery procedure as per:
https://access.redhat.com/documentation/en-us/red_hat_openshift_container_storage/4.8/html/troubleshooting_openshift_container_storage/restoring-the-monitor-pods-in-openshift-container-storage_rhocs
"""
# Initialize mon recovery class
mon_recovery = MonitorRecovery()
logger.info("Corrupting ceph monitors by deleting store.db")
corrupt_ceph_monitors()
logger.info("Backing up all the deployments")
mon_recovery.backup_deployments()
dep_revert, mds_revert = mon_recovery.deployments_to_revert()
logger.info("Starting the monitor recovery procedure")
logger.info("Scaling down rook and ocs operators")
mon_recovery.scale_rook_ocs_operators(replica=0)
logger.info(
"Preparing script and patching OSDs to remove LivenessProbe and sleep to infinity"
)
mon_recovery.prepare_monstore_script()
mon_recovery.patch_sleep_on_osds()
switch_to_project(config.ENV_DATA["cluster_namespace"])
logger.info("Getting mon-store from OSDs")
> mon_recovery.run_mon_store()
tests/e2e/kcs/test_monitor_recovery.py:148:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
ocs_ci/utility/retry.py:49: in f_retry
return f(*args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <test_monitor_recovery.MonitorRecovery object at 0x7fc138610be0>
@retry(CommandFailed, tries=15, delay=5, backoff=1)
def run_mon_store(self):
"""
Runs script to get the mon store from OSDs
Raise:
CommandFailed
"""
logger.info("Running mon-store script..")
result = exec_cmd(cmd=f"sh {self.backup_dir}/recover_mon.sh")
result.stdout = result.stdout.decode()
logger.info(f"OSD mon store retrieval stdout {result.stdout}")
result.stderr = result.stderr.decode()
logger.info(f"OSD mon store retrieval stderr {result.stderr}")
search_pattern = re.search(
pattern="error|unable to open mon store", string=result.stderr
)
if search_pattern:
logger.info(f"Error found: {search_pattern}")
> raise CommandFailed
E ocs_ci.ocs.exceptions.CommandFailed
tests/e2e/kcs/test_monitor_recovery.py:362: CommandFailed
This issue has been automatically marked as stale because it has not had recent activity. It will be closed in 30 days if no further activity occurs.
This issue has been automatically marked as stale because it has not had recent activity. It will be closed in 30 days if no further activity occurs.
This issue has been automatically closed due to inactivity. Please re-open if this still requires investigation.
The test scenario has been de-prioritized.
tests/e2e/kcs/test_monitor_recovery.py::TestMonitorRecovery::test_monitor_recovery fails. Full console output:
https://ocs4-jenkins-csb-odf-qe.apps.ocp-c1.prod.psi.redhat.com/job/qe-deploy-ocs-cluster/29790/consoleFull