Closed ypersky1980 closed 10 months ago
=================================== FAILURES =================================== _ TestSmallFileWorkload.test_smallfile_workload[4-5000-22-5-33-CephBlockPool] __
self = <test_small_file_workload.TestSmallFileWorkload object at 0x7f82c3c00b50> file_size = 4, files = 5000, threads = 22, samples = 5, clients = 33 interface = 'CephBlockPool'
@pytest.mark.parametrize(
argnames=["file_size", "files", "threads", "samples", "clients", "interface"],
argvalues=[
pytest.param(*[4, 5000, 22, 5, 33, constants.CEPHBLOCKPOOL]),
pytest.param(*[16, 5000, 8, 5, 21, constants.CEPHBLOCKPOOL]),
pytest.param(*[4, 2500, 4, 5, 9, constants.CEPHFILESYSTEM]),
pytest.param(*[16, 1500, 4, 5, 9, constants.CEPHFILESYSTEM]),
],
)
@pytest.mark.polarion_id("OCS-1295")
def test_smallfile_workload(
self, file_size, files, threads, samples, clients, interface
):
"""
Run SmallFile Workload
Args:
file_size (int) : the size of the file to be used
files (int) : number of files to use
threads (int) : number of threads to be use in the test
samples (int) : how many samples to run for each test
clients (int) : number of clients to use in the test
interface (str) : the volume type (rbd / cephfs)
"""
if config.PERF.get("deploy_internal_es"):
self.es = ElasticSearch()
else:
if config.PERF.get("internal_es_server") == "":
self.es = None
return
else:
self.es = {
"server": config.PERF.get("internal_es_server"),
"port": config.PERF.get("internal_es_port"),
"url": f"http://{config.PERF.get('internal_es_server')}:{config.PERF.get('internal_es_port')}",
}
# verify that the connection to the elasticsearch server is OK
if not super(TestSmallFileWorkload, self).es_connect():
self.es = None
return
# deploy the benchmark-operator
self.deploy_benchmark_operator()
# verify that there is an elasticsearch server for the benchmark
if not self.es:
log.error("This test must have an Elasticsearch server")
return False
# Getting the full path for the test logs
self.full_log_path = get_full_test_logs_path(cname=self)
self.results_path = get_full_test_logs_path(cname=self)
self.full_log_path += (
f"-{file_size}-{files}-{threads}-{samples}-{clients}-{interface}"
)
log.info(f"Logs file path name is : {self.full_log_path}")
# Loading the main template yaml file for the benchmark
log.info("Create resource file for small_files workload")
self.crd_data = templating.load_yaml(constants.SMALLFILE_BENCHMARK_YAML)
# Saving the Original elastic-search IP and PORT - if defined in yaml
self.es_info_backup(self.es)
self.set_storageclass(interface=interface)
# Setting the data set to 40% of the total storage capacity
self.setting_storage_usage(file_size, files, threads, samples, clients)
self.get_env_info()
if not self.run():
tests/e2e/performance/io_workload/test_small_file_workload.py:619:
tests/e2e/performance/io_workload/test_small_file_workload.py:517: in run self.wait_for_wl_to_finish(sleep=30)
self = <test_small_file_workload.TestSmallFileWorkload object at 0x7f82c3c00b50> timeout = 18000, sleep = 30
def wait_for_wl_to_finish(self, timeout=18000, sleep=300):
"""
Waiting until the workload is finished and get the test log
Args:
timeout (int): time in seconds to wait until the benchmark starts
sleep (int): Sleep interval seconds
Raise:
Exception raised for too many restarts of the test.
ResourceWrongStatusException : test Failed / Error
TimeoutExpiredError : test did not complete on time.
"""
log.info(f"Waiting for {self.client_pod_name} to complete")
Finished = 0
restarts = 0
total_time = timeout
while not Finished and total_time > 0:
results = run_oc_command(
"get pod --no-headers -o custom-columns=:metadata.name,:status.phase",
namespace=benchmark_operator.BMO_NAME,
)
(fname, status) = ["", ""]
for name in results:
# looking for the pod which runs the benchmark (not the IO)
# this pod contains `client` in its name, and there is only one
# pod like this; other pods have `server` in the name.
(fname, status) = name.split()
if re.search("client", fname):
break
else:
(fname, status) = ["", ""]
if fname == "": # there is no `client` pod !
err_msg = f"{self.client_pod} Failed to run !!!"
log.error(err_msg)
raise Exception(err_msg)
if not fname == self.client_pod:
# The client pod name is different from previous check, it was restarted
log.info(
f"The pod {self.client_pod} was restart. the new client pod is {fname}"
)
self.client_pod = fname
restarts += 1
# in case of restarting the benchmark, reset the timeout as well
total_time = timeout
if restarts > 3: # we are tolerating only 3 restarts
err_msg = f"Too much restarts of the benchmark ({restarts})"
log.error(err_msg)
raise Exception(err_msg)
if status == "Succeeded":
# Getting the end time of the benchmark - for reporting.
self.end_time = self.get_time()
self.test_logs = self.pod_obj.exec_oc_cmd(
f"logs {self.client_pod}", out_yaml_format=False
)
log.info(f"{self.client_pod} completed successfully")
Finished = 1
elif (
status != constants.STATUS_RUNNING
and status != constants.STATUS_PENDING
):
# if the benchmark pod is not in Running state (and not Completed/Pending),
# no need to wait for timeout.
# Note: the pod can be in pending state in case of restart.
err_msg = f"{self.client_pod} Failed to run - ({status})"
log.error(err_msg)
raise exceptions.ResourceWrongStatusException(
self.client_pod, describe_out=err_msg, column="Status", expected="Succeeded", got=status, ) E ocs_ci.ocs.exceptions.ResourceWrongStatusException: Resource smallfile-client-1-benchmark-abbc7fb5-gr4tj in column Status was in state Failed but expected Succeeded describe output: smallfile-client-1-benchmark-abbc7fb5-gr4tj Failed to run - (Failed)
ocs_ci/ocs/perftests.py:477: ResourceWrongStatusException
This issue has been automatically marked as stale because it has not had recent activity. It will be closed in 30 days if no further activity occurs.
This issue has been automatically closed due to inactivity. Please re-open if this still requires investigation.
test_small_file_workload.py - all test cases failed on both AWS and VMware LSO platforms while running on 4.14. A fix is needed!
The test passed in 4.13 on all the platforms.
VMware LSO: https://ocs4-jenkins-csb-odf-qe.apps.ocp-c1.prod.psi.redhat.com/job/qe-deploy-ocs-cluster/28837/testReport/ AWS https://ocs4-jenkins-csb-odf-qe.apps.ocp-c1.prod.psi.redhat.com/view/Performance/job/qe-trigger-aws-ipi-3az-rhcos-3m-3w-performance/101/testReport/