Open Xichen96 opened 3 years ago
/data/sonic-mgmt-int/tests$ ./run_tests.sh -c platform_tests/test_reboot.py::test_soft_reboot -n xxxxxx -i ../ansible/str,../ansible/veos -f ../ansible/testbed.csv -e "--disable_loganalyzer" -u === Running tests in groups === /usr/local/lib/python2.7/dist-packages/ansible/parsing/vault/init.py:44: CryptographyDeprecationWarning: Python 2 is no longer supported by the Python core team. Support for it is now deprecated in cryptography, and will be removed in a future release. from cryptography.exceptions import InvalidSignature ======================================================== test session starts ======================================================== platform linux2 -- Python 2.7.17, pytest-4.6.5, py-1.9.0, pluggy-0.13.1 ansible: 2.8.12 rootdir: /data/sonic-mgmt-int/tests, inifile: pytest.ini plugins: forked-1.3.0, xdist-1.28.0, html-1.22.1, metadata-1.10.0, repeat-0.9.1, ansible-2.2.2 collected 1 item
platform_tests/test_reboot.py::test_soft_reboot[xxxxxx]
----------------------------------------------------------- live log call -----------------------------------------------------------
06:30:13 __init__.pytest_runtest_call L0039 ERROR | Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/_pytest/python.py", line 1464, in runtest
self.ihook.pytest_pyfunc_call(pyfuncitem=self)
File "/usr/local/lib/python2.7/dist-packages/pluggy/hooks.py", line 286, in call
return self._hookexec(self, self.get_hookimpls(), kwargs)
File "/usr/local/lib/python2.7/dist-packages/pluggy/manager.py", line 93, in _hookexec
return self._inner_hookexec(hook, methods, kwargs)
File "/usr/local/lib/python2.7/dist-packages/pluggy/manager.py", line 87, in
platform_tests/test_reboot.py::test_soft_reboot[xxxxxx] ERROR [100%]
============================================================== ERRORS =============================================================== __ ERROR at teardown of test_soft_reboot[xxxxxx] ___
duthosts = <tests.common.devices.duthosts.DutHosts object at 0x7f99ebe10450>, enum_rand_one_per_hwsku_hostname = 'xxxxxx' conn_graph_facts = {'device_conn': {'xxxxxx': {'Ethernet0': {'peerdevice': u'xxxxx', 'peerport': u'Ethernet0', 'speed'...ard', 'HwSku': u'xxxxxxx', 'ManagementGw': u'xxxxxxx', 'ManagementIp': uxxxxxxxx', ...}}, ...} xcvr_skip_list = {'xxxxxx': []}
@pytest.fixture(scope="module", autouse=True)
def teardown_module(duthosts, enum_rand_one_per_hwsku_hostname, conn_graph_facts, xcvr_skip_list):
duthost = duthosts[enum_rand_one_per_hwsku_hostname]
yield
logging.info("Tearing down: to make sure all the critical services, interfaces and transceivers are good")
interfaces = conn_graph_facts["device_conn"][duthost.hostname]
check_critical_processes(duthost, watch_secs=10)
conn_graph_facts = {'device_conn': {'xxxxxx': {'Ethernet0': {'peerdevice': u'sx', 'peerport': u'Ethernet0', 'speed'...ard', 'HwSku': u'x', 'ManagementGw': u'x', 'ManagementIp': u'x', ...}}, ...}
duthost =
platform_tests/test_reboot.py:43:
dut =
def check_critical_processes(dut, watch_secs=0):
"""
@summary: check all critical processes. They should be all running.
keep on checking every 5 seconds until watch_secs drops below 0.
@param dut: The AnsibleHost object of DUT. For interacting with DUT.
@param watch_secs: all processes should remain healthy for watch_secs seconds.
"""
logging.info("Check all critical processes are healthy for {} seconds".format(watch_secs))
while watch_secs >= 0:
status, details = get_critical_processes_status(dut)
pytest_assert(status, "Not all critical processes are healthy: {}".format(details))
E Failed: Not all critical processes are healthy: {'lldp': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}, 'pmon': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}, 'database': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}, 'snmp': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}, 'bgp': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}, 'teamd': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}, 'syncd': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}, 'swss': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}}
details = {'bgp': {'exited_critical_process': [], 'running_critical_process': [], 'status': False}, 'database': {'exited_critica...': [], 'status': False}, 'pmon': {'exited_critical_process': [], 'running_critical_process': [], 'status': False}, ...}
dut =
common/platform/processesutils.py:36: Failed ============================================================= FAILURES ============================================================== ____ test_softreboot[xxxxxx] ____
duthosts = <tests.common.devices.duthosts.DutHosts object at 0x7f99ebe10450>, enum_rand_one_per_hwsku_hostname = 'xxxxxx' localhost = <tests.common.devices.local.Localhost object at 0x7f99ea197710> conn_graph_facts = {'device_conn': {'xxxxxx': {'Ethernet0': {'peerdevice': u'xxxxxx', 'peerport': u'Ethernet0', 'speed'...ard', 'HwSku': u'Celestica-E1031-T48S4', 'ManagementGw': u'xxxxxx', 'ManagementIp': u'xxxxxx80/23', ...}}, ...} xcvr_skip_list = {'xxxxxx': []}
def test_soft_reboot(duthosts, enum_rand_one_per_hwsku_hostname, localhost, conn_graph_facts, xcvr_skip_list):
"""
@summary: This test case is to perform soft reboot and check platform status
"""
duthost = duthosts[enum_rand_one_per_hwsku_hostname]
soft_reboot_supported = duthost.command('which soft-reboot', module_ignore_errors=True)["stdout"]
if "" == soft_reboot_supported:
pytest.skip("Soft-reboot is not supported on this DUT, skip this test case")
if duthost.is_multi_asic:
pytest.skip("Multi-ASIC devices not supporting soft reboot")
reboot_and_check(localhost, duthost, conn_graph_facts["device_conn"][duthost.hostname], xcvr_skip_list, reboot_type=REBOOT_TYPE_SOFT)
conn_graph_facts = {'device_conn': {'xxxxxx': {'Ethernet0': {'peerdevice': u'xxxxxx', 'peerport': u'Ethernet0', 'speed'...ard', 'HwSku': u'Celestica-E1031-T48S4', 'ManagementGw': u'xxxxxx', 'ManagementIp': u'xxxxxx80/23', ...}}, ...}
duthost =
platform_tests/test_reboot.py:138:
platform_tests/test_reboot.py:60: in reboot_and_check reboot(dut, localhost, reboot_type=reboot_type, reboot_helper=reboot_helper, reboot_kwargs=reboot_kwargs)
duthost =
def reboot(duthost, localhost, reboot_type='cold', delay=10, \
timeout=0, wait=0, wait_for_ssh=True, reboot_helper=None, reboot_kwargs=None):
"""
reboots DUT
:param duthost: DUT host object
:param localhost: local host object
:param reboot_type: reboot type (cold, fast, warm)
:param delay: delay between ssh availability checks
:param timeout: timeout for waiting ssh port state change
:param wait: time to wait for DUT to initialize
:param reboot_helper: helper function to execute the power toggling
:param reboot_kwargs: arguments to pass to the reboot_helper
:return:
"""
# pool for executing tasks asynchronously
pool = ThreadPool()
dut_ip = duthost.mgmt_ip
hostname = duthost.hostname
try:
reboot_ctrl = reboot_ctrl_dict[reboot_type]
reboot_command = reboot_ctrl['command'] if reboot_type != REBOOT_TYPE_POWEROFF else None
if timeout == 0:
timeout = reboot_ctrl['timeout']
if wait == 0:
wait = reboot_ctrl['wait']
except KeyError:
raise ValueError('invalid reboot type: "{} for {}"'.format(reboot_type, hostname))
def execute_reboot_command():
logger.info('rebooting {} with command "{}"'.format(hostname, reboot_command))
return duthost.command(reboot_command)
def execute_reboot_helper():
logger.info('rebooting {} with helper "{}"'.format(hostname, reboot_helper))
return reboot_helper(reboot_kwargs)
dut_datetime = duthost.get_now_time()
DUT_ACTIVE.clear()
if reboot_type != REBOOT_TYPE_POWEROFF:
reboot_res = pool.apply_async(execute_reboot_command)
else:
assert reboot_helper is not None, "A reboot function must be provided for power off reboot"
reboot_res = pool.apply_async(execute_reboot_helper)
logger.info('waiting for ssh to drop on {}'.format(hostname))
res = localhost.wait_for(host=dut_ip,
port=SONIC_SSH_PORT,
state='absent',
search_regex=SONIC_SSH_REGEX,
delay=delay,
timeout=timeout,
module_ignore_errors=True)
if res.is_failed or ('msg' in res and 'Timeout' in res['msg']):
if reboot_res.ready():
logger.error('reboot result: {} on {}'.format(reboot_res.get(), hostname))
raise Exception('DUT {} did not shutdown'.format(hostname))
if not wait_for_ssh:
return
# TODO: add serial output during reboot for better debuggability
# This feature requires serial information to be present in
# testbed information
logger.info('waiting for ssh to startup on {}'.format(hostname))
res = localhost.wait_for(host=dut_ip,
port=SONIC_SSH_PORT,
state='started',
search_regex=SONIC_SSH_REGEX,
delay=delay,
timeout=timeout,
module_ignore_errors=True)
if res.is_failed or ('msg' in res and 'Timeout' in res['msg']):
raise Exception('DUT {} did not startup'.format(hostname))
E Exception: DUT xxxxxx did not startup
delay = 10
dut_datetime = datetime.datetime(2021, 9, 8, 6, 24, 49)
dut_ip = u'xxxxxx80'
duthost =
common/reboot.py:162: Exception ------------------------------------ generated xml file: /data/sonic-mgmt-int/tests/logs/tr.xml ------------------------------------- ====================================================== short test summary info ====================================================== ERROR platform_tests/test_reboot.py::test_soft_reboot[xxxxxx] - Failed: Not all critical processes are healthy: {'lldp': ... FAILED platform_tests/test_reboot.py::test_soft_reboot[xxxxxx] - Exception: DUT xxxxxx did not startup =============================================== 1 failed, 1 error in 1811.18 seconds ================================================ INFO:root:Can not get Allure report URL. Please check logs xichenlin@211b3745deba:/data/sonic-mgmt-int/tests$ ./run_tests.sh -c platform_tests/test_reboot.py::test_soft_reboot -n xxxxxx -i ../ansible/str,../ansible/veos -f ../ansible/testbed.csv -e "--disable_loganalyzer" -u === Running tests in groups === /usr/local/lib/python2.7/dist-packages/ansible/parsing/vault/init.py:44: CryptographyDeprecationWarning: Python 2 is no longer supported by the Python core team. Support for it is now deprecated in cryptography, and will be removed in a future release. from cryptography.exceptions import InvalidSignature ======================================================== test session starts ======================================================== platform linux2 -- Python 2.7.17, pytest-4.6.5, py-1.9.0, pluggy-0.13.1 ansible: 2.8.12 rootdir: /data/sonic-mgmt-int/tests, inifile: pytest.ini plugins: forked-1.3.0, xdist-1.28.0, html-1.22.1, metadata-1.10.0, repeat-0.9.1, ansible-2.2.2 collected 1 item
platform_tests/test_reboot.py::test_soft_reboot[xxxxxx]
----------------------------------------------------------- live log call -----------------------------------------------------------
07:20:24 __init__.pytest_runtest_call L0039 ERROR | Traceback (most recent call last):
File "/usr/local/lib/python2.7/dist-packages/_pytest/python.py", line 1464, in runtest
self.ihook.pytest_pyfunc_call(pyfuncitem=self)
File "/usr/local/lib/python2.7/dist-packages/pluggy/hooks.py", line 286, in call
return self._hookexec(self, self.get_hookimpls(), kwargs)
File "/usr/local/lib/python2.7/dist-packages/pluggy/manager.py", line 93, in _hookexec
return self._inner_hookexec(hook, methods, kwargs)
File "/usr/local/lib/python2.7/dist-packages/pluggy/manager.py", line 87, in
platform_tests/test_reboot.py::test_soft_reboot[xxxxxx] ERROR [100%]
============================================================== ERRORS =============================================================== __ ERROR at teardown of test_soft_reboot[xxxxxx] ___
duthosts = <tests.common.devices.duthosts.DutHosts object at 0x7fed973623d0>, enum_rand_one_per_hwsku_hostname = 'xxxxxx' conn_graph_facts = {'device_conn': {'xxxxxx': {'Ethernet0': {'peerdevice': u'xxxxxx', 'peerport': u'Ethernet0', 'speed'...ard', 'HwSku': u'Celestica-E1031-T48S4', 'ManagementGw': u'xxxxxx', 'ManagementIp': u'xxxxxx80/23', ...}}, ...} xcvr_skip_list = {'xxxxxx': []}
@pytest.fixture(scope="module", autouse=True)
def teardown_module(duthosts, enum_rand_one_per_hwsku_hostname, conn_graph_facts, xcvr_skip_list):
duthost = duthosts[enum_rand_one_per_hwsku_hostname]
yield
logging.info("Tearing down: to make sure all the critical services, interfaces and transceivers are good")
interfaces = conn_graph_facts["device_conn"][duthost.hostname]
check_critical_processes(duthost, watch_secs=10)
conn_graph_facts = {'device_conn': {'xxxxxx': {'Ethernet0': {'peerdevice': u'xxxxxx', 'peerport': u'Ethernet0', 'speed'...ard', 'HwSku': u'Celestica-E1031-T48S4', 'ManagementGw': u'xxxxxx', 'ManagementIp': u'xxxxxx80/23', ...}}, ...}
duthost =
platform_tests/test_reboot.py:43:
dut =
def check_critical_processes(dut, watch_secs=0):
"""
@summary: check all critical processes. They should be all running.
keep on checking every 5 seconds until watch_secs drops below 0.
@param dut: The AnsibleHost object of DUT. For interacting with DUT.
@param watch_secs: all processes should remain healthy for watch_secs seconds.
"""
logging.info("Check all critical processes are healthy for {} seconds".format(watch_secs))
while watch_secs >= 0:
status, details = get_critical_processes_status(dut)
pytest_assert(status, "Not all critical processes are healthy: {}".format(details))
E Failed: Not all critical processes are healthy: {'lldp': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}, 'pmon': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}, 'database': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}, 'snmp': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}, 'bgp': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}, 'teamd': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}, 'syncd': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}, 'swss': {'status': False, 'exited_critical_process': [], 'running_critical_process': []}}
details = {'bgp': {'exited_critical_process': [], 'running_critical_process': [], 'status': False}, 'database': {'exited_critica...': [], 'status': False}, 'pmon': {'exited_critical_process': [], 'running_critical_process': [], 'status': False}, ...}
dut =
common/platform/processesutils.py:36: Failed ============================================================= FAILURES ============================================================== ____ test_softreboot[xxxxxx] ____
duthosts = <tests.common.devices.duthosts.DutHosts object at 0x7fed973623d0>, enum_rand_one_per_hwsku_hostname = 'xxxxxx' localhost = <tests.common.devices.local.Localhost object at 0x7fed95462050> conn_graph_facts = {'device_conn': {'xxxxxx': {'Ethernet0': {'peerdevice': u'xxxxxx', 'peerport': u'Ethernet0', 'speed'...ard', 'HwSku': u'Celestica-E1031-T48S4', 'ManagementGw': u'xxxxxx', 'ManagementIp': u'xxxxxx80/23', ...}}, ...} xcvr_skip_list = {'xxxxxx': []}
def test_soft_reboot(duthosts, enum_rand_one_per_hwsku_hostname, localhost, conn_graph_facts, xcvr_skip_list):
"""
@summary: This test case is to perform soft reboot and check platform status
"""
duthost = duthosts[enum_rand_one_per_hwsku_hostname]
soft_reboot_supported = duthost.command('which soft-reboot', module_ignore_errors=True)["stdout"]
if "" == soft_reboot_supported:
pytest.skip("Soft-reboot is not supported on this DUT, skip this test case")
if duthost.is_multi_asic:
pytest.skip("Multi-ASIC devices not supporting soft reboot")
reboot_and_check(localhost, duthost, conn_graph_facts["device_conn"][duthost.hostname], xcvr_skip_list, reboot_type=REBOOT_TYPE_SOFT)
conn_graph_facts = {'device_conn': {'xxxxxx': {'Ethernet0': {'peerdevice': u'xxxxxx', 'peerport': u'Ethernet0', 'speed'...ard', 'HwSku': u'Celestica-E1031-T48S4', 'ManagementGw': u'xxxxxx', 'ManagementIp': u'xxxxxx80/23', ...}}, ...}
duthost =
platform_tests/test_reboot.py:138:
platform_tests/test_reboot.py:60: in reboot_and_check reboot(dut, localhost, reboot_type=reboot_type, reboot_helper=reboot_helper, reboot_kwargs=reboot_kwargs)
duthost =
def reboot(duthost, localhost, reboot_type='cold', delay=10, \
timeout=0, wait=0, wait_for_ssh=True, reboot_helper=None, reboot_kwargs=None):
"""
reboots DUT
:param duthost: DUT host object
:param localhost: local host object
:param reboot_type: reboot type (cold, fast, warm)
:param delay: delay between ssh availability checks
:param timeout: timeout for waiting ssh port state change
:param wait: time to wait for DUT to initialize
:param reboot_helper: helper function to execute the power toggling
:param reboot_kwargs: arguments to pass to the reboot_helper
:return:
"""
# pool for executing tasks asynchronously
pool = ThreadPool()
dut_ip = duthost.mgmt_ip
hostname = duthost.hostname
try:
reboot_ctrl = reboot_ctrl_dict[reboot_type]
reboot_command = reboot_ctrl['command'] if reboot_type != REBOOT_TYPE_POWEROFF else None
if timeout == 0:
timeout = reboot_ctrl['timeout']
if wait == 0:
wait = reboot_ctrl['wait']
except KeyError:
raise ValueError('invalid reboot type: "{} for {}"'.format(reboot_type, hostname))
def execute_reboot_command():
logger.info('rebooting {} with command "{}"'.format(hostname, reboot_command))
return duthost.command(reboot_command)
def execute_reboot_helper():
logger.info('rebooting {} with helper "{}"'.format(hostname, reboot_helper))
return reboot_helper(reboot_kwargs)
dut_datetime = duthost.get_now_time()
DUT_ACTIVE.clear()
if reboot_type != REBOOT_TYPE_POWEROFF:
reboot_res = pool.apply_async(execute_reboot_command)
else:
assert reboot_helper is not None, "A reboot function must be provided for power off reboot"
reboot_res = pool.apply_async(execute_reboot_helper)
logger.info('waiting for ssh to drop on {}'.format(hostname))
res = localhost.wait_for(host=dut_ip,
port=SONIC_SSH_PORT,
state='absent',
search_regex=SONIC_SSH_REGEX,
delay=delay,
timeout=timeout,
module_ignore_errors=True)
if res.is_failed or ('msg' in res and 'Timeout' in res['msg']):
if reboot_res.ready():
logger.error('reboot result: {} on {}'.format(reboot_res.get(), hostname))
raise Exception('DUT {} did not shutdown'.format(hostname))
if not wait_for_ssh:
return
# TODO: add serial output during reboot for better debuggability
# This feature requires serial information to be present in
# testbed information
logger.info('waiting for ssh to startup on {}'.format(hostname))
res = localhost.wait_for(host=dut_ip,
port=SONIC_SSH_PORT,
state='started',
search_regex=SONIC_SSH_REGEX,
delay=delay,
timeout=timeout,
module_ignore_errors=True)
if res.is_failed or ('msg' in res and 'Timeout' in res['msg']):
raise Exception('DUT {} did not startup'.format(hostname))
E Exception: DUT xxxxxx did not startup
delay = 10
dut_datetime = datetime.datetime(2021, 9, 8, 7, 14, 57)
dut_ip = u'xxxxxx80'
duthost =
common/reboot.py:162: Exception ------------------------------------ generated xml file: /data/sonic-mgmt-int/tests/logs/tr.xml ------------------------------------- ====================================================== short test summary info ====================================================== ERROR platform_tests/test_reboot.py::test_soft_reboot[xxxxxx] - Failed: Not all critical processes are healthy: {'lldp': ... FAILED platform_tests/test_reboot.py::test_soft_reboot[xxxxxx] - Exception: DUT xxxxxx did not startup =============================================== 1 failed, 1 error in 1890.96 seconds ================================================ INFO:root:Can not get Allure report URL. Please check logs
Description
After `soft-reboot`, the switch does not actually boot up. The console shows that the switch keeps rebooting.
Steps to reproduce the issue:
Describe the results you received:
Describe the results you expected:
Output of `show version`:
Output of `show techsupport`:
Additional information you deem important (e.g. issue happens only occasionally):