There is a consensus failure during one of the protocol upgrade tests. The consensus failure takes place during the test's "wait_for_upgrade" step. The relevant test is checking governance transfers during protocol upgrade.
context = {'AssetID': '8165572b7550e88f836fb2d8121427915025c165044af6bfe7145b01cb6ce4e9', 'app_state.network_parameters': {'bloc..., ...}, 'current_test_name': 'test_governance_transfers_during_protocol_upgrade', 'current_test_start_block': 124, ...}
proposer = '10A93F18C4'
@pytest.mark.gov_transfers
@pytest.mark.network_infra
def test_governance_transfers_during_protocol_upgrade(context, proposer):
"""
Transfer proposals in either a pre or post enactment state are restored after a protocol upgrade (0028-GOVE-102).
Recurring transfers proposed before an upgrade which start before, during or after an upgrade should complete on the
proposed end epoch (0028-GOVE-130)
One off delivery transfers proposed before an upgrade which are due to start during or after an upgrade should
complete either when the network is available again or at the proposed delivery date/time (0028-GOVE-131)
"""
tag = FUTURE_VERSION_TAG
# build selection of transfers, active and dormant to test survivability across upgrade
destination_accounts = rewardMod.setup_treasury_data(count=4)
_transfer_one_off_delivered = governMod.GovernanceTransfer(
transfer_type="All_Or_Nothing",
amount=0.5,
source_type="Reward",
destination_type="General",
destination=destination_accounts[0][0],
asset_id=settings.context["gov_transfer_asset_0"],
)
_transfer_one_off_delivered.propose(proposer)
_transfer_enacted_dormant = governMod.GovernanceTransfer(
transfer_type="All_Or_Nothing",
amount=0.5,
source_type="Reward",
destination_type="General",
destination=destination_accounts[0][1],
asset_id=settings.context["gov_transfer_asset_1"],
transfer_deliver_on=time.time() + 40, # expected to complete sometime after full restart
)
_transfer_enacted_dormant.propose(proposer)
_transfer_recurr_dormant = governMod.GovernanceTransfer(
transfer_type="All_Or_Nothing",
amount=0.5,
source_type="Reward",
destination_type="General",
destination=destination_accounts[0][2],
asset_id=settings.context["gov_transfer_asset_2"],
start_offset=8,
end_offset=8, # expected to complete immediately on full restart
)
_transfer_recurr_dormant.propose(proposer)
_transfer_recurr_active = governMod.GovernanceTransfer(
transfer_type="All_Or_Nothing",
amount=0.5,
source_type="Reward",
destination_type="General",
destination=destination_accounts[0][3],
asset_id=settings.context["gov_transfer_asset_3"],
start_offset=2,
end_offset=12, # expected to resume after full restart and complete
)
_transfer_recurr_active.propose(proposer)
# Ask VC to prep the directories for upgrade
network.Network.protocol_upgrade(run_config_path=DEFAULT_VISOR_RUN_CONFIG, release_tag=tag)
# Send propose transaction for all nodes for upgrade
nodes = network.Network.list_nodes(mode="validator")
upgrade_block_height = networkMod.get_api_statistics().statistics.block_height + 40
networkMod.submit_upgrade_proposal_for_nodes(nodes, upgrade_block_height, tag)
check_proposal_in_datanode(
upgrade_block_height, tag, "PROTOCOL_UPGRADE_PROPOSAL_STATUS_PENDING", [n.get_vega_pub_key() for n in nodes]
)
> networkMod.wait_for_upgrade(upgrade_block_height=upgrade_block_height, release_tag=tag)
tests/protocol_upgrade/protocol_upgrade_test.py:2224:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
decorators.py:16: in time_closure
result = func(*args, **kwargs)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
upgrade_block_height = 378, release_tag = 'v99.99.0+dev.806a8ae6.modified'
timeout = 90
nodes_to_check = [<modules.vegacapsule.network.Node object at 0x7f6f1b331f90>, <modules.vegacapsule.network.Node object at 0x7f6f1b31a6...odules.vegacapsule.network.Node object at 0x7f6f1b31af50>, <modules.vegacapsule.network.Node object at 0x7f6f1b319650>]
@timefunc
def wait_for_upgrade(upgrade_block_height, release_tag, timeout=90, nodes_to_check=None):
"""Waits until upgrade has taken places."""
if nodes_to_check is None:
nodes_to_check = Network.list_nodes(mode="validator")
def upgrade_done():
try:
for node in nodes_to_check:
stats = node.statistics()
if stats.block_height <= upgrade_block_height:
logging.info(
f"Upgrade not complete. Node {node.get_name()} reports block height {stats.block_height} <= {upgrade_block_height}"
)
return False
if stats.app_version != release_tag:
logging.info(
f"Upgrade not complete. Node {node.get_name()} reports version {stats.app_version} <= {release_tag}"
)
return False
except grpc.RpcError as e:
if e.code() == grpc.StatusCode.UNAVAILABLE:
logging.info(f"Node {node.get_name()} currently unavailable.")
return False
else:
raise
except grpc.FutureTimeoutError as e:
logging.info(f"Node {node.get_name()} currently unavailable.")
return False
Network.wait_for_data_node_ready((now + timeout) - time.time())
logging.info(f"All nodes report correct version and block height > upgrade block.")
return True
now = time.time()
while time.time() < now + timeout:
if upgrade_done():
# we have to restart the scanners because they will have become disconnected when the nodes stopped and started
conftest.restart_scanners(clean=False, reason="protocol upgrade")
return
time.sleep(0.2)
> raise AssertionError(f"Upgrade did not complete in {timeout} seconds.")
E AssertionError: Upgrade did not complete in 90 seconds.
Not a core issue. The test that is falling out of consensus after a protocol upgrade is still using the old binary version which is why its app-hash differs.
Problem encountered
There is a consensus failure during one of the protocol upgrade tests. The consensus failure takes place during the test's "wait_for_upgrade" step. The relevant test is checking governance transfers during protocol upgrade.
Observed behaviour
Consensus failure
Expected behaviour
Never get consensus failures
Steps to reproduce
Software version
806a8ae61fd34d97f8242ca417c9db134e28480b
Failing test
No response
Jenkins run
https://jenkins.vega.rocks/job/common/job/system-tests-wrapper/3966/testReport/tests.protocol_upgrade/protocol_upgrade_test/test_governance_transfers_during_protocol_upgrade/
Configuration used
No response
Relevant log output