Skip to content

Commit

Permalink
Merge pull request #662 from AntelopeIO/production_pause_vote_tests
Browse files Browse the repository at this point in the history
[1.0]Add integration tests for production-pause-vote-timeout
  • Loading branch information
linh2931 authored Aug 29, 2024
2 parents ffeef7b + d7216f6 commit 5365c29
Show file tree
Hide file tree
Showing 5 changed files with 335 additions and 6 deletions.
4 changes: 4 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/disaster_recovery.py ${CMAKE_CURRENT_
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/disaster_recovery_2.py ${CMAKE_CURRENT_BINARY_DIR}/disaster_recovery_2.py COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/disaster_recovery_2_test_shape.json ${CMAKE_CURRENT_BINARY_DIR}/disaster_recovery_2_test_shape.json COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/disaster_recovery_3.py ${CMAKE_CURRENT_BINARY_DIR}/disaster_recovery_3.py COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/production_pause_vote_timeout.py ${CMAKE_CURRENT_BINARY_DIR}/production_pause_vote_timeout.py COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/production_pause_vote_timeout_test_shape.json ${CMAKE_CURRENT_BINARY_DIR}/production_pause_vote_timeout_test_shape.json COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/production_restart.py ${CMAKE_CURRENT_BINARY_DIR}/production_restart.py COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/production_restart_test_shape.json ${CMAKE_CURRENT_BINARY_DIR}/production_restart_test_shape.json COPYONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/trx_finality_status_test.py ${CMAKE_CURRENT_BINARY_DIR}/trx_finality_status_test.py COPYONLY)
Expand Down Expand Up @@ -161,6 +163,8 @@ add_test(NAME disaster_recovery_2 COMMAND tests/disaster_recovery_2.py -v ${UNSH
set_property(TEST disaster_recovery_2 PROPERTY LABELS nonparallelizable_tests)
add_test(NAME disaster_recovery_3 COMMAND tests/disaster_recovery_3.py -v ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
set_property(TEST disaster_recovery_3 PROPERTY LABELS nonparallelizable_tests)
add_test(NAME production_pause_vote_timeout COMMAND tests/production_pause_vote_timeout.py -v ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
set_property(TEST production_pause_vote_timeout PROPERTY LABELS nonparallelizable_tests)
add_test(NAME production_restart COMMAND tests/production_restart.py -v ${UNSHARE} WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
set_property(TEST production_restart PROPERTY LABELS nonparallelizable_tests)

Expand Down
10 changes: 5 additions & 5 deletions tests/TestHarness/Cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -1029,21 +1029,21 @@ def activateInstantFinality(self, biosFinalizer=True, waitForFinalization=True,
return None, transId
return True, transId

def setFinalizers(self, nodes, node=None):
# finalizerNames specifies non-default finalizer name for each node
def setFinalizers(self, nodes, node=None, finalizerNames=None):
# finalizerNames, if present, must specify finalizer names for all the nodes
assert(finalizerNames is None or len(nodes) == len(finalizerNames))
if node is None:
node = self.biosNode
numFins = len(nodes)
threshold = int(numFins * 2 / 3 + 1)
if threshold > 2 and threshold == numFins:
# nodes are often stopped, so do not require all node votes
threshold = threshold - 1
if Utils.Debug: Utils.Print(f"threshold: {threshold}, numFins: {numFins}")
setFinStr = f'{{"finalizer_policy": {{'
setFinStr += f' "threshold": {threshold}, '
setFinStr += f' "finalizers": ['
finNum = 1
for n in nodes:
finName = n.producerName if n.producerName is not None else f"finalizer{finNum}"
finName = finalizerNames[finNum-1] if finalizerNames is not None else n.producerName if n.producerName is not None else f"finalizer{finNum}"
setFinStr += f' {{"description": "{finName}", '
setFinStr += f' "weight":1, '
setFinStr += f' "public_key": "{n.keys[0].blspubkey}", '
Expand Down
3 changes: 2 additions & 1 deletion tests/nodeos_snapshot_forked_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def getSnapshotsCount(nodeId):
# "bridge" shape connects defproducera through defproducerb (in node0) to each other and defproducerc is alone (in node01)
# and the only connection between those 2 groups is through the bridge node
if cluster.launch(prodCount=2, topo="bridge", pnodes=totalProducerNodes,
totalNodes=totalNodes, totalProducers=totalProducers, activateIF=activateIF,
totalNodes=totalNodes, totalProducers=totalProducers,
activateIF=activateIF, biosFinalizer=False,
specificExtraNodeosArgs=specificExtraNodeosArgs,
extraNodeosArgs=extraNodeosArgs) is False:
Utils.cmdError("launcher")
Expand Down
205 changes: 205 additions & 0 deletions tests/production_pause_vote_timeout.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
#!/usr/bin/env python3
import os
import shutil
import signal
import time

from TestHarness import Cluster, TestHelper, Utils, WalletMgr, ReturnType
from TestHarness.Node import BlockType

####################################################################################
# production_pause_vote_timeout
# Test production-pause-vote-timeout works as expected.
#
# Setup:
#
# Use five nodes in an hourglass topology. The center node is a relay node that
# initially has vote-threads enabled. The other 4 peripheral nodes are:
#
# node0: Enables block production for producera and has the finalizer key with
# description of producera. Has vote-threads enabled. Connect to the center node.
# node1: Enables block production for producerb and has the finalizer key with
# description of producerb. Has vote-threads enabled. Connect to the center node
# and node0.
# producercNode: Enables block production for producerc. Has vote-threads enabled.
# Connect to the center node and finalizercNode.
# finalizercNode: Has the finalizer key with description of producerc.
# Has vote-threads enabled. Connect to the center node and producercNode.
#
# Test cases:
#
# 1. Bring down finalizercNode. producercNode should eventually
# automatically pause production due to not receiving votes from finalizercNode
# that are associated with its producerc. However, Node0 and Node1 should not pause.
# Then bring finalizercNode back up. producercNode should
# automatically resume production.
# 2. Bring down the center node. producercNode should eventually automatically
# pause production due to not receiving votes from Node0 and Node1 that are
# associated with the other producers. However, Node0 and Node1 should not pause.
# Then bring the center node back up. producercNode should automatically
# resume production.
# 3. Restart producercNode with "--production-pause-vote-timeout-ms 0" to
# disable production-pause-vote-timeout. Bring down finalizercNode.
# producercNode should keep producing.
#
####################################################################################

# Short aliases for the harness helpers used throughout the script.
Print = Utils.Print
errorExit = Utils.errorExit

# Command-line options accepted by this test.
args = TestHelper.parse_args({
    "-d",
    "--keep-logs",
    "--dump-error-details",
    "-v",
    "--leave-running",
    "--unshared",
})
delay = args.d
debug = args.v
dumpErrorDetails = args.dump_error_details

# Cluster layout.
pnodes = 3                # number of producing nodes
totalNodes = pnodes + 2   # plus 1 center node and 1 finalizer node for defproducerc
prodCount = 1             # number of producers per producing node

Utils.Debug = debug
testSuccessful = False    # set to True only at the very end of the test body

cluster = Cluster(
    unshared=args.unshared,
    keepRunning=args.leave_running,
    keepLogs=args.keep_logs,
)
walletMgr = WalletMgr(
    True,
    keepRunning=args.leave_running,
    keepLogs=args.keep_logs,
)

def waitForProductionPaused(node, downNodeName, maxSeconds=15):
    """Poll node's "producer"/"paused" endpoint once per second until it reports paused.

    node         -- node whose paused state is polled
    downNodeName -- name of the node that was shut down; only used in the log message
    maxSeconds   -- maximum number of one-second polls before giving up

    Returns True if the endpoint answered b'true' within maxSeconds polls,
    False otherwise.
    """
    for i in range(0, maxSeconds):
        time.sleep(1)
        # The raw response is a bytes object. It must be compared explicitly:
        # b'false' is a non-empty bytes value and therefore truthy.
        if node.processUrllibRequest("producer", "paused", returnType=ReturnType.raw) == b'true':
            Print(f'paused after {i} seconds after {downNodeName} was shutdown')
            return True
    return False


try:
    TestHelper.printSystemInfo("BEGIN")

    cluster.setWalletMgr(walletMgr)

    Print(f'producing nodes: {pnodes}, delay between nodes launch: {delay} second{"s" if delay != 1 else ""}')

    # for defproducerc producing node
    specificExtraNodeosArgs={}
    specificExtraNodeosArgs[2]="--production-pause-vote-timeout-ms 1000"

    Print("Stand up cluster")
    # Cannot use activateIF to transition to Savanna directly as it assumes
    # each producer node has finalizer configured.
    if cluster.launch(pnodes=pnodes, totalNodes=totalNodes, totalProducers=pnodes, prodCount=prodCount, delay=delay, loadSystemContract=False,
                      specificExtraNodeosArgs=specificExtraNodeosArgs,
                      activateIF=False, signatureProviderForNonProducer=True,
                      topo="./tests/production_pause_vote_timeout_test_shape.json") is False:
        errorExit("Failed to stand up eos cluster.")

    assert cluster.biosNode.getInfo(exitOnError=True)["head_block_producer"] != "eosio", "launch should have waited for production to change"

    node0 = cluster.getNode(0) # producer and finalizer node for defproducera
    node1 = cluster.getNode(1) # producer and finalizer node for defproducerb
    producercNode = cluster.getNode(2) # producer node for defproducerc
    finalizercNode = cluster.getNode(3) # finalizer node for defproducerc
    centerNode = cluster.getNode(4)

    Print("Set finalizer policy and start transition to Savanna")
    # Specifically, need to configure finalizer name for finalizercNode as defproducerc
    transId = cluster.setFinalizers(nodes=[node0, node1, finalizercNode], finalizerNames=["defproducera", "defproducerb", "defproducerc"])
    assert transId is not None, "setfinalizers failed"
    assert cluster.biosNode.waitForTransFinalization(transId), f"setfinalizers transaction {transId} was not rolled into a LIB block"
    assert cluster.biosNode.waitForLibToAdvance(), "LIB did not advance after setFinalizers"

    # biosNode no longer needed
    cluster.biosNode.kill(signal.SIGTERM)
    cluster.waitOnClusterSync(blockAdvancing=5)

    Print("Wait for LIB on all producing nodes to advance")
    assert node0.waitForLibToAdvance(), "node0 did not advance LIB"
    assert node1.waitForLibToAdvance(), "node1 did not advance LIB"
    assert producercNode.waitForLibToAdvance(), "producercNode did not advance LIB"

    ####################### test 1 ######################

    Print("Shutdown finalizercNode")
    finalizercNode.kill(signal.SIGTERM)
    assert not finalizercNode.verifyAlive(), "finalizercNode did not shutdown"

    # Wait some time for producercNode to pause.
    # Do not use waitForHeadToAdvance() to check for pausing, as producercNode
    # still receives blocks from node0 and node1, which can make its head advance.
    paused = waitForProductionPaused(producercNode, "finalizercNode")
    # Verify producercNode paused
    assert paused, "producercNode still producing after finalizercNode was shutdown"
    # Verify node0 and node1 still producing but LIB should not advance
    assert node0.processUrllibRequest("producer", "paused", returnType=ReturnType.raw) == b'false', "node0 paused after finalizercNode was shutdown"
    assert node1.processUrllibRequest("producer", "paused", returnType=ReturnType.raw) == b'false', "node1 paused after finalizercNode was shutdown"
    if node0.waitForLibToAdvance(timeout=5): # LIB can advance for a few blocks first
        assert not node0.waitForLibToAdvance(timeout=5), "LIB should not advance on node0 after finalizercNode was shutdown"
    if node1.waitForLibToAdvance(timeout=5):
        assert not node1.waitForLibToAdvance(timeout=5), "LIB should not advance on node1 after finalizercNode was shutdown"

    Print("Restart finalizercNode")
    finalizercNode.relaunch()

    Print("Verify production unpaused and LIB advances after restart of finalizercNode")
    assert node0.waitForLibToAdvance(), "node0 did not advance LIB"
    assert node1.waitForLibToAdvance(), "node1 did not advance LIB"
    assert producercNode.waitForLibToAdvance(), "producercNode did not advance LIB"
    assert producercNode.processUrllibRequest("producer", "paused", returnType=ReturnType.raw) == b'false', "producercNode should have resumed production after finalizercNode restarted"

    ####################### test 2 ######################

    Print("Shutdown centerNode")
    centerNode.kill(signal.SIGTERM)
    assert not centerNode.verifyAlive(), "centerNode did not shutdown"

    # Wait some time for producercNode to pause.
    paused = waitForProductionPaused(producercNode, "centerNode")
    # Verify producercNode paused
    assert paused, "producercNode still producing after centerNode was shutdown"
    # Verify node0 and node1 still producing but LIB should not advance
    assert node0.processUrllibRequest("producer", "paused", returnType=ReturnType.raw) == b'false', "node0 paused after centerNode was shutdown"
    assert node1.processUrllibRequest("producer", "paused", returnType=ReturnType.raw) == b'false', "node1 paused after centerNode was shutdown"
    if node0.waitForLibToAdvance(timeout=5): # LIB can advance for a few blocks first
        assert not node0.waitForLibToAdvance(timeout=5), "LIB should not advance on node0 after centerNode was shutdown"
    if node1.waitForLibToAdvance(timeout=5):
        assert not node1.waitForLibToAdvance(timeout=5), "LIB should not advance on node1 after centerNode was shutdown"

    Print("Restart centerNode")
    centerNode.relaunch()

    Print("Verify production unpaused and LIB advances after restart of centerNode")
    assert node0.waitForLibToAdvance(), "node0 did not advance LIB"
    assert node1.waitForLibToAdvance(), "node1 did not advance LIB"
    assert producercNode.waitForLibToAdvance(), "producercNode did not advance LIB"
    assert producercNode.processUrllibRequest("producer", "paused", returnType=ReturnType.raw) == b'false', "producercNode should have resumed production after centerNode restarted"

    ####################### test 3 ######################

    Print("Shutdown producercNode")
    producercNode.kill(signal.SIGTERM)
    assert not producercNode.verifyAlive(), "producercNode did not shutdown"

    # disable production-pause-vote-timeout
    Print("Relaunch producercNode with --production-pause-vote-timeout-ms 0")
    addSwapFlags={"--production-pause-vote-timeout-ms": "0"}
    producercNode.relaunch(chainArg="--enable-stale-production", addSwapFlags=addSwapFlags)

    Print("Shutdown finalizercNode")
    finalizercNode.kill(signal.SIGTERM)
    assert not finalizercNode.verifyAlive(), "finalizercNode did not shutdown"

    # Verify producercNode still producing
    assert producercNode.processUrllibRequest("producer", "paused", returnType=ReturnType.raw) == b'false', "producercNode (--production-pause-vote-timeout-ms 0) paused after finalizercNode was shutdown"
    # Check again after at least 1 round (6 seconds)
    time.sleep(7)
    assert producercNode.processUrllibRequest("producer", "paused", returnType=ReturnType.raw) == b'false', "producercNode (--production-pause-vote-timeout-ms 0) paused after finalizercNode was shutdown"
    # Verify node0 and node1 still producing
    assert node0.waitForHeadToAdvance(), "node0 paused after finalizercNode was shutdown"
    assert node1.waitForHeadToAdvance(), "node1 paused after finalizercNode was shutdown"

    testSuccessful=True
finally:
    # Always tear the cluster and wallet manager down, whether or not the checks passed.
    TestHelper.shutdown(cluster, walletMgr, testSuccessful=testSuccessful, dumpErrorDetails=dumpErrorDetails)

exitCode = 0 if testSuccessful else 1
exit(exitCode)
119 changes: 119 additions & 0 deletions tests/production_pause_vote_timeout_test_shape.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
{
"name": "testnet_",
"ssh_helper": {
"ssh_cmd": "/usr/bin/ssh",
"scp_cmd": "/usr/bin/scp",
"ssh_identity": "",
"ssh_args": ""
},
"nodes": {
"bios":{
"name": "bios",
"keys": [
{
"privkey":"5KQwrPbwdL6PhXujxW37FSSQZ1JiwsST4cqQzDeyXtP79zkvFD3",
"pubkey":"EOS6MRyAjQq8ud7hVNYcfnVPJqcVpscN5So8BhtHuGYqET5GDW5CV"
}
],
"peers": [],
"producers": [
"eosio"
],
"dont_start": false
},
"testnet_00":{
"name": "testnet_00",
"keys": [
{
"privkey":"5Jf4sTk7vwX1MYpLJ2eQFanVvKYXFqGBrCyANPukuP2BJ5WAAKZ",
"pubkey":"EOS58B33q9S7oNkgeFfcoW3VJYu4obfDiqn5RHGE2ige6jVjUhymR",
"blspubkey":"PUB_BLS_rYRa_-bT7uLOSAfPIBy6NlXFB0YxwROeSuqHzw6s-1cuK_-GJUKqp20ktyAnsO4ZuHdx3BEPDaLronpnL22MXKWM7bvZnkCfbGCD6OzizQqxXkM9N5z5R-OUA4Ime6cF5YTSFg",
"blsprivkey":"PVT_BLS_GQjR0E8Hu8KrsTCvLKnlOCIwQijAj2-5KDizQwF-bAY6pise",
"blspop":"SIG_BLS_syFMuifUnX2zQQKr0cuHYzQQjsuPrNG75_z6y8fOyYg_twqMICZ0kT7ObbwIOUsLfXx9PVb4-QLEgUYGSRg1NSfeHGjIGkhea82wa3ayfI8elUEU1MStKbeKpys7xUAQz1PEgwcz5dClq3HyLQmMAjpoL74N_Znf0KiNEVZMte-DLF7x_6sAfp_834LthyYHjZYTmdG7belyzlYHKJb6upnZy9nR_zoKpx9jeTd3tzVhoTCuAN6aFw68D_ItY5cWiY2dhA"
}
],
"peers": [
"bios",
"testnet_04"
],
"producers": [
"defproducera"
],
"dont_start": false
},
"testnet_01":{
"name": "testnet_01",
"keys": [
{
"pubkey": "EOS8XH2gKxsef9zxmMHm4vaSvxQUhg7W4GC3nK2KSRxyYrNG5gZFS",
"privkey": "5JcoRRhDcgm51dkBrRTmErceTqrYhrq22UnmUjTZToMpH91B9N1",
"blspubkey":"PUB_BLS_Wf_O_QeyVhekDXS5q3qBxTyj_qxSrX_uiCY4z8ClpW0X2jrAVgAVHOQ9IR2H40QTWveD8QIGhhSbmSFPa0zFbs5k3yfnjfuuwpA7T1O13_LSdtxT19ehYiE4chZX6SUMJ09JFA",
"blsprivkey":"PVT_BLS_1ZLWim0k80ssXswSZp1T3ydHO9U3gLnKKlEBIDy8927XDLLj",
"blspop":"SIG_BLS_EL09aI3w-qCgarLM2Z5-T6sisSHBN0J4vMZxtGQklkOcAxgnCaPPXe0roxY4W0gVe2y6T01YrklmT_qZu2tAwqiNrVJcScY8QKvRSeczGBBab1MgnHvaAOuf6bA4JPAELIu2iPWfsS6-oLyLbNP5xtZpMXPHu3yaSJssXNOb5rcVs1KXaIUEagJeAlBBQEcKmFWfeAsJ_R8JDw4i9gSNmROzUjm6LVBpvB7vrnPDPFRA0BQ19H4FED6PtuFPShwJGVz4dg"
}
],
"peers": [
"bios",
"testnet_00",
"testnet_04"
],
"producers": [
"defproducerb"
],
"dont_start": false
},
"testnet_02":{
"name": "testnet_02",
"keys": [
{
"pubkey": "EOS6Tkpf8kcDfa32WA9B4nTcEJ64ZdDMSNioDcaL6rzdMwnpzaWJB",
"privkey": "5KkQbdxFHr8Pg1N3DEMDdU7emFgUTwQvh99FDJrodFhUbbsAtQT"
}
],
"peers": [
"bios",
"testnet_03",
"testnet_04"
],
"producers": [
"defproducerc"
],
"dont_start": false
},
"testnet_03":{
"name": "testnet_03",
"keys": [
{
"pubkey": "EOS52ntDHqA2qj4xVo7KmxdezMRhvvBqpZBuKYJCsgihisxmywpAx",
"privkey": "5JxTJJegQBpEL1p77TzkN1ompMB9gDwAfjM9chPzFCB4chxmwrE",
"blspubkey":"PUB_BLS_C-FprIiry6X-8dlLYH7xUAhIuKXBQv56zJPgtcdmKeHf8AAy750eRrOYBtKG0-QEIN5l_yl9dTLvAYmOios6Q5t3ybWBUVVQ2WWcbZLVxzwBftLwYvo1zPXH7LHEE_sAgP1i7g",
"blsprivkey":"PVT_BLS_ubElmjajfsYP_9HRSpmV-Fi_IPWKTyJS4XFSWrU8ezMZ_mL_",
"blspop":"SIG_BLS_k3wrhVl2GUG_lGsPr9io-zoamPw7eiaxMDExk-yOqcpXtu0zALHoUWJRh0WOerAS1-_RQNhbi4q-BWO9IbiNWRKP9CYIhNIL6ochGHHy4aBmZ-IzEjfBrDt7inDtFTYY0Gl372e5OqPXAwi6J3GeHipXuzAiw7SV8XdWFefthxId4meKX6vw5_RWx4XQ4ScRYoCG7UQtIZkQPEsu1SfJGL6z-cfTTSq-naKbzp0QQYfqtQkFfmL7qQUH1iohnb0HbTbRbQ"
}
],
"peers": [
"bios",
"testnet_02",
"testnet_04"
],
"producers": [
],
"dont_start": false
},
"testnet_04":{
"name": "testnet_04",
"keys": [
{
"pubkey":"EOS7K5pQCk22ojetRdyumrqp6nJX6eiQiTWWcGkZAMGhoBxgcsxhK",
"privkey":"5K3h9XiAmrx9EuqD8CRxHgQwEVDaWpqrhrnpdvwHtVzwJFMhNmE"
}
],
"peers": [
"bios"
],
"producers": [
],
"dont_start": false
}
}
}

0 comments on commit 5365c29

Please sign in to comment.