Skip to content

Commit

Permalink
Merge bitcoin#29006: test: fix v2 transport intermittent test failure (
Browse files Browse the repository at this point in the history
…bitcoin#29002)

00e0658 test: fix v2 transport intermittent test failure (bitcoin#29002) (Sebastian Falbesoner)

Pull request description:

  This PR improves the following fragile construct for detection of a new connection to the node under test in `p2p_v2_transport.py`:
  https://github.com/bitcoin/bitcoin/blob/6d5790956f45e3de5c6c4ee6fda21878b0d1287b/test/functional/p2p_v2_transport.py#L154-L156
  Only relying on the number of peers for that suffers from race conditions, as unrelated previous peers could disconnect at anytime in-between. In the test run in bitcoin#29002, the following happens:

  - `getpeerinfo()` is called the first time -> assigned to `num_peers`
  - **previous peer disconnects**, the node's peer count is now `num_peers - 1` (in most test runs, this happens before the first getpeerinfo call)
  - new peer connects, the node's peer count is now `num_peers`
  - the condition that the node's peer count is `num_peers + 1` is never true, test fails

  Use the more robust approach of watching for an increased highest peer id instead (again using the `getpeerinfo` RPC call), with a newly introduced context manager method `TestNode.wait_for_new_peer()`. Note that for the opposite case of a disconnect, no new method is introduced; this is currently used only once in the test and is also simpler.

  Still happy to take suggestions for alternative solutions.

  Fixes bitcoin#29002.

ACKs for top commit:
  kevkevinpal:
    Concept ACK [00e0658](bitcoin@00e0658)
  maflcko:
    Ok, lgtm ACK 00e0658
  stratospher:
    ACK 00e0658.

Tree-SHA512: 0118b87f54ea5e6e080ff44f29d6af6674c757a588534b3add040da435f4359e71bf85bc0a5eb7170f99cc9956e1a03c35cce653d642d31eed41bbed1f94f44f
  • Loading branch information
fanquake committed Dec 8, 2023
2 parents 1f352cf + 00e0658 commit 03042fb
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 8 deletions.
16 changes: 8 additions & 8 deletions test/functional/p2p_v2_transport.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,8 @@ def run_test(self):
V1_PREFIX = MAGIC_BYTES["regtest"] + b"version\x00\x00\x00\x00\x00"
assert_equal(len(V1_PREFIX), 16)
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
num_peers = len(self.nodes[0].getpeerinfo())
s.connect(("127.0.0.1", p2p_port(0)))
self.wait_until(lambda: len(self.nodes[0].getpeerinfo()) == num_peers + 1)
with self.nodes[0].wait_for_new_peer():
s.connect(("127.0.0.1", p2p_port(0)))
s.sendall(V1_PREFIX[:-1])
assert_equal(self.nodes[0].getpeerinfo()[-1]["transport_protocol_type"], "detecting")
s.sendall(bytes([V1_PREFIX[-1]])) # send out last prefix byte
Expand All @@ -144,22 +143,23 @@ def run_test(self):
# Check wrong network prefix detection (hits if the next 12 bytes correspond to a v1 version message)
wrong_network_magic_prefix = MAGIC_BYTES["signet"] + V1_PREFIX[4:]
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.connect(("127.0.0.1", p2p_port(0)))
with self.nodes[0].wait_for_new_peer():
s.connect(("127.0.0.1", p2p_port(0)))
with self.nodes[0].assert_debug_log(["V2 transport error: V1 peer with wrong MessageStart"]):
s.sendall(wrong_network_magic_prefix + b"somepayload")

# Check detection of missing garbage terminator (hits after fixed amount of data if terminator never matches garbage)
MAX_KEY_GARB_AND_GARBTERM_LEN = 64 + 4095 + 16
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
num_peers = len(self.nodes[0].getpeerinfo())
s.connect(("127.0.0.1", p2p_port(0)))
self.wait_until(lambda: len(self.nodes[0].getpeerinfo()) == num_peers + 1)
with self.nodes[0].wait_for_new_peer():
s.connect(("127.0.0.1", p2p_port(0)))
s.sendall(b'\x00' * (MAX_KEY_GARB_AND_GARBTERM_LEN - 1))
self.wait_until(lambda: self.nodes[0].getpeerinfo()[-1]["bytesrecv"] == MAX_KEY_GARB_AND_GARBTERM_LEN - 1)
with self.nodes[0].assert_debug_log(["V2 transport error: missing garbage terminator"]):
peer_id = self.nodes[0].getpeerinfo()[-1]["id"]
s.sendall(b'\x00') # send out last byte
# should disconnect immediately
self.wait_until(lambda: len(self.nodes[0].getpeerinfo()) == num_peers)
self.wait_until(lambda: not peer_id in [p["id"] for p in self.nodes[0].getpeerinfo()])


if __name__ == '__main__':
Expand Down
18 changes: 18 additions & 0 deletions test/functional/test_framework/test_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,24 @@ def wait_for_debug_log(self, expected_msgs, timeout=60):
'Expected messages "{}" does not partially match log:\n\n{}\n\n'.format(
str(expected_msgs), print_log))

@contextlib.contextmanager
def wait_for_new_peer(self, timeout=5):
"""
Wait until the node is connected to at least one new peer. We detect this
by watching for an increased highest peer id, using the `getpeerinfo` RPC call.
Note that the simpler approach of only accounting for the number of peers
suffers from race conditions, as disconnects from unrelated previous peers
could happen anytime in-between.
"""
def get_highest_peer_id():
peer_info = self.getpeerinfo()
return peer_info[-1]["id"] if peer_info else -1

initial_peer_id = get_highest_peer_id()
yield
wait_until_helper_internal(lambda: get_highest_peer_id() > initial_peer_id,
timeout=timeout, timeout_factor=self.timeout_factor)

@contextlib.contextmanager
def profile_with_perf(self, profile_name: str):
"""
Expand Down

0 comments on commit 03042fb

Please sign in to comment.