Skip to content

Commit

Permalink
scylla_node: watch_rest_for_alive: add wait_normal_token_owner param
Browse files Browse the repository at this point in the history
The new parameter is added for backward compatibility.
Some tests may want to pass `node.start(wait_other_notice=True)`
without waiting for nodes to become normal token owners,
because they need to examine the node earlier than that.

Signed-off-by: Benny Halevy <[email protected]>
  • Loading branch information
bhalevy authored and fruch committed Dec 19, 2023
1 parent cada847 commit 1411230
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 14 deletions.
12 changes: 8 additions & 4 deletions ccmlib/scylla_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,15 @@ def __update_pids(self, started):
node._update_pid(p)

def start_nodes(self, nodes=None, no_wait=False, verbose=False, wait_for_binary_proto=None,
wait_other_notice=None, jvm_args=None, profile_options=None,
wait_other_notice=None, wait_normal_token_owner=None, jvm_args=None, profile_options=None,
quiet_start=False):
if wait_for_binary_proto is None:
wait_for_binary_proto = self.force_wait_for_cluster_start
if wait_other_notice is None:
wait_other_notice = self.force_wait_for_cluster_start
self.debug(f"start_nodes: no_wait={no_wait} wait_for_binary_proto={wait_for_binary_proto} wait_other_notice={wait_other_notice} force_wait_for_cluster_start={self.force_wait_for_cluster_start}")
if wait_normal_token_owner is None and wait_other_notice:
wait_normal_token_owner = True
self.debug(f"start_nodes: no_wait={no_wait} wait_for_binary_proto={wait_for_binary_proto} wait_other_notice={wait_other_notice} wait_normal_token_owner={wait_normal_token_owner} force_wait_for_cluster_start={self.force_wait_for_cluster_start}")
self.started=True

p = None
Expand Down Expand Up @@ -137,7 +139,8 @@ def start_nodes(self, nodes=None, no_wait=False, verbose=False, wait_for_binary_
p = node.start(update_pid=False, jvm_args=jvm_args,
profile_options=profile_options, no_wait=no_wait,
wait_for_binary_proto=wait_for_binary_proto,
wait_other_notice=wait_other_notice)
wait_other_notice=wait_other_notice,
wait_normal_token_owner=False)
started.append((node, p, mark))
marks.append((node, mark))

Expand All @@ -156,7 +159,8 @@ def start_nodes(self, nodes=None, no_wait=False, verbose=False, wait_for_binary_
for old_node, _ in marks:
for node, _, _ in started:
if old_node is not node:
old_node.watch_rest_for_alive(node, timeout=self.default_wait_other_notice_timeout)
old_node.watch_rest_for_alive(node, timeout=self.default_wait_other_notice_timeout,
wait_normal_token_owner=wait_normal_token_owner)

return started

Expand Down
32 changes: 22 additions & 10 deletions ccmlib/scylla_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,8 @@ def wait_for_starting(self, from_mark=None, timeout=None):
time.sleep(sleep_time)
return bool(self.grep_log(f"{bootstrap_message}|{resharding_message}", from_mark=from_mark))

def _start_scylla(self, args, marks, update_pid, wait_other_notice,
def _start_scylla(self, args, marks, update_pid,
wait_other_notice, wait_normal_token_owner,
wait_for_binary_proto, ext_env):
log_file = os.path.join(self.get_path(), 'logs', 'system.log')
# In case we are restarting a node
Expand Down Expand Up @@ -333,8 +334,8 @@ def _start_scylla(self, args, marks, update_pid, wait_other_notice,
if wait_other_notice:
for node, _ in marks:
t = self.cluster.default_wait_other_notice_timeout
node.watch_rest_for_alive(self, timeout=t)
self.watch_rest_for_alive(node, timeout=t)
node.watch_rest_for_alive(self, timeout=t, wait_normal_token_owner=wait_normal_token_owner)
self.watch_rest_for_alive(node, timeout=t, wait_normal_token_owner=wait_normal_token_owner)

return self._process_scylla

Expand Down Expand Up @@ -458,7 +459,7 @@ def parse_size(s):

# Scylla Overload start
def start(self, join_ring=True, no_wait=False, verbose=False,
update_pid=True, wait_other_notice=None, replace_token=None,
update_pid=True, wait_other_notice=None, wait_normal_token_owner=None, replace_token=None,
replace_address=None, replace_node_host_id=None, jvm_args=None, wait_for_binary_proto=None,
profile_options=None, use_jna=False, quiet_start=False):
"""
Expand All @@ -470,6 +471,9 @@ def start(self, join_ring=True, no_wait=False, verbose=False,
- wait_other_notice: if True, this method returns only when all other
live nodes of the cluster
have marked this node UP.
- wait_normal_token_owner: if wait_other_notice is True and wait_normal_token_owner
is True or None, this method returns only when all other nodes see this node as a normal
token owner, and vice versa.
- replace_token: start the node with the -Dcassandra.replace_token
option.
- replace_node_host_id: start the node with the
Expand All @@ -490,6 +494,8 @@ def start(self, join_ring=True, no_wait=False, verbose=False,
wait_for_binary_proto = self.cluster.force_wait_for_cluster_start and not no_wait
if wait_other_notice is None:
wait_other_notice = self.cluster.force_wait_for_cluster_start and not no_wait
if wait_normal_token_owner is None and wait_other_notice:
wait_normal_token_owner = True
if jvm_args is None:
jvm_args = []

Expand Down Expand Up @@ -676,10 +682,11 @@ def process_opts(opts):
message = f"Starting scylla: args={args} wait_other_notice={wait_other_notice} wait_for_binary_proto={wait_for_binary_proto}"
self.debug(message)

scylla_process = self._start_scylla(args, marks, update_pid,
wait_other_notice,
wait_for_binary_proto,
ext_env)
scylla_process = self._start_scylla(args=args, marks=marks, update_pid=update_pid,
wait_other_notice=wait_other_notice,
wait_normal_token_owner=wait_normal_token_owner,
wait_for_binary_proto=wait_for_binary_proto,
ext_env=ext_env)
self._start_jmx(data)

ip_addr, _ = self.network_interfaces['storage']
Expand Down Expand Up @@ -1345,13 +1352,16 @@ def hostid(self, timeout=60, force_refresh=False):
self.error(f"Failed to get hostid using {url}: {e}")
return None

def watch_rest_for_alive(self, nodes, timeout=120):
def watch_rest_for_alive(self, nodes, timeout=120, wait_normal_token_owner=True):
"""
Use the REST API to wait until this node detects that the nodes listed
in "nodes" become fully operational as normal token owners.
in "nodes" become fully operational.
This is similar to watch_log_for_alive but uses ScyllaDB's REST API
instead of the log file and waits for the node to be really useable,
not just "UP" (see issue #461)
Params:
- wait_normal_token_owner: return only when this node sees all other nodes as normal token owners (True by default).
"""
logging.getLogger('urllib3.connectionpool').disabled = True
try:
Expand Down Expand Up @@ -1386,6 +1396,8 @@ def watch_rest_for_alive(self, nodes, timeout=120):
if response.text == '[]':
have_no_tokens.add(n)
if not have_no_tokens:
if not wait_normal_token_owner:
return
# and that the node knows that the others are normal token owners.
host_id_map = dict()
response = requests.get(url=url_host_ids)
Expand Down

0 comments on commit 1411230

Please sign in to comment.