Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 29 additions & 24 deletions lib/typesense/api_call.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def initialize(configuration)

@logger = @configuration.logger

@nodes_mutex = Mutex.new
initialize_metadata_for_nodes
@current_node_index = -1
end
Expand Down Expand Up @@ -129,32 +130,34 @@ def uri_for(endpoint, node)
# But if no healthy nodes are found, it will just return the next node, even if it's unhealthy
# so we can try the request for good measure, in case that node has become healthy since the last check
def next_node
# Check if nearest_node is set and is healthy, if so return it
unless @nearest_node.nil?
@logger.debug "Nodes health: Node #{@nearest_node[:index]} is #{@nearest_node[:is_healthy] == true ? 'Healthy' : 'Unhealthy'}"
if @nearest_node[:is_healthy] == true || node_due_for_healthcheck?(@nearest_node)
@logger.debug "Updated current node to Node #{@nearest_node[:index]}"
return @nearest_node
@nodes_mutex.synchronize do
# Check if nearest_node is set and is healthy, if so return it
unless @nearest_node.nil?
@logger.debug "Nodes health: Node #{@nearest_node[:index]} is #{@nearest_node[:is_healthy] == true ? 'Healthy' : 'Unhealthy'}"
if @nearest_node[:is_healthy] == true || node_due_for_healthcheck?(@nearest_node)
@logger.debug "Updated current node to Node #{@nearest_node[:index]}"
return @nearest_node
end
@logger.debug 'Falling back to individual nodes'
end
@logger.debug 'Falling back to individual nodes'
end

# Fallback to nodes as usual
@logger.debug "Nodes health: #{@nodes.each_with_index.map { |node, i| "Node #{i} is #{node[:is_healthy] == true ? 'Healthy' : 'Unhealthy'}" }.join(' || ')}"
candidate_node = nil
(0..@nodes.length).each do |_i|
@current_node_index = (@current_node_index + 1) % @nodes.length
candidate_node = @nodes[@current_node_index]
if candidate_node[:is_healthy] == true || node_due_for_healthcheck?(candidate_node)
@logger.debug "Updated current node to Node #{candidate_node[:index]}"
return candidate_node
# Fallback to nodes as usual
@logger.debug "Nodes health: #{@nodes.each_with_index.map { |node, i| "Node #{i} is #{node[:is_healthy] == true ? 'Healthy' : 'Unhealthy'}" }.join(' || ')}"
candidate_node = nil
(0..@nodes.length).each do |_i|
@current_node_index = (@current_node_index + 1) % @nodes.length
candidate_node = @nodes[@current_node_index]
if candidate_node[:is_healthy] == true || node_due_for_healthcheck?(candidate_node)
@logger.debug "Updated current node to Node #{candidate_node[:index]}"
return candidate_node
end
end
end

# None of the nodes are marked healthy, but some of them could have become healthy since last health check.
# So we will just return the next node.
@logger.debug "No healthy nodes were found. Returning the next node, Node #{candidate_node[:index]}"
candidate_node
# None of the nodes are marked healthy, but some of them could have become healthy since last health check.
# So we will just return the next node.
@logger.debug "No healthy nodes were found. Returning the next node, Node #{candidate_node[:index]}"
candidate_node
end
end

def node_due_for_healthcheck?(node)
Expand All @@ -175,8 +178,10 @@ def initialize_metadata_for_nodes
end

def set_node_healthcheck(node, is_healthy:)
node[:is_healthy] = is_healthy
node[:last_access_timestamp] = Time.now.to_i
@nodes_mutex.synchronize do
node[:is_healthy] = is_healthy
node[:last_access_timestamp] = Time.now.to_i
end
end

def custom_exception_klass_for(response)
Expand Down
128 changes: 128 additions & 0 deletions spec/typesense/api_call_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -258,4 +258,132 @@
it_behaves_like 'General error handling', :delete
it_behaves_like 'Node selection', :delete
end

describe 'concurrent node rotation' do
it 'distributes selection evenly across nodes when called from many threads' do
thread_count = 16
iterations_per_thread = 90
num_nodes = typesense.configuration.nodes.length

counts = Array.new(num_nodes, 0)
counts_mutex = Mutex.new

threads = Array.new(thread_count) do
Thread.new do
local_counts = Array.new(num_nodes, 0)
iterations_per_thread.times do
node = api_call.send(:next_node)
local_counts[node[:index]] += 1
end
counts_mutex.synchronize do
local_counts.each_with_index { |c, i| counts[i] += c }
end
end
end
threads.each(&:join)

expected_per_node = (thread_count * iterations_per_thread) / num_nodes
expect(counts).to all(eq(expected_per_node))
end

it 'never returns a node held unhealthy while next_node is called concurrently' do
unhealthy_node = api_call.instance_variable_get(:@nodes)[1]
api_call.send(:set_node_healthcheck, unhealthy_node, is_healthy: false)

threads = Array.new(8) do
Thread.new do
Array.new(200) { api_call.send(:next_node)[:index] }
end
end

results = threads.flat_map(&:value)
expect(results).not_to include(1)
expect(results).to include(0).and include(2)
end

it 'still returns a node when every node is unhealthy under concurrent calls' do
nodes = api_call.instance_variable_get(:@nodes)
nodes.each { |node| api_call.send(:set_node_healthcheck, node, is_healthy: false) }

threads = Array.new(8) do
Thread.new do
Array.new(50) { api_call.send(:next_node) }
end
end

results = threads.flat_map(&:value)
expect(results.length).to eq(8 * 50)
expect(results).to all(be_a(Hash))
expect(results.map { |n| n[:index] }).to all(be_between(0, nodes.length - 1).inclusive)
end

context 'with a single node' do
let(:typesense) do
Typesense::Client.new(
api_key: 'abcd',
nodes: [{ host: 'node0', port: 8108, protocol: 'http' }],
connection_timeout_seconds: 10,
retry_interval_seconds: 0.01,
log_level: Logger::ERROR
)
end

it 'returns the single node and keeps health state consistent under concurrent writes' do
node = typesense.configuration.nodes[0]

threads = Array.new(8) do |i|
Thread.new do
50.times do
api_call.send(:set_node_healthcheck, node, is_healthy: i.even?)
api_call.send(:next_node)
end
end
end
threads.each(&:join)

expect(node[:is_healthy]).to be(true).or be(false)
expect(node[:last_access_timestamp]).to be_a(Integer)
end
end

context 'with nearest_node configured' do
let(:typesense) do
Typesense::Client.new(
api_key: 'abcd',
nearest_node: { host: 'nearestNode', port: 6108, protocol: 'http' },
nodes: [
{ host: 'node0', port: 8108, protocol: 'http' },
{ host: 'node1', port: 8108, protocol: 'http' },
{ host: 'node2', port: 8108, protocol: 'http' }
],
connection_timeout_seconds: 10,
retry_interval_seconds: 0.01,
log_level: Logger::ERROR
)
end

it 'serializes reads and writes of nearest_node health state under concurrent access' do
nearest_node = api_call.instance_variable_get(:@nearest_node)

writer_threads = Array.new(4) do |i|
Thread.new do
100.times { api_call.send(:set_node_healthcheck, nearest_node, is_healthy: i.even?) }
end
end

reader_threads = Array.new(8) do
Thread.new do
Array.new(100) { api_call.send(:next_node) }
end
end

writer_threads.each(&:join)
reader_results = reader_threads.flat_map(&:value)

expect(reader_results).to all(be_a(Hash))
expect(nearest_node[:is_healthy]).to be(true).or be(false)
expect(nearest_node[:last_access_timestamp]).to be_a(Integer)
end
end
end
end
Loading