Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ rubocop_result.txt
git_push.sh
test_run.sh
test.js
review.txt

# act.secrets
act.secrets
Expand Down
21 changes: 3 additions & 18 deletions lib/solarwinds_apm/config.rb
Original file line number Diff line number Diff line change
Expand Up @@ -192,29 +192,14 @@ def self.[]=(key, value)
case key
when :sampling_rate
SolarWindsAPM.logger.warn do
"[#{name}/#{__method__}] sampling_rate is not a supported setting for SolarWindsAPM::Config. Please use :sample_rate."
'[Deprecated] sampling_rate is not a supported setting for SolarWindsAPM::Config.'
end

when :sample_rate
unless value.is_a?(Integer) || value.is_a?(Float)
SolarWindsAPM.logger.warn do
"[#{name}/#{__method__}] :sample_rate must be a number between 0 and 1000000 (1m) (provided: #{value}), corrected to 0"
end
value = 0
end

# Validate :sample_rate value
unless value.between?(0, 1e6)
new_value = value.negative? ? 0 : 1_000_000
SolarWindsAPM.logger.warn do
"[#{name}/#{__method__}] :sample_rate must be between 0 and 1000000 (1m) (provided: #{value}), corrected to #{new_value}"
end
SolarWindsAPM.logger.warn do
'[Deprecated] sample_rate is not a supported setting for SolarWindsAPM::Config.'
end

# Assure value is an integer
@@config[key.to_sym] = new_value.to_i
SolarWindsAPM.sample_rate(new_value)

when :transaction_settings
compile_settings(value)

Expand Down
30 changes: 15 additions & 15 deletions lib/solarwinds_apm/sampling/oboe_sampler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ def initialize(logger)
}
@settings = {} # parsed setting from swo backend
@settings_mutex = ::Mutex.new

@buckets.each_value(&:start)
end

# return sampling result
Expand Down Expand Up @@ -289,9 +287,9 @@ def disabled_algo(sample_state)
end

def update_settings(settings)
return unless settings[:timestamp] > (@settings[:timestamp] || 0)

@settings_mutex.synchronize do
return unless settings[:timestamp] > (@settings[:timestamp] || 0)

@settings = settings
@buckets.each do |type, bucket|
bucket.update(@settings[:buckets][type]) if @settings[:buckets][type]
Expand Down Expand Up @@ -324,18 +322,20 @@ def sw_from_span_and_decision(parent_span, otel_decision)
end

def get_settings(params)
return if @settings.empty?

expiry = (@settings[:timestamp] + @settings[:ttl]) * 1000
time_now = Time.now.to_i * 1000
if time_now > expiry
@logger.debug { "[#{self.class}/#{__method__}] settings expired, removing" }
@settings = {}
return
@settings_mutex.synchronize do
return if @settings.empty?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was looking with copilot help on the question of early returns from a mutex synchronized block, whether that would release the lock. Seems yes (at least for MRI) but also seems break could be better. It also raised the question that the early return here, and later if settings expired in line 333, means the return value for get_settings could be nil and the caller needs to properly handle that--does it?


expiry = (@settings[:timestamp] + @settings[:ttl]) * 1000
time_now = Time.now.to_i * 1000
if time_now > expiry
@logger.debug { "[#{self.class}/#{__method__}] settings expired, removing" }
@settings = {}
return
end
sampling_setting = SolarWindsAPM::SamplingSettings.merge(@settings, local_settings(params))
@logger.debug { "[#{self.class}/#{__method__}] sampling_setting: #{sampling_setting.inspect}" }
sampling_setting
end
sampling_setting = SolarWindsAPM::SamplingSettings.merge(@settings, local_settings(params))
@logger.debug { "[#{self.class}/#{__method__}] sampling_setting: #{sampling_setting.inspect}" }
sampling_setting
end
end
end
150 changes: 56 additions & 94 deletions lib/solarwinds_apm/sampling/token_bucket.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,126 +10,88 @@
# capacity is updated through update_settings
module SolarWindsAPM
class TokenBucket
# Maximum value of a signed 32-bit integer
MAX_INTERVAL = (2**31) - 1

attr_reader :capacity, :rate, :interval, :tokens, :type
attr_reader :type

def initialize(token_bucket_settings)
self.capacity = token_bucket_settings.capacity || 0
self.rate = token_bucket_settings.rate || 0
self.interval = token_bucket_settings.interval || MAX_INTERVAL
self.tokens = @capacity
@capacity = token_bucket_settings.capacity || 0
@rate = token_bucket_settings.rate || 0
@tokens = @capacity
@last_update_time = Time.now.to_f
@type = token_bucket_settings.type
@timer = nil
@lock = ::Mutex.new
end

# oboe sampler update_settings will update the token
# (thread safe as update_settings is guarded by mutex from oboe sampler)
def update(settings)
settings.instance_of?(Hash) ? update_from_hash(settings) : update_from_token_bucket_settings(settings)
def capacity
@lock.synchronize { @capacity }
end

def update_from_hash(settings)
if settings[:capacity]
difference = settings[:capacity] - @capacity
self.capacity = settings[:capacity]
self.tokens = @tokens + difference
end

self.rate = settings[:rate] if settings[:rate]

return unless settings[:interval]

self.interval = settings[:interval]
return unless running

stop
start
def rate
@lock.synchronize { @rate }
end

def update_from_token_bucket_settings(settings)
if settings.capacity
difference = settings.capacity - @capacity
self.capacity = settings.capacity
self.tokens = @tokens + difference
def tokens
@lock.synchronize do
calculate_tokens
@tokens
end

self.rate = settings.rate if settings.rate

return unless settings.interval

self.interval = settings.interval
return unless running

stop
start
end

def capacity=(capacity)
@capacity = [0, capacity].max
end

def rate=(rate)
@rate = [0, rate].max
end

# self.interval= sets the @interval and @sleep_interval
# @sleep_interval is used in the timer thread to sleep between replenishing the bucket
def interval=(interval)
@interval = interval.clamp(0, MAX_INTERVAL)
@sleep_interval = @interval / 1000.0
end

def tokens=(tokens)
@tokens = tokens.clamp(0, @capacity)
# oboe sampler update_settings will update the token
def update(settings)
settings.instance_of?(Hash) ? update_from_hash(settings) : update_from_hash(tb_to_hash(settings))
end

# Attempts to consume tokens from the bucket
# @param n [Integer] Number of tokens to consume
# @param token [Integer] Number of tokens to consume
# @return [Boolean] Whether there were enough tokens
# TODO: we need to include thread-safety here since sampler is shared across threads
# and we may have multiple threads trying to consume tokens at the same time
def consume(token = 1)
if @tokens >= token
self.tokens = @tokens - token
SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] #{@type} Consumed #{token} from total #{@tokens} (#{(@tokens.to_f / @capacity * 100).round(1)}% remaining)" }
true
else
SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] #{@type} Token consumption failed: requested=#{token}, available=#{@tokens}, capacity=#{@capacity}" }
false
end
end

# Starts replenishing the bucket
def start
return if running

@timer = Thread.new do
loop do
task
sleep(@sleep_interval)
@lock.synchronize do
calculate_tokens
if @tokens >= token
@tokens -= token
SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] #{@type} Consumed #{token} from total #{@tokens} (#{(@tokens.to_f / @capacity * 100).round(1)}% remaining)" }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor, the "from total " would show remaining tokens and imho misleading, i would remove this bit.

true
else
SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] #{@type} Token consumption failed: requested=#{token}, available=#{@tokens}, capacity=#{@capacity}" }
false
end
end
end

# Stops replenishing the bucket
def stop
return unless running
private

@timer.kill
@timer = nil
def calculate_tokens
now = Time.now.to_f
elapsed = now - @last_update_time
@last_update_time = now
@tokens += elapsed * @rate
Copy link

Copilot AI Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The token calculation happens on every consume call and on every tokens getter call. For high-frequency operations, this could lead to redundant calculations. Consider memoizing the result within a short time window or only recalculating when necessary.

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

@xuan-cao-swi xuan-cao-swi Jan 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copilot suggests to have following to avoid expensive calculation

# Skip recalculation if insufficient time has elapsed
RECALCULATION_THRESHOLD = 0.001 # 1ms

elapsed = now - @last_update_time
return if elapsed < RECALCULATION_THRESHOLD

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The following json is a setting from prod.

{
    "value": 1000000,
    "flags": "SAMPLE_START,SAMPLE_THROUGH_ALWAYS,SAMPLE_BUCKET_ENABLED,TRIGGER_TRACE",
    "timestamp": 1767640328,
    "ttl": 120,
    "arguments": {
        "BucketCapacity": 2,
        "BucketRate": 1,
        "TriggerRelaxedBucketCapacity": 20,
        "TriggerRelaxedBucketRate": 1,
        "TriggerStrictBucketCapacity": 6,
        "TriggerStrictBucketRate": 0.1,
        "SignatureKey": "<key>"
    }
}

(BucketRate, TriggerRelaxedBucketRate, TriggerStrictBucketRate) are (1, 1, 0.1) per second. If we skip the calculation within 1ms windows, the accuracy of the rate calculation will be decreased from (1/1000, 1/1000, 0.1/1000, we are using linux time in ms) to (1/500, 1/500, 0.1/500).

I think the token calculation shouldn't cause too much as it is just arithmetic operations and it is for local root span only. I don't think it is worth to sacrifice the accuracy of the token calculation for this.

@cheempz @raphael-theriault-swi Do you have other opinion? I recalled @raphael-theriault-swi suggested to use nanoseconds to calculate the token in a standup

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I meant during standup is that these operations are all extremely fast and should only take a couple nanoseconds, so I'm not worried about the performance impact. The comparison and branch that Copilot suggests is probably more costly than the calculation.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I agree with @jerrytfleung that there's no reason to sacrifice accuracy.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree, no need for that optimization.

@tokens = [@tokens, @capacity].min
end

# Whether the bucket is actively being replenished
def running
!@timer.nil?
end
# settings is from json sampler
def update_from_hash(settings)
@lock.synchronize do
calculate_tokens

if settings[:capacity]
new_capacity = [0, settings[:capacity]].max
difference = new_capacity - @capacity
@capacity = new_capacity
@tokens += difference
@tokens = [0, @tokens].max
end

private
@rate = [0, settings[:rate]].max if settings[:rate]
end
end

def task
self.tokens = tokens + @rate
# settings is from http sampler
def tb_to_hash(settings)
tb_hash = {}
tb_hash[:capacity] = settings.capacity if settings.respond_to?(:capacity)
tb_hash[:rate] = settings.rate if settings.respond_to?(:rate)
tb_hash[:type] = settings.type if settings.respond_to?(:type)
tb_hash
end
end
end
11 changes: 1 addition & 10 deletions test/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -60,19 +60,10 @@ RUN echo 'alias be="bundle exec"' >> ~/.profile
# install rubies to build our gem against
RUN . ~/.profile \
&& cd /root/.rbenv/plugins/ruby-build && git pull && cd - \
&& rbenv install 3.1.0
&& rbenv install 3.2.6
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still have minimum required ruby version as 3.1.0. Is this Dockerfile used for adhoc local tests and should it really not test against 3.1.0?


RUN echo 'gem: --no-document' >> ~/.gemrc

# install swig 4.0.2
RUN curl -SL https://github.com/swig/swig/archive/refs/tags/v4.0.2.tar.gz \
| tar xzC /tmp \
&& cd /tmp/swig-4.0.2 \
&& ./autogen.sh \
&& ./configure && make && make install \
&& cd - \
&& rm -rf /tmp/swig-4.0.2

# set up github package credentials for pushing
RUN mkdir ~/.gem \
&& echo "---\n:github: Bearer ${BUNDLE_RUBYGEMS__PKG__GITHUB__COM}" >> ~/.gem/credentials \
Expand Down
16 changes: 8 additions & 8 deletions test/sampling/oboe_sampler_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -425,8 +425,8 @@
sample_source: SolarWindsAPM::SampleSource::LOCAL_DEFAULT,
flags: SolarWindsAPM::Flags::SAMPLE_START | SolarWindsAPM::Flags::TRIGGERED_TRACE,
buckets: {
SolarWindsAPM::BucketType::TRIGGER_STRICT => SolarWindsAPM::TokenBucket.new(SolarWindsAPM::TokenBucketSettings.new(10, 5, BUCKET_INTERVAL)),
SolarWindsAPM::BucketType::TRIGGER_RELAXED => SolarWindsAPM::TokenBucket.new(SolarWindsAPM::TokenBucketSettings.new(0, 0, BUCKET_INTERVAL))
SolarWindsAPM::BucketType::TRIGGER_STRICT => { capacity: 10, rate: 5 },
SolarWindsAPM::BucketType::TRIGGER_RELAXED => { capacity: 0, rate: 0 }
},
timestamp: Time.now.to_i,
ttl: 10
Expand Down Expand Up @@ -464,8 +464,8 @@
sample_source: SolarWindsAPM::SampleSource::LOCAL_DEFAULT,
flags: SolarWindsAPM::Flags::SAMPLE_START | SolarWindsAPM::Flags::TRIGGERED_TRACE,
buckets: {
SolarWindsAPM::BucketType::TRIGGER_STRICT => SolarWindsAPM::TokenBucket.new(SolarWindsAPM::TokenBucketSettings.new(0, 0, BUCKET_INTERVAL)),
SolarWindsAPM::BucketType::TRIGGER_RELAXED => SolarWindsAPM::TokenBucket.new(SolarWindsAPM::TokenBucketSettings.new(20, 10, BUCKET_INTERVAL))
SolarWindsAPM::BucketType::TRIGGER_STRICT => { capacity: 0, rate: 0 },
SolarWindsAPM::BucketType::TRIGGER_RELAXED => { capacity: 20, rate: 10 }
},
timestamp: Time.now.to_i,
ttl: 10
Expand Down Expand Up @@ -502,8 +502,8 @@
sample_source: SolarWindsAPM::SampleSource::LOCAL_DEFAULT,
flags: SolarWindsAPM::Flags::SAMPLE_START | SolarWindsAPM::Flags::TRIGGERED_TRACE,
buckets: {
SolarWindsAPM::BucketType::TRIGGER_STRICT => SolarWindsAPM::TokenBucket.new(SolarWindsAPM::TokenBucketSettings.new(0, 0, BUCKET_INTERVAL)),
SolarWindsAPM::BucketType::TRIGGER_RELAXED => SolarWindsAPM::TokenBucket.new(SolarWindsAPM::TokenBucketSettings.new(20, 10, BUCKET_INTERVAL))
SolarWindsAPM::BucketType::TRIGGER_STRICT => { capacity: 0, rate: 0 },
SolarWindsAPM::BucketType::TRIGGER_RELAXED => { capacity: 20, rate: 10 }
},
signature_key: 'key',
timestamp: Time.now.to_i,
Expand Down Expand Up @@ -546,8 +546,8 @@
sample_source: SolarWindsAPM::SampleSource::LOCAL_DEFAULT,
flags: SolarWindsAPM::Flags::SAMPLE_START | SolarWindsAPM::Flags::TRIGGERED_TRACE,
buckets: {
SolarWindsAPM::BucketType::TRIGGER_STRICT => SolarWindsAPM::TokenBucket.new(SolarWindsAPM::TokenBucketSettings.new(10, 5, BUCKET_INTERVAL)),
SolarWindsAPM::BucketType::TRIGGER_RELAXED => SolarWindsAPM::TokenBucket.new(SolarWindsAPM::TokenBucketSettings.new(0, 0, BUCKET_INTERVAL))
SolarWindsAPM::BucketType::TRIGGER_STRICT => { capacity: 10, rate: 5 },
SolarWindsAPM::BucketType::TRIGGER_RELAXED => { capacity: 0, rate: 0 }
},
signature_key: 'key',
timestamp: Time.now.to_i,
Expand Down
Loading