Skip to content

Commit

Permalink
Log error when probes iteration takes too long (#1088)
Browse files Browse the repository at this point in the history
  • Loading branch information
tombruijn authored May 24, 2024
1 parent e6d98f5 commit 704a7d2
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 4 deletions.
6 changes: 6 additions & 0 deletions .changesets/log-slow-probes-error.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
bump: patch
type: change
---

When the minutely probes thread takes more than 60 seconds to run all the registered probes, log an error. This helps find issues with the metrics reported by the probes not being accurately reported for every minute.
20 changes: 16 additions & 4 deletions lib/appsignal/probes.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# frozen_string_literal: true

module Appsignal
module Probes
module Probes # rubocop:disable Metrics/ModuleLength
ITERATION_IN_SECONDS = 60

class ProbeCollection
def initialize
@probes = {}
Expand Down Expand Up @@ -180,6 +182,7 @@ def start
sleep initial_wait_time
initialize_probes
loop do
start_time = Time.now
logger = Appsignal.internal_logger
mutex.synchronize do
logger.debug("Gathering minutely metrics with #{probe_instances.count} probes")
Expand All @@ -191,6 +194,15 @@ def start
logger.debug ex.backtrace.join("\n")
end
end
end_time = Time.now
duration = end_time - start_time
if duration >= ITERATION_IN_SECONDS
logger.error(
"The minutely probes took more than 60 seconds. " \
"The probes should not take this long as metrics will not " \
"be accurately reported."
)
end
sleep wait_time
end
end
Expand All @@ -214,16 +226,16 @@ def stop

# @api private
def wait_time
60 - Time.now.sec
ITERATION_IN_SECONDS - Time.now.sec
end

private

def initial_wait_time
remaining_seconds = 60 - Time.now.sec
remaining_seconds = ITERATION_IN_SECONDS - Time.now.sec
return remaining_seconds if remaining_seconds > 30

remaining_seconds + 60
remaining_seconds + ITERATION_IN_SECONDS
end

def initialize_probes
Expand Down
23 changes: 23 additions & 0 deletions spec/lib/appsignal/probes_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,29 @@ def call
end
end

context "with a probe that takes 60 seconds" do
it "logs an error and continues calling the probes every <wait_time>" do
stub_const("Appsignal::Probes::ITERATION_IN_SECONDS", 0.2)
calls = 0
probe = lambda do
calls += 1
sleep 0.2
end
Appsignal::Probes.register :my_probe, probe
Appsignal::Probes.register :other_probe, lambda {}
Appsignal::Probes.start

wait_for("enough probe calls") { calls >= 2 }

expect(log).to contains_log(
:error,
"The minutely probes took more than 60 seconds. " \
"The probes should not take this long as metrics will not " \
"be accurately reported."
)
end
end

it "ensures only one minutely probes thread is active at a time" do
alive_thread_counter = proc { Thread.list.reject { |t| t.status == "dead" }.length }
probe = MockProbe.new
Expand Down

0 comments on commit 704a7d2

Please sign in to comment.