diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9716d5d539..ed91af93ea 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -45,6 +45,7 @@ on: - web - sidekiq - ops + - metrics default: all run_pre_deploy_migrations: description: Run `pre-deploy` job (schema-migrations) before service deployment. @@ -103,7 +104,7 @@ jobs: fail-fast: true matrix: service: >- - ${{ inputs.server_types == 'all' && fromJSON('["web", "sidekiq", "ops"]') || + ${{ inputs.server_types == 'all' && fromJSON('["web", "sidekiq", "ops", "metrics"]') || fromJSON(format('["{0}"]', inputs.server_types)) }} env: repository_name: ${{ matrix.service == 'ops' && 'mavis/ops' || 'mavis/webapp' }} @@ -328,7 +329,7 @@ jobs: fail-fast: false matrix: service: >- - ${{ inputs.server_types == 'all' && fromJSON('["web", "sidekiq", "ops"]') || + ${{ inputs.server_types == 'all' && fromJSON('["web", "sidekiq", "ops", "metrics"]') || fromJSON(format('["{0}"]', inputs.server_types)) }} steps: - name: Configure AWS Credentials diff --git a/bin/docker-start b/bin/docker-start index a9e3ec711d..5b7a813d1a 100755 --- a/bin/docker-start +++ b/bin/docker-start @@ -12,10 +12,15 @@ elif [ "$SERVER_TYPE" == "sidekiq" ]; then "$BIN_DIR"/prometheus_exporter & sleep 5 exec "$BIN_DIR"/sidekiq +elif [ "$SERVER_TYPE" == "metrics" ]; then + echo "Starting metrics publisher..." + "$BIN_DIR"/prometheus_exporter skip-server-labels & + sleep 5 + exec "$BIN_DIR"/metrics-publisher elif [ "$SERVER_TYPE" == "none" ]; then echo "No server started" exec tail -f /dev/null # Keep container running else - echo "SERVER_TYPE variable: '$SERVER_TYPE' unknown. Allowed values: web, sidekiq, none" + echo "SERVER_TYPE variable: '$SERVER_TYPE' unknown. Allowed values: web, sidekiq, metrics, none" exit 1 fi diff --git a/bin/metrics-publisher b/bin/metrics-publisher new file mode 100755 index 0000000000..a41f62452e --- /dev/null +++ b/bin/metrics-publisher @@ -0,0 +1,12 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require_relative "../config/environment" +require "prometheus_exporter/instrumentation" + +PrometheusExporter::Instrumentation::SidekiqStats.start +PrometheusExporter::Instrumentation::SidekiqQueue.start(all_queues: true) + +at_exit { PrometheusExporter::Client.default.stop(wait_timeout_seconds: 10) } + +Kernel.sleep diff --git a/bin/prometheus_exporter b/bin/prometheus_exporter index 911c10de80..c98f790b78 100755 --- a/bin/prometheus_exporter +++ b/bin/prometheus_exporter @@ -27,18 +27,31 @@ def fetch_ecs_task_id end end -if ARGV.include?("local") - task_id = "N/A" - service_name = "N/A" -else - task_id = fetch_ecs_task_id - service_name = ENV["SERVICE_NAME"] +# An explicitly-set env var (including empty string) overrides the default. +# An empty value omits the label entirely — useful for the metrics task, +# whose series identity must stay stable across container replacements. +def resolve_label(env_key, &default) + if ENV.key?(env_key) + value = ENV[env_key] + return value.empty? ? nil : value + end + + default.call end +labels = { + "TaskId" => resolve_label("PROMETHEUS_TASK_ID_LABEL") do + ARGV.include?("skip-server-labels") ? nil : fetch_ecs_task_id + end, + "ServiceName" => resolve_label("PROMETHEUS_SERVICE_NAME_LABEL") do + ARGV.include?("skip-server-labels") ? nil : ENV["SERVICE_NAME"] + end +}.compact + runner = PrometheusExporter::Server::Runner.new( port: 9394, bind: "0.0.0.0", - label: { "TaskId" => task_id, "ServiceName" => service_name }, + label: labels, type_collectors: [PrometheusExporter::CustomActiveRecordCollector], ) diff --git a/config/initializers/sidekiq.rb b/config/initializers/sidekiq.rb index 1f7833ece5..8cb4902576 100644 --- a/config/initializers/sidekiq.rb +++ b/config/initializers/sidekiq.rb @@ -49,8 +49,9 @@ def call(_worker, job, _queue) PrometheusExporter::Instrumentation::Process.start type: "sidekiq" PrometheusExporter::Instrumentation::ActiveRecord.start PrometheusExporter::Instrumentation::SidekiqProcess.start - PrometheusExporter::Instrumentation::SidekiqQueue.start - PrometheusExporter::Instrumentation::SidekiqStats.start + # SidekiqStats and SidekiqQueue are global Redis-backed metrics published + # by the metrics task (bin/metrics-publisher) to avoid duplicate + # series across Sidekiq containers. end at_exit do