@@ -20,8 +20,9 @@ import (
2020 "go.opentelemetry.io/otel/metric"
2121 "golang.org/x/sync/errgroup"
2222
23- "github.com/elastic/apm-server/internal/logs"
2423 "github.com/elastic/elastic-agent-libs/logp"
24+
25+ "github.com/elastic/apm-server/internal/logs"
2526)
2627
2728const (
@@ -56,6 +57,10 @@ const (
5657 defaultValueLogSize = 0
5758
5859 gb = float64 (1 << 30 )
60+
61+ // configuredDiskUsageThresholdMultiplier is the scale factor applied to the configured disk usage threshold before it is stored.
62+ // It is used to convert the float64 disk usage threshold to a uint64 so that it can be stored in an atomic.Uint64.
63+ configuredDiskUsageThresholdMultiplier = 1000
5964)
6065
6166type StorageManagerOptions func (* StorageManager )
@@ -140,11 +145,21 @@ type StorageManager struct {
140145 // meterProvider is the OTel meter provider
141146 meterProvider metric.MeterProvider
142147 storageMetrics storageMetrics
148+
149+ // configuredStorageLimit stores the configured storage limit (0 means unlimited)
150+ configuredStorageLimit atomic.Uint64
151+ // configuredDiskUsageThreshold stores the configured disk usage threshold, a fraction (0-1),
152+ // multiplied by configuredDiskUsageThresholdMultiplier
153+ configuredDiskUsageThreshold atomic.Uint64
143154}
144155
145156type storageMetrics struct {
146- lsmSizeGauge metric.Int64Gauge
147- valueLogSizeGauge metric.Int64Gauge
157+ lsmSizeGauge metric.Int64Gauge
158+ valueLogSizeGauge metric.Int64Gauge
159+ storageLimitGauge metric.Int64Gauge
160+ diskUsedGauge metric.Int64Gauge
161+ diskTotalGauge metric.Int64Gauge
162+ diskUsageThresholdGauge metric.Float64Gauge
148163}
149164
150165// NewStorageManager returns a new StorageManager with pebble DB at storageDir.
@@ -175,6 +190,10 @@ func NewStorageManager(storageDir string, logger *logp.Logger, opts ...StorageMa
175190
176191 sm .storageMetrics .lsmSizeGauge , _ = meter .Int64Gauge ("apm-server.sampling.tail.storage.lsm_size" )
177192 sm .storageMetrics .valueLogSizeGauge , _ = meter .Int64Gauge ("apm-server.sampling.tail.storage.value_log_size" )
193+ sm .storageMetrics .storageLimitGauge , _ = meter .Int64Gauge ("apm-server.sampling.tail.storage.storage_limit" )
194+ sm .storageMetrics .diskUsedGauge , _ = meter .Int64Gauge ("apm-server.sampling.tail.storage.disk_used" )
195+ sm .storageMetrics .diskTotalGauge , _ = meter .Int64Gauge ("apm-server.sampling.tail.storage.disk_total" )
196+ sm .storageMetrics .diskUsageThresholdGauge , _ = meter .Float64Gauge ("apm-server.sampling.tail.storage.disk_usage_threshold_pct" )
178197 }
179198
180199 if err := sm .reset (); err != nil {
@@ -280,10 +299,22 @@ func (sm *StorageManager) updateDiskUsage() {
280299 sm .storageMetrics .valueLogSizeGauge .Record (context .Background (), int64 (defaultValueLogSize ))
281300 }
282301
302+ // Record storage limit metric
303+ if sm .storageMetrics .storageLimitGauge != nil {
304+ sm .storageMetrics .storageLimitGauge .Record (context .Background (), int64 (sm .configuredStorageLimit .Load ()))
305+ }
306+
283307 if sm .getDiskUsageFailed .Load () {
284308 // Skip GetDiskUsage under the assumption that
285309 // it will always get the same error if GetDiskUsage ever returns one,
286310 // such that it does not keep logging GetDiskUsage errors.
311+ // Record zero values for disk metrics when disk usage check failed
312+ if sm .storageMetrics .diskUsedGauge != nil {
313+ sm .storageMetrics .diskUsedGauge .Record (context .Background (), 0 )
314+ }
315+ if sm .storageMetrics .diskTotalGauge != nil {
316+ sm .storageMetrics .diskTotalGauge .Record (context .Background (), 0 )
317+ }
287318 return
288319 }
289320 usage , err := sm .getDiskUsage ()
@@ -292,10 +323,32 @@ func (sm *StorageManager) updateDiskUsage() {
292323 sm .getDiskUsageFailed .Store (true )
293324 sm .cachedDiskStat .used .Store (0 )
294325 sm .cachedDiskStat .total .Store (0 ) // setting total to 0 to disable any running disk usage threshold checks
326+ // Record zero values for disk metrics when disk usage check failed
327+ if sm .storageMetrics .diskUsedGauge != nil {
328+ sm .storageMetrics .diskUsedGauge .Record (context .Background (), 0 )
329+ }
330+ if sm .storageMetrics .diskTotalGauge != nil {
331+ sm .storageMetrics .diskTotalGauge .Record (context .Background (), 0 )
332+ }
295333 return
296334 }
297335 sm .cachedDiskStat .used .Store (usage .UsedBytes )
298336 sm .cachedDiskStat .total .Store (usage .TotalBytes )
337+
338+ // Record disk utilization metrics
339+ if sm .storageMetrics .diskUsedGauge != nil {
340+ sm .storageMetrics .diskUsedGauge .Record (context .Background (), int64 (usage .UsedBytes ))
341+ }
342+ if sm .storageMetrics .diskTotalGauge != nil {
343+ sm .storageMetrics .diskTotalGauge .Record (context .Background (), int64 (usage .TotalBytes ))
344+ }
345+ // Record the configured disk usage threshold as a fraction (0-1)
346+ if sm .storageMetrics .diskUsageThresholdGauge != nil {
347+ sm .storageMetrics .diskUsageThresholdGauge .Record (
348+ context .Background (),
349+ float64 (sm .configuredDiskUsageThreshold .Load ())/ configuredDiskUsageThresholdMultiplier ,
350+ )
351+ }
299352}
300353
301354// diskUsed returns the actual used disk space in bytes.
@@ -429,6 +482,11 @@ func (sm *StorageManager) WriteSubscriberPosition(data []byte) error {
429482
430483// NewReadWriter returns a read writer configured with storage limit and disk usage threshold.
431484func (sm * StorageManager ) NewReadWriter (storageLimit uint64 , diskUsageThreshold float64 ) RW {
485+ // Store configured values for monitoring metrics
486+ sm .configuredStorageLimit .Store (storageLimit )
487+ // Store disk usage threshold as a fraction (0-1), multiplied by configuredDiskUsageThresholdMultiplier and truncated to an integer
488+ sm .configuredDiskUsageThreshold .Store (uint64 (diskUsageThreshold * configuredDiskUsageThresholdMultiplier ))
489+
432490 var rw RW = SplitReadWriter {
433491 eventRW : sm .eventStorage .NewReadWriter (),
434492 decisionRW : sm .decisionStorage .NewReadWriter (),
0 commit comments