@@ -32,16 +32,15 @@ type containerCPUState struct {
3232
3333// Collector collects metrics from DBLab components.
3434type Collector struct {
35- mu sync.Mutex
36- metrics * Metrics
37- cloning * cloning.Base
38- retrieval * retrieval.Retrieval
39- pm * pool.Manager
40- engProps * global.EngineProps
41- dockerClient * client.Client
42- startedAt time.Time
43- prevCPUStats map [string ]containerCPUState
44- prevCPUStatsMu sync.RWMutex
35+ mu sync.Mutex
36+ metrics * Metrics
37+ cloning * cloning.Base
38+ retrieval * retrieval.Retrieval
39+ pm * pool.Manager
40+ engProps * global.EngineProps
41+ dockerClient * client.Client
42+ startedAt time.Time
43+ prevCPUStats map [string ]containerCPUState
4544}
4645
4746// NewCollector creates a new metrics collector.
@@ -221,22 +220,27 @@ func (c *Collector) getContainerStats(ctx context.Context, clones []*models.Clon
221220 return result
222221 }
223222
223+ activeCloneIDs := make (map [string ]struct {})
224+
224225 for _ , clone := range clones {
225226 if clone == nil || clone .Status .Code != models .StatusOK {
226227 continue
227228 }
228229
230+ activeCloneIDs [clone .ID ] = struct {}{}
231+
229232 stats , err := c .dockerClient .ContainerStatsOneShot (ctx , clone .ID )
230233 if err != nil {
231234 log .Dbg (fmt .Sprintf ("failed to get container stats for clone %s: %v" , clone .ID , err ))
232235 continue
233236 }
234237
235- defer stats .Body .Close ()
236-
237238 var statsJSON container.StatsResponse
238- if err := json .NewDecoder (stats .Body ).Decode (& statsJSON ); err != nil {
239- log .Dbg (fmt .Sprintf ("failed to decode container stats for clone %s: %v" , clone .ID , err ))
239+ decodeErr := json .NewDecoder (stats .Body ).Decode (& statsJSON )
240+ stats .Body .Close ()
241+
242+ if decodeErr != nil {
243+ log .Dbg (fmt .Sprintf ("failed to decode container stats for clone %s: %v" , clone .ID , decodeErr ))
240244 continue
241245 }
242246
@@ -251,6 +255,8 @@ func (c *Collector) getContainerStats(ctx context.Context, clones []*models.Clon
251255 }
252256 }
253257
258+ c .cleanupStaleCPUStats (activeCloneIDs )
259+
254260 return result
255261}
256262
@@ -261,17 +267,13 @@ func (c *Collector) calculateCPUPercent(cloneID string, stats *container.StatsRe
261267 currentSystemUsage := stats .CPUStats .SystemUsage
262268 now := time .Now ()
263269
264- c .prevCPUStatsMu .RLock ()
265270 prevStats , hasPrev := c .prevCPUStats [cloneID ]
266- c .prevCPUStatsMu .RUnlock ()
267271
268- c .prevCPUStatsMu .Lock ()
269272 c .prevCPUStats [cloneID ] = containerCPUState {
270273 totalUsage : currentTotalUsage ,
271274 systemUsage : currentSystemUsage ,
272275 timestamp : now ,
273276 }
274- c .prevCPUStatsMu .Unlock ()
275277
276278 if ! hasPrev {
277279 return 0
@@ -282,10 +284,14 @@ func (c *Collector) calculateCPUPercent(cloneID string, stats *container.StatsRe
282284 return 0
283285 }
284286
287+ if currentTotalUsage < prevStats .totalUsage || currentSystemUsage < prevStats .systemUsage {
288+ return 0
289+ }
290+
285291 cpuDelta := float64 (currentTotalUsage - prevStats .totalUsage )
286292 systemDelta := float64 (currentSystemUsage - prevStats .systemUsage )
287293
288- if systemDelta <= 0 || cpuDelta < 0 {
294+ if systemDelta <= 0 {
289295 return 0
290296 }
291297
@@ -301,6 +307,14 @@ func (c *Collector) calculateCPUPercent(cloneID string, stats *container.StatsRe
301307 return (cpuDelta / systemDelta ) * cpuCount * 100.0
302308}
303309
310+ func (c * Collector ) cleanupStaleCPUStats (activeCloneIDs map [string ]struct {}) {
311+ for cloneID := range c .prevCPUStats {
312+ if _ , ok := activeCloneIDs [cloneID ]; ! ok {
313+ delete (c .prevCPUStats , cloneID )
314+ }
315+ }
316+ }
317+
304318func (c * Collector ) collectSnapshotMetrics () {
305319 snapshots , err := c .cloning .GetSnapshots ()
306320 if err != nil {
0 commit comments