Skip to content

Commit a528eca

Browse files
authored
Merge pull request #2372 from bugsnag/PLAT-15150/stack-sampling
AppHang: Optional Stack sampling
2 parents 3af4104 + c6e602b commit a528eca

11 files changed

Lines changed: 1214 additions & 18 deletions

File tree

bugsnag-plugin-android-apphang/api/bugsnag-plugin-android-apphang.api

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
11
public final class com/bugsnag/android/AppHangConfiguration {
22
public fun <init> ()V
3-
public fun <init> (JLandroid/os/Looper;)V
4-
public synthetic fun <init> (JLandroid/os/Looper;ILkotlin/jvm/internal/DefaultConstructorMarker;)V
3+
public fun <init> (JLandroid/os/Looper;Ljava/lang/Long;J)V
4+
public synthetic fun <init> (JLandroid/os/Looper;Ljava/lang/Long;JILkotlin/jvm/internal/DefaultConstructorMarker;)V
55
public final fun getAppHangThresholdMillis ()J
6+
public final fun getStackSamplingIntervalMillis ()J
7+
public final fun getStackSamplingThresholdMillis ()Ljava/lang/Long;
68
public final fun getWatchedLooper ()Landroid/os/Looper;
79
public final fun setAppHangThresholdMillis (J)V
10+
public final fun setStackSamplingIntervalMillis (J)V
11+
public final fun setStackSamplingThresholdMillis (Ljava/lang/Long;)V
812
public final fun setWatchedLooper (Landroid/os/Looper;)V
913
}
1014

bugsnag-plugin-android-apphang/src/main/java/com/bugsnag/android/AppHangConfiguration.kt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,41 @@ class AppHangConfiguration(
1919
* Defaults to [Looper.getMainLooper]
2020
*/
2121
var watchedLooper: Looper = Looper.getMainLooper(),
22+
/**
23+
* How long after a heartbeat before the monitored thread should start being stack sampled.
24+
* Setting this to a value between 1 and [appHangThresholdMillis] (inclusive) enables stack sampling,
25+
* which produces a significantly higher-quality error at the expense of some runtime slowdown
26+
* and memory.
27+
*
28+
* When enabled, potential AppHangs are identified early and the monitored thread's stack is
29+
* sampled repeatedly until either the thread recovers or an AppHang error is raised. If a
30+
* full AppHang error is reported, a secondary stack trace of the most frequently seen stack
31+
* path will be attached to the report. These error reports tend to group better than typical
32+
* AppHangs and ANRs, and provide more actionable insights.
33+
*
34+
* A reasonable starting value is 1 second (`1000`) which is a noticeable pause for a user,
35+
* but not long enough to trigger many false-positives.
36+
*
37+
* Set to `null` to disable stack sampling (default).
38+
*
39+
* @see [stackSamplingIntervalMillis]
40+
*/
41+
var stackSamplingThresholdMillis: Long? = null,
42+
/**
43+
* How many milliseconds to wait between stack samples. This is a best-effort value and the
44+
* real sampling rate may be different.
45+
*
46+
* This property only takes effect when [stackSamplingThresholdMillis] is set to a non-null
47+
* value to enable stack sampling.
48+
*
49+
* Defaults to `50` milliseconds.
50+
*/
51+
var stackSamplingIntervalMillis: Long = DEFAULT_SAMPLING_INTERVAL,
2252
) {
2353
constructor() : this(DEFAULT_APP_HANG_THRESHOLD)
2454

2555
internal companion object {
2656
internal const val DEFAULT_APP_HANG_THRESHOLD: Long = 3000L
57+
internal const val DEFAULT_SAMPLING_INTERVAL: Long = 50L
2758
}
2859
}

bugsnag-plugin-android-apphang/src/main/java/com/bugsnag/android/BugsnagAppHangPlugin.kt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package com.bugsnag.android
22

33
import androidx.annotation.VisibleForTesting
44
import com.bugsnag.android.internal.LooperMonitorThread
5+
import com.bugsnag.android.internal.ThreadSampler
56

67
/**
78
* An alternative to Application Not Responding (ANR) reporting with configurable timeouts.
@@ -10,6 +11,8 @@ class BugsnagAppHangPlugin @JvmOverloads constructor(
1011
configuration: AppHangConfiguration = AppHangConfiguration()
1112
) : Plugin {
1213
private val appHangThresholdMillis = configuration.appHangThresholdMillis
14+
private val samplingThresholdMillis = configuration.stackSamplingThresholdMillis ?: 0
15+
private val samplingRateMillis = configuration.stackSamplingIntervalMillis
1316
private val watchedLooper = configuration.watchedLooper
1417

1518
private var client: Client? = null
@@ -37,7 +40,7 @@ class BugsnagAppHangPlugin @JvmOverloads constructor(
3740
client = null
3841
}
3942

40-
private fun reportAppHang(timeSinceLastHeartbeat: Long) {
43+
private fun reportAppHang(timeSinceLastHeartbeat: Long, sampler: ThreadSampler?) {
4144
val watchedThread = watchedLooper.thread
4245
val stackTrace = watchedThread.stackTrace
4346
val threadName = watchedThread.name
@@ -49,6 +52,7 @@ class BugsnagAppHangPlugin @JvmOverloads constructor(
4952
)
5053
) { event ->
5154
event.errors.firstOrNull()?.errorClass = "AppHang"
55+
sampler?.createError(event)
5256

5357
@Suppress("DEPRECATION")
5458
event.setErrorReportingThread(watchedThread.id)
@@ -65,6 +69,8 @@ class BugsnagAppHangPlugin @JvmOverloads constructor(
6569
monitorThread = LooperMonitorThread(
6670
watchedLooper,
6771
appHangThresholdMillis,
72+
if (samplingThresholdMillis in 1..appHangThresholdMillis) samplingThresholdMillis else 0,
73+
if (samplingRateMillis in 1..appHangThresholdMillis) samplingRateMillis else 0,
6874
this::reportAppHang
6975
)
7076

bugsnag-plugin-android-apphang/src/main/java/com/bugsnag/android/internal/LooperMonitorThread.kt

Lines changed: 71 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,27 @@ import android.os.SystemClock
66
import java.util.concurrent.TimeUnit
77
import java.util.concurrent.atomic.AtomicBoolean
88
import java.util.concurrent.locks.LockSupport
9+
import kotlin.compareTo
10+
import kotlin.text.compareTo
11+
import kotlin.text.get
12+
import kotlin.text.set
913

1014
internal class LooperMonitorThread(
1115
watchedLooper: Looper,
1216
private val appHangThresholdMillis: Long,
13-
private val onAppHangDetected: (timeSinceLastHeartbeat: Long) -> Unit
17+
private val samplingThresholdMillis: Long,
18+
private val samplingRateMillis: Long,
19+
private val onAppHangDetected: (timeSinceLastHeartbeat: Long, ThreadSampler?) -> Unit
1420
) : Thread("Bugsnag AppHang Monitor: ${watchedLooper.thread.name}") {
1521
private val handler: Handler = Handler(watchedLooper)
1622

23+
private val threadSampler: ThreadSampler? =
24+
if (samplingThresholdMillis > 0) ThreadSampler(watchedLooper.thread)
25+
else null
26+
27+
@Volatile
28+
private var lastStackSampleTimestamp = 0L
29+
1730
@Volatile
1831
private var lastHeartbeatTimestamp = 0L
1932

@@ -23,9 +36,6 @@ internal class LooperMonitorThread(
2336

2437
private val heartbeat: Runnable = Heartbeat()
2538

26-
private fun calculateTimeToAppHang(now: Long): Long =
27-
(lastHeartbeatTimestamp + appHangThresholdMillis) - now
28-
2939
fun startMonitoring() {
3040
if (isRunning.compareAndSet(false, true)) {
3141
start()
@@ -49,40 +59,87 @@ internal class LooperMonitorThread(
4959
}
5060

5161
isAppHangDetected = true
52-
onAppHangDetected(timeSinceLastHeartbeat)
62+
onAppHangDetected(timeSinceLastHeartbeat, threadSampler)
5363
}
5464

5565
override fun run() {
5666
handler.post(heartbeat)
5767

5868
while (isRunning.get()) {
59-
val waitThreshold =
60-
if (lastHeartbeatTimestamp <= 0L) appHangThresholdMillis
61-
else calculateTimeToAppHang(SystemClock.uptimeMillis())
69+
val now = SystemClock.uptimeMillis()
70+
val timeSinceHeartbeat = now - lastHeartbeatTimestamp
6271

63-
val waitThresholdNanos = TimeUnit.MILLISECONDS.toNanos(waitThreshold)
64-
LockSupport.parkNanos(waitThresholdNanos)
72+
// Wait until next sample time or hang detection time, whichever comes first
73+
val waitMillis = calculateNextWaitTime(now, timeSinceHeartbeat)
74+
LockSupport.parkNanos(TimeUnit.MILLISECONDS.toNanos(waitMillis))
6575

6676
if (!isRunning.get()) break
6777

68-
val timeSinceLastHeartbeat = SystemClock.uptimeMillis() - lastHeartbeatTimestamp
78+
val currentTime = SystemClock.uptimeMillis()
79+
val currentTimeSinceHeartbeat = currentTime - lastHeartbeatTimestamp
80+
81+
if (shouldTakeSample(currentTime, currentTimeSinceHeartbeat)) {
82+
threadSampler?.captureSample()
83+
lastStackSampleTimestamp = currentTime
84+
}
6985

70-
if (timeSinceLastHeartbeat >= appHangThresholdMillis) {
71-
reportAppHang(timeSinceLastHeartbeat)
86+
if (currentTimeSinceHeartbeat >= appHangThresholdMillis) {
87+
reportAppHang(currentTimeSinceHeartbeat)
7288
}
7389

7490
if (!handler.post(heartbeat)) {
75-
// handler.post returning false means the Looper has likely quit
7691
isRunning.set(false)
7792
}
7893
}
7994
}
8095

96+
/**
 * Works out how long the monitor thread should park before its next check.
 *
 * - No heartbeat recorded yet: wait a full [appHangThresholdMillis].
 * - The hang threshold has already elapsed: return [Long.MAX_VALUE], i.e. park with no
 *   effective timeout (presumably the thread is unparked elsewhere once the watched
 *   looper recovers - not visible from this function).
 * - Stack sampling disabled ([threadSampler] is `null`): wait the time left until the
 *   hang threshold.
 * - Otherwise: defer to [calculateTimeToNextStackSample].
 *
 * @param now the current uptime in milliseconds, as read by the caller
 * @param timeSinceHeartbeat milliseconds elapsed since the last recorded heartbeat
 * @return the number of milliseconds to park for
 */
private fun calculateNextWaitTime(now: Long, timeSinceHeartbeat: Long): Long = when {
    lastHeartbeatTimestamp <= 0L -> appHangThresholdMillis
    timeSinceHeartbeat >= appHangThresholdMillis -> Long.MAX_VALUE
    threadSampler == null -> appHangThresholdMillis - timeSinceHeartbeat
    else -> calculateTimeToNextStackSample(
        now,
        appHangThresholdMillis - timeSinceHeartbeat,
        timeSinceHeartbeat
    )
}
105+
106+
/**
 * Works out how long to park before the next stack sample is due, capped at the time
 * remaining until the hang threshold.
 *
 * The result is kept consistent with [shouldTakeSample], which never samples before
 * [samplingThresholdMillis] has elapsed since the last heartbeat:
 *
 * - While the sampling threshold has not been reached, the wait is the time left until
 *   it is reached, even if [lastStackSampleTimestamp] is non-zero. That field is also
 *   written by the heartbeat running on the watched looper, so the value seen here can
 *   be stale; trusting it would yield a zero or negative wait for a sample that will not
 *   be taken, and the monitor loop would spin.
 * - Once sampling has started, the wait is the remainder of the current
 *   [samplingRateMillis] interval.
 * - The result is never less than 1ms, so a non-positive sampling interval or an overdue
 *   sample cannot turn the monitor loop into a busy loop.
 *
 * @param now the current uptime in milliseconds, as read by the caller
 * @param timeToHang milliseconds left until the hang threshold is reached
 * @param timeSinceHeartbeat milliseconds elapsed since the last recorded heartbeat
 * @return the number of milliseconds to park for, always at least 1
 */
private fun calculateTimeToNextStackSample(
    now: Long,
    timeToHang: Long,
    timeSinceHeartbeat: Long
): Long {
    // Read the volatile field once so both uses below see the same value
    val lastSample = lastStackSampleTimestamp
    val timeToSamplingStart = samplingThresholdMillis - timeSinceHeartbeat

    val timeToNextSample = when {
        // Sampling is not due yet - any recorded sample belongs to an earlier stall
        timeToSamplingStart > 0L -> timeToSamplingStart
        // Already sampling - wait out the rest of the current interval
        lastSample > 0L -> samplingRateMillis - (now - lastSample)
        // Threshold reached but nothing sampled yet - the first sample is due
        else -> timeToSamplingStart
    }

    return minOf(timeToNextSample, timeToHang).coerceAtLeast(1L)
}
120+
121+
/**
 * Decides whether a stack sample should be captured on this pass of the monitor loop.
 *
 * A sample is due only when stack sampling is enabled ([threadSampler] is non-null),
 * at least [samplingThresholdMillis] have passed since the last heartbeat, and at least
 * [samplingRateMillis] have passed since the previous sample. When no sample has been
 * recorded yet ([lastStackSampleTimestamp] is not positive), the interval counts as
 * already elapsed.
 *
 * @param currentTime the current uptime in milliseconds, as read by the caller
 * @param timeSinceHeartbeat milliseconds elapsed since the last recorded heartbeat
 * @return `true` if a sample should be captured now
 */
private fun shouldTakeSample(currentTime: Long, timeSinceHeartbeat: Long): Boolean {
    val samplingActive = threadSampler != null && timeSinceHeartbeat >= samplingThresholdMillis
    if (!samplingActive) return false

    val elapsedSinceSample = when {
        // Nothing sampled yet - treat the interval as long since elapsed
        lastStackSampleTimestamp <= 0L -> Long.MAX_VALUE
        else -> currentTime - lastStackSampleTimestamp
    }

    return elapsedSinceSample >= samplingRateMillis
}
133+
81134
private inner class Heartbeat : Runnable {
82135
override fun run() {
83136
lastHeartbeatTimestamp = SystemClock.uptimeMillis()
84137
isAppHangDetected = false
85138

139+
// Reset sampler when thread recovers
140+
threadSampler?.resetSampling()
141+
lastStackSampleTimestamp = 0L
142+
86143
resetHeartbeatTimer()
87144
}
88145

0 commit comments

Comments
 (0)