Android 获取进程的 backtrace 信息

1. 使用 kill 发送 SIGQUIT 信号

这种方法只能用于zygote 的子进程 (比如所有的 app 进程, 都是由zygote fork 而来).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# kill -3 pid
# cat /data/anr/traces.txt
...
suspend all histogram: Sum: 290us 99% C.I. 2us-40us Avg: 14.500us Max: 40us
DALVIK THREADS (12):
"Signal Catcher" daemon prio=5 tid=2 Runnable
| group="system" sCount=0 dsCount=0 obj=0x32c070a0 self=0xaecca000
| sysTid=1918 nice=0 cgrp=bg_non_interactive sched=0/0 handle=0xb4406930
| state=R schedstat=( 228351607 17443703 83 ) utm=12 stm=9 core=0 HZ=100
| stack=0xb430a000-0xb430c000 stackSize=1014KB
| held mutexes= "mutator lock"(shared held)
native: #00 pc 00370e01 /system/lib/libart.so (_ZN3art15DumpNativeStackERNSt3__113basic_ostreamIcNS0_11char_traitsIcEEEEiPKcPNS_9ArtMethodEPv+160)
native: #01 pc 0035046f /system/lib/libart.so (_ZNK3art6Thread4DumpERNSt3__113basic_ostreamIcNS1_11char_traitsIcEEEE+150)
native: #02 pc 0035a373 /system/lib/libart.so (_ZN3art14DumpCheckpoint3RunEPNS_6ThreadE+442)
native: #03 pc 0035af31 /system/lib/libart.so (_ZN3art10ThreadList13RunCheckpointEPNS_7ClosureE+212)
native: #04 pc 0035b45f /system/lib/libart.so (_ZN3art10ThreadList4DumpERNSt3__113basic_ostreamIcNS1_11char_traitsIcEEEE+142)
native: #05 pc 0035bb6f /system/lib/libart.so (_ZN3art10ThreadList14DumpForSigQuitERNSt3__113basic_ostreamIcNS1_11char_traitsIcEEEE+334)
native: #06 pc 00333cb7 /system/lib/libart.so (_ZN3art7Runtime14DumpForSigQuitERNSt3__113basic_ostreamIcNS1_11char_traitsIcEEEE+74)
native: #07 pc 0033b01d /system/lib/libart.so (_ZN3art13SignalCatcher13HandleSigQuitEv+928)
native: #08 pc 0033b901 /system/lib/libart.so (_ZN3art13SignalCatcher3RunEPv+340)
native: #09 pc 0003f45f /system/lib/libc.so (_ZL15__pthread_startPv+30)
native: #10 pc 00019b43 /system/lib/libc.so (__start_thread+6)
(no managed stack frames)
"main" prio=5 tid=1 Native
| group="main" sCount=1 dsCount=0 obj=0x74abb2a0 self=0xb4d76500
| sysTid=1914 nice=0 cgrp=bg_non_interactive sched=0/0 handle=0xb6f3fb34
| state=S schedstat=( 76934470 21396828 203 ) utm=3 stm=3 core=0 HZ=100
| stack=0xbe55e000-0xbe560000 stackSize=8MB
| held mutexes=
native: #00 pc 00040894 /system/lib/libc.so (__epoll_pwait+20)
native: #01 pc 00019e6f /system/lib/libc.so (epoll_pwait+26)
...

结果写在 /data/anr/traces.txt 文件中, anr 即 ANR (Application Not Responding, 应用无响应) 的意思.

https://android.googlesource.com/platform/art/+/android-7.1.1_r13/runtime/runtime.cc

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
// Excerpt of Runtime::InitNonZygoteOrPostFork; the `...` line below marks code
// that this listing omits.
//
// In the portion shown, the function creates the heap's thread pool, resets the
// GC performance info, creates the JIT when the conditions below hold, starts
// the signal catcher, and finally starts JDWP.
//
// is_system_server: when true, the JIT-creation branch below is skipped.
// env, action, isa: not used in the visible portion.
void Runtime::InitNonZygoteOrPostFork(
JNIEnv* env, bool is_system_server, NativeBridgeAction action, const char* isa) {
...
// Create the thread pools.
heap_->CreateThreadPool();
// Reset the gc performance data at zygote fork so that the GCs
// before fork aren't attributed to an app.
heap_->ResetGcPerformanceInfo();
// Create the JIT only outside system_server and safe mode, when JIT compilation
// or profile saving is enabled, and when no JIT instance exists yet.
if (!is_system_server &&
!safe_mode_ &&
(jit_options_->UseJitCompilation() || jit_options_->GetSaveProfilingInfo()) &&
jit_.get() == nullptr) {
// Note that when running ART standalone (not zygote, nor zygote fork),
// the jit may have already been created.
CreateJit();
}
// Called unconditionally in the portion shown, i.e. independent of
// is_system_server and of the JIT checks above.
StartSignalCatcher();
// Start the JDWP thread. If the command-line debugger flags specified "suspend=y",
// this will pause the runtime, so we probably want this to come last.
Dbg::StartJdwp();
}

可以看出, 非 zygote 的进程 (包括 zygote fork 之后的子进程) 都会启动 Signal Catcher 线程.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
root@shamu:/ # ps -t 2089
USER PID PPID VSIZE RSS WCHAN PC NAME
u0_a58 2089 386 1565280 53732 SyS_epoll_ b6cb0894 S com.hujiang.dict:pushservice
u0_a58 2092 2089 1565280 53732 do_sigtime b6cb0b68 S Signal Catcher
u0_a58 2095 2089 1565280 53732 unix_strea b6cb194c S JDWP
u0_a58 2096 2089 1565280 53732 futex_wait b6c875e8 S ReferenceQueueD
u0_a58 2097 2089 1565280 53732 futex_wait b6c875e8 S FinalizerDaemon
u0_a58 2099 2089 1565280 53732 futex_wait b6c875e8 S FinalizerWatchd
u0_a58 2100 2089 1565280 53732 futex_wait b6c875e8 S HeapTaskDaemon
u0_a58 2101 2089 1565280 53732 binder_thr b6cb09c0 S Binder_1
u0_a58 2102 2089 1565280 53732 binder_thr b6cb09c0 S Binder_2
u0_a58 2107 2089 1565280 53732 SyS_epoll_ b6cb0894 S Thread-123
u0_a58 2108 2089 1565280 53732 futex_wait b6c875e8 S taskService-pro
zygote 自己没有该线程.
root@shamu:/ # ps | grep -i zy
root 386 1 1528448 67416 poll_sched b6cb0a5c S zygote
127|root@shamu:/ # ps -t 386
USER PID PPID VSIZE RSS WCHAN PC NAME
root 386 1 1528448 67416 poll_sched b6cb0a5c S zygote
root 2203 386 1528448 67416 futex_wait b6c875e8 S ReferenceQueueD
root 2204 386 1528448 67416 futex_wait b6c875e8 S FinalizerDaemon
root 2205 386 1528448 67416 futex_wait b6c875e8 S FinalizerWatchd
root 2206 386 1528448 67416 futex_wait b6c875e8 S HeapTaskDaemon

https://android.googlesource.com/platform/art/+/android-7.1.1_r13/runtime/signal_catcher.cc

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
...
void SignalCatcher::Output(const std::string& s) {
if (stack_trace_file_.empty()) {
LOG(INFO) << s;
return;
}
...
// Handles SIGQUIT: assembles a report in an in-memory string stream and passes
// the finished text to Output() in a single call.
void SignalCatcher::HandleSigQuit() {
Runtime* runtime = Runtime::Current();
std::ostringstream os;
// Opening banner: a blank line, then the pid and the value returned by GetIsoDate().
os << "\n"
<< "----- pid " << getpid() << " at " << GetIsoDate() << " -----\n";
DumpCmdLine(os);
// Note: The strings "Build fingerprint:" and "ABI:" are chosen to match the format used by
// debuggerd. This allows, for example, the stack tool to work.
std::string fingerprint = runtime->GetFingerprint();
// Falls back to the literal "unknown" when the runtime reports an empty fingerprint.
os << "Build fingerprint: '" << (fingerprint.empty() ? "unknown" : fingerprint) << "'\n";
os << "ABI: '" << GetInstructionSetString(runtime->GetInstructionSet()) << "'\n";
os << "Build type: " << (kIsDebugBuild ? "debug" : "optimized") << "\n";
// The runtime appends its own dump to the same stream.
runtime->DumpForSigQuit(os);
// Disabled code path: the condition is the constant false, so the contents of
// /proc/self/maps are never appended.
if ((false)) {
std::string maps;
if (ReadFileToString("/proc/self/maps", &maps)) {
os << "/proc/self/maps:\n" << maps;
}
}
// Closing banner carrying the same pid, then emit the whole report.
os << "----- end " << getpid() << " -----\n";
Output(os.str());
}
...
while (true) {
int signal_number = signal_catcher->WaitForSignal(self, signals);
if (signal_catcher->ShouldHalt()) {
runtime->DetachCurrentThread();
return nullptr;
}
switch (signal_number) {
case SIGQUIT:
signal_catcher->HandleSigQuit();
break;
case SIGUSR1:
signal_catcher->HandleSigUsr1();
break;
default:
LOG(ERROR) << "Unexpected signal %d" << signal_number;
break;
}
}
...

从源码中发现, 除了 SIGQUIT 还可以发送 SIGUSR1, 这个信号可以让进程的 Java 虚拟机执行一次 GC 操作.
命令为: kill -10 pid (在 ARM/x86 的 Linux 上 SIGUSR1 的编号是 10, 其他架构可能不同, 也可以写成 kill -USR1 pid)

1
2
3
4
// Handles SIGUSR1: logs an informational message and then asks the runtime's
// heap to collect garbage.
void SignalCatcher::HandleSigUsr1() {
LOG(INFO) << "SIGUSR1 forcing GC (no HPROF)";
// NOTE(review): the meaning of the `false` argument is not visible in this
// listing — presumably "do not clear soft references"; confirm against the
// declaration of Heap::CollectGarbage.
Runtime::Current()->GetHeap()->CollectGarbage(false);
}

2. 使用 debuggerd 命令行

这种方法是全系统通用的, 也可以用于不是由 zygote fork 出来的进程 (比如 native 进程).

1
2
Usage: -b [<tid>]
-b dump backtrace to console, otherwise dump full tombstone file

通过 -b 参数指定目标进程的 pid (即用法说明中的 <tid>) 即可.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# debuggerd -b 23850
Sending request to dump task 23850.
----- pid 23850 at 1970-01-27 03:57:11 -----
Cmd line: /sbin/adbd
ABI: 'arm'
"adbd" sysTid=23850
#00 pc 0002b158 /sbin/adbd
#01 pc 0002467f /sbin/adbd
#02 pc 00020854 [stack]
"adbd" sysTid=23851
#00 pc 0002fd38 /sbin/adbd
#01 pc 0002a501 /sbin/adbd
#02 pc 0000000b <unknown>
"adbd" sysTid=23852
#00 pc 0002b624 /sbin/adbd
#01 pc 000106cf /sbin/adbd
#02 pc 00010301 /sbin/adbd
#03 pc 0002a613 /sbin/adbd
#04 pc 00030283 /sbin/adbd
"adbd" sysTid=23853
#00 pc 0002b628 /sbin/adbd
#01 pc 00013999 /sbin/adbd
#02 pc 000112ed /sbin/adbd
#03 pc 000104e1 /sbin/adbd
#04 pc 0002a613 /sbin/adbd
#05 pc 00030283 /sbin/adbd
"adbd" sysTid=23862
#00 pc 0002b888 /sbin/adbd
#01 pc 0000a503 /sbin/adbd
#02 pc 00009527 /sbin/adbd
#03 pc 0002a613 /sbin/adbd
#04 pc 00030283 /sbin/adbd
----- end 23850 -----
...

3. java 代码中打印调用栈

1
2
3
4
5
6
try {
...
} catch (RemoteException e) {
e.printStackTrace();
...
}

4. C++代码中打印调用栈

CallStack.cpp

1
2
3
4
5
6
7
8
9
#include <utils/CallStack.h>
int main() {
android::CallStack stack;
stack.update();
stack.dump(1);
return 0;
}

Android.mk

1
2
3
4
5
6
7
8
9
10
11
# Build script for the CallStack demo: compiles CallStack.cpp into an executable
# module named "CallStack" that links against libutils.
LOCAL_PATH:= $(call my-dir)
include $(CLEAR_VARS)
LOCAL_SRC_FILES:= CallStack.cpp
LOCAL_SHARED_LIBRARIES += libutils
# NOTE(review): libutils is already listed in LOCAL_SHARED_LIBRARIES above, so
# -lutils here looks redundant — confirm it is needed before keeping it.
LOCAL_LDLIBS += -ldl -lutils
# NOTE(review): common_CFLAGS is not defined anywhere in this snippet; unless it
# is set elsewhere it expands to nothing.
LOCAL_CFLAGS := $(common_CFLAGS)
LOCAL_MODULE := CallStack
include $(BUILD_EXECUTABLE)

执行后显示类似下面的结果

1
2
3
4
5
root@shamu:/data/local/tmp # ./CallStack
#00 pc 000006d1 /data/local/tmp/CallStack
#01 pc 00017359 /system/lib/libc.so (__libc_init+44)
#02 pc 0000074c /data/local/tmp/CallStack