📄 morebacktrace.c
字号:
assert(arch != NULL); assert(task != MACH_PORT_NULL); assert(threadState != NULL); assert( ((stackBottom == 0) && (stackBottom == stackTop)) || (stackBottom < stackTop) ); assert( (frameArrayCount == 0) || (frameArray != NULL) ); assert( frameCountPtr != NULL ); // Create the context, do the backtrace, and return the frame count. err = InitContext( &context, arch, task, threadState, MachReadBytes, NULL, stackBottom, stackTop, frameArray, frameArrayCount ); if (err == 0) { err = BacktraceCore(&context); } *frameCountPtr = context.frameCountOut; return err;}#pragma mark ***** CPU Specific#pragma mark - PowerPC/* PowerPC Stack Frame Basics -------------------------- Offset Size Purpose ------ ---- ------- low memory fp == sp == r1 -> 0 X pointer to next frame X X place to save CR 2X X place to save LR 3X 2X reserved 5X X place to save TOC (CFM only) high memory where X is the address size (4 bytes for 32-bits, 8 bytes for 64-bits) To get from one frame to the next, you have to indirect an offset of 0. To extract the PC from a frame (which, notably, is the address of the code running in that frame, not a return address), you have to indirect an offset of 2X bytes (8 or 16). There's enough commonality between 32- and 64-bit PowerPC architectures that it's easy to handle them both with the same code.*/static bool PowerPCIsSystemCall(MoreBTContext *context, MoreBTAddr pc) // Using the PC from the thread state, walk back through // the code stream for 3 instructions looking for a "sc" instruction. // If we find one, it's almost certain that we're in a system call // frameless leaf routine.{ int err; bool isSystemCall; int count; uint8_t inst[4]; isSystemCall = false; count = 0; do { err = context->readBytes(context, pc, &inst, sizeof(inst)); if (err == 0) { isSystemCall = (inst[0] == 0x44) // PPC "sc" instruction && (inst[1] == 0x00) // PPC instructions are always big && (inst[2] == 0x00) // endian, so we compare it byte at && (inst[3] == 0x02); // time for endian neutrality } if ( (err == 0) && ! isSystemCall ) { count += 1; pc -= sizeof(inst); } } while ( (err == 0) && ! isSystemCall && (count < 3) ); err = 0; return isSystemCall;}static int PowerPCHandleLeaf(MoreBTContext *context, MoreBTAddr *pcPtr, MoreBTAddr *framePtr) // This is the handleLeaf routine for the PowerPC // architecture. See the description of MoreBTHandleLeafProc // for a detailed discussion of its parameters. // // The top most frame may be in a weird state because of the // possible variations in the routine prologue. There are a // variety of combinations, such as: // // 1. a normal routine, with its return address stored in // its caller's stack frame // // 2. a system call routine, which is a leaf routine with // no frame and the return address is in LR // // 3. a leaf routine with no frame, where the return address // is in LR // // 4. a leaf routine with no frame that accesses a global, where // the return address is in r0 // // 5. a normal routine that was stopped midway through // constructing its prolog, where the return address is // typically in r0 // // Of these, 1 and 2 are most common, and they're the cases I // handle. General support for all of the cases requires the // ability to accurately determine the start of the routine // which is not something that I can do with my current // infrastructure. // // Note that don't handle any cases where the return address is // in r0, although r0 is available as part of the threadState // if I need it in the future.{ int err; MoreBTAddr pc; MoreBTAddr lr; // Get the pc and lr from the thread state. if (context->arch->is64Bit) { pc = ((const ppc_thread_state64_t *) context->threadState)->srr0; lr = ((const ppc_thread_state64_t *) context->threadState)->lr; } else { pc = ((const ppc_thread_state_t *) context->threadState)->srr0; lr = ((const ppc_thread_state_t *) context->threadState)->lr; } // If we find that we're in a system call frameless leaf routine, // add a dummy stack frame (with no frame, because the frame actually // belows to frameArray[1]). err = 0; if ( PowerPCIsSystemCall(context, pc) ) { AddFrame(context, pc, 0, kMoreBTFrameBadMask); pc = lr; } // Pass the initial pc and frame back to the caller. *pcPtr = pc; if (context->arch->is64Bit) { *framePtr = ((const ppc_thread_state64_t *) context->threadState)->r1; } else { *framePtr = ((const ppc_thread_state_t *) context->threadState)->r1; } return err;}static bool PowerPCValidPC(MoreBTContext *context, MoreBTAddr pc) // This is the validPC routine for the PowerPC // architecture. See the description of // MoreBTValidPCProc for a detailed discussion // of its parameters. // // PowerPC instructions must be word aligned. Also, I check that // it's possible to read the instruction. I don't do anything // clever like check that the resulting value is a valid instruction.{ uint32_t junkInst; return ((pc & 0x03) == 0) && (context->readBytes(context, pc, &junkInst, sizeof(junkInst)) == 0);}static int PowerPCGetFrameNextPC(MoreBTContext *context, MoreBTAddr thisFrame, MoreBTAddr nextFrame, MoreBTAddr *nextPCPtr) // This is the getFrameNextPC routine for the PowerPC // architecture. See the description of // MoreBTGetFrameNextPCProc for a detailed discussion // of its parameters.{ MoreBTAddr offset; if ( context->arch->is64Bit ) { offset = 16; } else { offset = 8; } return ReadAddr(context, nextFrame + offset, nextPCPtr);}/* PowerPC Signal Stack Frames --------------------------- In the current Mac OS X architecture, there is no guaranteed reliable way to backtrace a PowerPC signal stack frame. The problem is that the kernel pushes a variable amount of data on to the stack when it invokes the user space signal trampoline (_sigtramp), and the only handle to the information about how much data was pushed is passed in a register parameter to _sigtramp. _sigtramp stashes that value away in a non-volatile register. So, when _sigtramp calls the user-supplied signal handler, there's no way to work out where that register ends up being saved. Thus, we devolve into guesswork. It turns out that the offset from the stack of the kernel data to the information we need (the place where the interrupted thread's registers were stored) is a (relatively) constant for any given system release. So, we can just simply add the appropriate offset to the frame pointer and grab the data we need. On recent systems (10.3 and later) this fails if the signal handle requests 'dual contexts', that is, it requests both 32- and 64-bit PowerPC registers. In that case, the size of the pushed data changes, and that affects the relative alignment of the data and the stack pointer, and things break. I don't know of any way to work around this <rdar://problem/4411774>. Finally, these constant vary from release to release. This code handles the significant cases that I know about (Mac OS X 10.1.x and earlier, Mac OS X 10.2, and Mac OS 10.3 and later), but there's no guarantee that this offset won't change again in the future. When the kernel invokes the user space signal trampoline, it pushes the following items on to the stack. Mac OS X 10.1.x --------------- Size Purpose ---- ------- low memory 0x030 bytes for C linkage 0x040 bytes for saving PowerPC parameters 0x0c0 ppc_saved_state 0x110 ppc_float_state 0x018 struct sigcontext 0x0e0 red zone high memory The previous frame's SP is at offset 0x00C within ppc_saved_state, which makes it equal to 0x030 + 0x040 + 0x00C, or 0x07C. The offset to the previous PC (0x84) follows from that. Mac OS X 10.2.x --------------- Size Purpose ---- ------- low memory 0x030 bytes for C linkage 0x040 bytes for saving PowerPC parameters 0x008 alignment padding 0x408 struct mcontext, comprised of: 0x020 ppc_exception_state_t 0x0A0 ppc_thread_state_t 0x108 ppc_float_state_t 0x240 ppc_vector_state_t 0x040 siginfo_t 0x020 ucontext 0x0e0 red zone high memory The previous frame's SP is at offset 0x00C within ppc_thread_state_t, which it equal to 0x030 + 0x040 + 0x008 + 0x020 + 0x00C, or 0x0A4. The offsets to the previous PC and LR (0x98 and 0x128) follow from that. Mac OS X 10.3.x and 10.4.x -------------------------- Size, 32 Size, 64 Purpose -------- -------- ------- low memory align16 align32 alignment 0x030 0x030 bytes for C linkage 0x040 0x040 bytes for saving PowerPC parameters 0x008 0x018 alignment 0x040 0x068 siginfo_t, user_siginfo_t 0x020 0x038 ucontext64 0x408 [0x408] mcontext [0x498] 0x498 mcontext64 align16 align32 alignment 0x0e0 0x140 redzone high memory Some things to note about the above diagram: o The items in square brackets are only pushed if the signal handler requests dual contexts. o For a 64-bit process, the kernel aligns the stack to a 32 byte boundary, even though the runtime architecture only requires a 16 byte boundary. o The final alignment is done last, but the space that it creates is effectively created between the parameter save area and the [user_]siginfo_t because the C linkage area and param save areas are both defined to be a fixed offset from the frame pointer. On 32-bit, the previous PC is stored at offset 0x18 within the siginfo_t and the previous SP is stored at offset 0x024. So the total offset is 0x030 + 0x040 + 0x008 + 0x018/0x024, or 0x090 and 0x09C, respectively. On 64-bit, the previous PC is stored at offset 0x018 within the user_siginfo_t and the previous SP is stored at offset 0x030. So the total offset is 0x030 + 0x040 + 0x018 + 0x018/0x030, or 0x0A0 and 0x0B8, respectively. To get the previous LR (necessary for tracing through frameless leaf routines that are interrupted by a signal, most notably system calls), you have to delve even further up the stack, into the mcontext structures. I won't bore you with the details.*/static int PowerPCCrossSignalFrame(MoreBTContext *context, MoreBTAddr thisFrame, MoreBTAddr *nextPCPtr, MoreBTAddr *nextFramePtr) // This is the crossSignalFrame routine for the PowerPC // architecture. See the description of MoreBTCrossSignalFrameProc // for a detailed discussion of its parameters.{ int err; MoreBTAddr nextFrame; MoreBTAddr offsetToPC; MoreBTAddr offsetToFP; MoreBTAddr offsetToLR; assert(gOSRelease.major != 0); if (context->arch->is64Bit) { offsetToPC = 0xa0; offsetToFP = 0xb8; offsetToLR = 0x260; } else { if ( gOSRelease.major < 6 ) { // Darwin 6 == Mac OS X 10.2 // 10.0 through 10.1.x assert(false); // these values haven't been tested offsetToPC = 0x84; offsetToFP = 0x7c; // offsetToLR = ?; } else if ( gOSRelease.major < 7 ) { // Darwin 7 == Mac OS X 10.3 // Mac OS X 10.2.x assert(false); // these values haven't been tested offsetToPC = 0x98; offsetToFP = 0xa4; offsetToLR = 0x128; // What about G5 10.2.x systems? It's probably the same // as 10.3, but I'm not sure and, even if I was, I have no // idea how to detect such a system at runtime. } else { // Mac OS X 10.3 and later offsetToPC = 0x90; offsetToFP = 0x9c; offsetToLR = 0x188; } } // Read the address of the frame below the _sigtramp frame, because // that where all the action is. err = ReadAddr(context, thisFrame, &nextFrame); // Go grab the saved PC and SP. if (err == 0) { err = ReadAddr(context, nextFrame + offsetToPC, nextPCPtr); } if (err == 0) { err = ReadAddr(context, nextFrame + offsetToFP, nextFramePtr); } // If the PC is a system call, add a dummy leaf for that PC // and then get the next frame's PC from LR. if ( (err == 0) && PowerPCIsSystemCall(context, *nextPCPtr) ) { AddFrame(context, *nextPCPtr, 0, kMoreBTFrameBadMask); err = ReadAddr(context, nextFrame + offsetToLR, nextPCPtr); } return err;}#ifdef CPU_TYPE_X86#pragma mark - Intel/* Intel Stack Frame Basics ------------------------ Offset Size Purpose ------ ---- ------- low memory sp == esp -> -?? ?? general work area -?? ?? local variables fp == ebp -> 0 4 pointer to next frame 4 4 return address -?? ?? parameters high memory The stack frame on Intel is remarkably traditional. Two registers are used to manage the stack: esp points to the bottom of the stack, and ebp points to the stack frame itself. The memory at offset 0 stores the address of the next stack frame. The memory at offset 4 stores the saved PC for the next stack frame (that is, the return address for this stack frame).*/static bool IntelIsSystemCall(MoreBTContext *context, MoreBTAddr pc) // Using the PC from the thread state, look back in the code // stream to see if the previous bytes look something like a // system call. This is a heuristic rather than solid design. // Because Intel instructions are of variable length, there's no // guarantee that these bytes are part of some other instruction. // Still, it works most of the time. // // The instruction's were looking for are the two system call // primitives on Mac OS X: // // o INT 81 is used for Mach system calls // o sysenter is used by BSD system calls // // We detect INT 81 simply by looking for its bytes. It's no // so easy to detect sysenter, because the PC we get is an // address in the specific system call, which actually calls // another routine (_sysenter_trap) to do the sysenter. // We look for the CALL disp32 instruction and, if we see, // work out the address that it calls. We then get the // instructions from that address. If that looks like a // sysenter, we're probably looking at a system call.{ int err; bool isSystemCall; uint8_t buf[5]; uint32_t sysEnterOffset; isSystemCall = false; err = context->readBytes(context, pc - sizeof(buf), buf, sizeof(buf)); if (err == 0) { isSystemCall = ( buf[3] == 0xcd && buf[4] == 0x81); // INT 81 if ( ! isSystemCall && (buf[0] == 0xe8) ) { // CALL disp32 // Get the disp32. sysEnterOffset = (buf[1] | (buf[2] << 8) | (buf[3] << 16) | (buf[4] << 24));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -