diff options
-rwxr-xr-x | bin/win32kprof.py | 124 | ||||
-rw-r--r-- | src/gallium/auxiliary/util/p_debug_prof.c | 308 |
2 files changed, 233 insertions, 199 deletions
diff --git a/bin/win32kprof.py b/bin/win32kprof.py index 4ffa4cc694..c36317d23a 100755 --- a/bin/win32kprof.py +++ b/bin/win32kprof.py @@ -117,15 +117,7 @@ class Reader: self.symbols = [] self.symbol_cache = {} self.base_addr = None - self.last_stamp = 0 - self.stamp_base = 0 - def unwrap_stamp(self, stamp): - if stamp < self.last_stamp: - self.stamp_base += 1 << 32 - self.last_stamp = stamp - return self.stamp_base + stamp - def read_map(self, mapfile): # See http://msdn.microsoft.com/en-us/library/k7xkk3e2.aspx last_addr = 0 @@ -140,10 +132,6 @@ class Reader: continue section, offset = section_offset.split(':') addr = int(offset, 16) - if last_addr == addr: - # TODO: handle collapsed functions - #assert last_name == name - continue self.symbols.append((addr, name)) last_addr = addr last_name = name @@ -170,12 +158,12 @@ class Reader: e = i continue if addr == end_addr: - return next_name + return next_name, addr - start_addr if addr > end_addr: s = i continue return name, addr - start_addr - return "0x%08x" % addr, 0 + raise ValueError def lookup_symbol(self, name): for symbol_addr, symbol_name in self.symbols: @@ -187,96 +175,76 @@ class Reader: profile = Profile() fp = file(data, "rb") - entry_format = "II" + entry_format = "IIII" entry_size = struct.calcsize(entry_format) caller = None caller_stack = [] - last_stamp = 0 - delta = 0 while True: entry = fp.read(entry_size) if len(entry) < entry_size: break - addr_exit, stamp = struct.unpack(entry_format, entry) - if addr_exit == 0 and stamp == 0: - break - addr = addr_exit & 0xfffffffe - exit = addr_exit & 0x00000001 + caller_addr, callee_addr, samples_lo, samples_hi = struct.unpack(entry_format, entry) + if caller_addr == 0 and callee_addr == 0: + continue if self.base_addr is None: - ref_addr = self.lookup_symbol('___debug_profile_reference2@0') + ref_addr = self.lookup_symbol('___debug_profile_reference@0') if ref_addr: - self.base_addr = (addr - ref_addr) & ~(options.align - 1) + self.base_addr = (caller_addr - ref_addr) & ~(options.align - 1) else: self.base_addr = 0 - #print hex(self.base_addr) - rel_addr = addr - self.base_addr - #print hex(addr - self.base_addr) - - symbol, offset = self.lookup_addr(rel_addr) - stamp = self.unwrap_stamp(stamp) - delta = stamp - last_stamp - - if not exit: - if options.verbose >= 2: - sys.stderr.write("%08x >> 0x%08x\n" % (stamp, addr)) - if options.verbose: - sys.stderr.write("%+8u >> %s+%u\n" % (delta, symbol, offset)) + sys.stderr.write('Base addr: %08x\n' % self.base_addr) + + samples = (samples_hi << 32) | samples_lo + + try: + caller_raddr = caller_addr - self.base_addr + caller_sym, caller_ofs = self.lookup_addr(caller_raddr) + + try: + caller = profile.functions[caller_sym] + except KeyError: + caller_name = demangle(caller_sym) + caller = Function(caller_sym, caller_name) + profile.add_function(caller) + caller[CALLS] = 0 + caller[SAMPLES] = 0 + except ValueError: + caller = None + + if not callee_addr: + if caller: + caller[SAMPLES] += samples else: - if options.verbose >= 2: - sys.stderr.write("%08x << 0x%08x\n" % (stamp, addr)) - if options.verbose: - sys.stderr.write("%+8u << %s+%u\n" % (delta, symbol, offset)) - - # Eliminate outliers - if exit and delta > 0x1000000: - # TODO: Use a statistic test instead of a threshold - sys.stderr.write("warning: ignoring excessive delta of +%u in function %s\n" % (delta, symbol)) - delta = 0 - - # Remove overhead - # TODO: compute the overhead automatically - delta = max(0, delta - 84) - - if caller is not None: - caller[SAMPLES] += delta - - if not exit: - # Function call + callee_raddr = callee_addr - self.base_addr + callee_sym, callee_ofs = self.lookup_addr(callee_raddr) + try: - callee = profile.functions[symbol] + callee = profile.functions[callee_sym] except KeyError: - name = demangle(symbol) - callee = Function(symbol, name) + callee_name = demangle(callee_sym) + callee = Function(callee_sym, callee_name) profile.add_function(callee) - callee[CALLS] = 1 + callee[CALLS] = samples callee[SAMPLES] = 0 else: - callee[CALLS] += 1 + callee[CALLS] += samples if caller is not None: try: call = caller.calls[callee.id] except KeyError: call = Call(callee.id) - call[CALLS] = 1 + call[CALLS] = samples caller.add_call(call) else: - call[CALLS] += 1 - caller_stack.append(caller) - - caller = callee - - else: - # Function return - if caller is not None: - assert caller.id == symbol - try: - caller = caller_stack.pop() - except IndexError: - caller = None - - last_stamp = stamp + call[CALLS] += samples + + if options.verbose: + if not callee_addr: + sys.stderr.write('%s+%u: %u\n' % (caller_sym, caller_ofs, samples)) + else: + sys.stderr.write('%s+%u -> %s+%u: %u\n' % (caller_sym, caller_ofs, callee_sym, callee_ofs, samples)) # compute derived data profile.validate() diff --git a/src/gallium/auxiliary/util/p_debug_prof.c b/src/gallium/auxiliary/util/p_debug_prof.c index 69c914c780..5f9772ef91 100644 --- a/src/gallium/auxiliary/util/p_debug_prof.c +++ b/src/gallium/auxiliary/util/p_debug_prof.c @@ -46,97 +46,83 @@ #include "util/u_string.h" -#define PROFILE_FILE_SIZE 4*1024*1024 +#define PROFILE_TABLE_SIZE (1024*1024) #define FILE_NAME_SIZE 256 -static WCHAR wFileName[FILE_NAME_SIZE] = L"\\??\\c:\\00000000.trace"; -static ULONG_PTR iFile = 0; -static BYTE *pMap = NULL; -static BYTE *pMapBegin = NULL; -static BYTE *pMapEnd = NULL; +struct debug_profile_entry +{ + uintptr_t caller; + uintptr_t callee; + uint64_t samples; +}; +static unsigned long enabled = 0; -void __declspec(naked) __cdecl -debug_profile_close(void) -{ - _asm { - push eax - push ebx - push ecx - push edx - push ebp - push edi - push esi - } +static WCHAR wFileName[FILE_NAME_SIZE] = L"\\??\\c:\\00000000.prof"; +static ULONG_PTR iFile = 0; - if(iFile) { - EngUnmapFile(iFile); - /* Truncate the file */ - pMap = EngMapFile(wFileName, pMap - pMapBegin, &iFile); - if(pMap) - EngUnmapFile(iFile); - } - iFile = 0; - pMapBegin = pMapEnd = pMap = NULL; - - _asm { - pop esi - pop edi - pop ebp - pop edx - pop ecx - pop ebx - pop eax - ret - } -} +static struct debug_profile_entry *table = NULL; +static unsigned long free_table_entries = 0; +static unsigned long max_table_entries = 0; +uint64_t start_stamp = 0; +uint64_t end_stamp = 0; -void __declspec(naked) __cdecl -debug_profile_open(void) -{ - WCHAR *p; - - _asm { - push eax - push ebx - push ecx - push edx - push ebp - push edi - push esi - } - debug_profile_close(); +static void +debug_profile_entry(uintptr_t caller, uintptr_t callee, uint64_t samples) +{ + unsigned hash = ( caller + callee ) & PROFILE_TABLE_SIZE - 1; - // increment starting from the less significant digit - p = &wFileName[14]; while(1) { - if(*p == '9') { - *p-- = '0'; + if(table[hash].caller == 0 && table[hash].callee == 0) { + table[hash].caller = caller; + table[hash].callee = callee; + table[hash].samples = samples; + --free_table_entries; + break; } - else { - *p += 1; + else if(table[hash].caller == caller && table[hash].callee == callee) { + table[hash].samples += samples; break; } + else { + ++hash; + } } +} - pMap = EngMapFile(wFileName, PROFILE_FILE_SIZE, &iFile); - if(pMap) { - pMapBegin = pMap; - pMapEnd = pMap + PROFILE_FILE_SIZE; - } - - _asm { - pop esi - pop edi - pop ebp - pop edx - pop ecx - pop ebx - pop eax - ret - } + +static uintptr_t caller_stack[1024]; +static unsigned last_caller = 0; + + +static int64_t delta(void) { + int64_t result = end_stamp - start_stamp; + if(result > UINT64_C(0xffffffff)) + result = 0; + return result; +} + + +static void __cdecl +debug_profile_enter(uintptr_t callee) +{ + uintptr_t caller = last_caller ? caller_stack[last_caller - 1] : 0; + + if (caller) + debug_profile_entry(caller, 0, delta()); + debug_profile_entry(caller, callee, 1); + caller_stack[last_caller++] = callee; +} + + +static void __cdecl +debug_profile_exit(uintptr_t callee) +{ + debug_profile_entry(callee, 0, delta()); + if(last_caller) + --last_caller; } @@ -148,31 +134,49 @@ debug_profile_open(void) void __declspec(naked) __cdecl _penter(void) { _asm { - push ebx -retry: - mov ebx, [pMap] - test ebx, ebx - jz done - cmp ebx, [pMapEnd] - jne ready - call debug_profile_open - jmp retry -ready: push eax + mov eax, [enabled] + test eax, eax + jz skip + push edx - mov eax, [esp+12] - and eax, 0xfffffffe - mov [ebx], eax - add ebx, 4 + rdtsc - mov [ebx], eax - add ebx, 4 - mov [pMap], ebx + mov dword ptr [end_stamp], eax + mov dword ptr [end_stamp+4], edx + + xor eax, eax + mov [enabled], eax + + mov eax, [esp+8] + + push ebx + push ecx + push ebp + push edi + push esi + + push eax + call debug_profile_enter + add esp, 4 + + pop esi + pop edi + pop ebp + pop ecx + pop ebx + + mov eax, 1 + mov [enabled], eax + + rdtsc + mov dword ptr [start_stamp], eax + mov dword ptr [start_stamp+4], edx + pop edx +skip: pop eax -done: - pop ebx - ret + ret } } @@ -185,30 +189,48 @@ done: void __declspec(naked) __cdecl _pexit(void) { _asm { - push ebx -retry: - mov ebx, [pMap] - test ebx, ebx - jz done - cmp ebx, [pMapEnd] - jne ready - call debug_profile_open - jmp retry -ready: push eax + mov eax, [enabled] + test eax, eax + jz skip + push edx - mov eax, [esp+12] - or eax, 0x00000001 - mov [ebx], eax - add ebx, 4 + rdtsc - mov [ebx], eax - add ebx, 4 - mov [pMap], ebx + mov dword ptr [end_stamp], eax + mov dword ptr [end_stamp+4], edx + + xor eax, eax + mov [enabled], eax + + mov eax, [esp+8] + + push ebx + push ecx + push ebp + push edi + push esi + + push eax + call debug_profile_exit + add esp, 4 + + pop esi + pop edi + pop ebp + pop ecx + pop ebx + + mov eax, 1 + mov [enabled], eax + + rdtsc + mov dword ptr [start_stamp], eax + mov dword ptr [start_stamp+4], edx + pop edx +skip: pop eax -done: - pop ebx ret } } @@ -230,15 +252,53 @@ __debug_profile_reference(void) { void debug_profile_start(void) { - debug_profile_open(); - - if(pMap) { + WCHAR *p; + + // increment starting from the less significant digit + p = &wFileName[14]; + while(1) { + if(*p == '9') { + *p-- = '0'; + } + else { + *p += 1; + break; + } + } + + table = EngMapFile(wFileName, + PROFILE_TABLE_SIZE*sizeof(struct debug_profile_entry), + &iFile); + if(table) { unsigned i; + + free_table_entries = max_table_entries = PROFILE_TABLE_SIZE; + memset(table, 0, PROFILE_TABLE_SIZE*sizeof(struct debug_profile_entry)); + + table[0].caller = (uintptr_t)&__debug_profile_reference; + table[0].callee = 0; + table[0].samples = 0; + --free_table_entries; + + _asm { + push edx + push eax + + rdtsc + mov dword ptr [start_stamp], eax + mov dword ptr [start_stamp+4], edx + + pop edx + pop eax + } + + last_caller = 0; + + enabled = 1; + for(i = 0; i < 8; ++i) { _asm { - call _penter call __debug_profile_reference - call _pexit } } } @@ -248,7 +308,13 @@ debug_profile_start(void) void debug_profile_stop(void) { - debug_profile_close(); + enabled = 0; + + if(iFile) + EngUnmapFile(iFile); + iFile = 0; + table = NULL; + free_table_entries = max_table_entries = 0; } #endif /* PROFILE */ |