X-Git-Url: https://git.stderr.nl/gitweb?a=blobdiff_plain;f=interpreters%2Fglulxe%2Fprofile-analyze.py;h=ae2b2fb04c046b145ca546166662c583c1b9af71;hb=2ab029c7428eb3e4894e7d165815c90fbc39a8b2;hp=c7f7345ab26cc118949ffdb4f0d7ccbbb0ebf6fe;hpb=c4142afddd5b220686eb64c4bd346a722f9e20c1;p=projects%2Fchimara%2Fchimara.git diff --git a/interpreters/glulxe/profile-analyze.py b/interpreters/glulxe/profile-analyze.py index c7f7345..ae2b2fb 100644 --- a/interpreters/glulxe/profile-analyze.py +++ b/interpreters/glulxe/profile-analyze.py @@ -11,29 +11,36 @@ Optionally, this script can also read the debug output of the Inform 6 compiler (or the assembly output), and use that to figure out the names of all the functions that were profiled. +You can also generate profiling output in the same form as dumbfrotz's +Z-machine profiling output. (If that happens to be what you want.) Use +the --dumbfrotz argument. + Using this script is currently a nuisance. The requirements: - You must compile Glulxe with profiling (the VM_PROFILING compile-time option). - (If you want function names) you should compile your Inform 6 source using the -k switch. This generates a "gameinfo.dbg" file. -- Run Glulxe, play some of the game, and quit. This generates a data - file called "profile-raw". +- Run Glulxe, using the "--profile profile-raw" option. Play some of + the game, and quit. This generates a data file called "profile-raw". - Run this script, giving gameinfo.dbg and profile-raw as arguments. + (You can provide dispatch_dump.xml as an optional third argument. + This file gives the names of Glk functions; it is available from + https://github.com/erkyrath/glk-dev/tree/master/dispatch_dump .) To sum up, in command-line form: % inform -G -k game.inf -% glulxe game.ulx -% python profile-analyze.py profile-raw gameinfo.dbg +% glulxe --profile profile-raw game.ulx +% python profile-analyze.py profile-raw gameinfo.dbg dispatch_dump.xml You can also use the assembly output of the Inform compiler, which you get with the -a switch. Save the output and use it instead of the debug file: % inform -G -a game.inf > game.asm -% glulxe game.ulx -% python profile-analyze.py profile-raw game.asm +% glulxe --profile profile-raw game.ulx +% python profile-analyze.py profile-raw game.asm dispatch_dump.xml The limitations: @@ -50,10 +57,14 @@ you use that data, these will not be named; they will be listed as often the most costly ones. (Therefore, you'll almost certainly want to use -k.) +If you leave off the "dispatch_dump.xml" argument, everything will +still work, but @glk function entries will be listed by number rather +than by name. + You can explore the profiling data in more detail by running the script interactively: -% python -i profile-analyze.py profile-raw game.asm +% python -i profile-analyze.py profile-raw game.asm dispatch_dump.xml After it runs, you'll be left at a Python prompt. The environment will contain mappings called "functions" (mapping addresses to @@ -89,27 +100,35 @@ A Function object has lots of attributes: function). self_ops=INT: The number of opcodes executed during all calls to the function, excluding time spent in subcalls. + max_depth=INT: The deepest this function has been nested on the + stack, during any call. + max_stack_use=INT: The greatest number of words on the stack, during + any call. (This is measured when the function returns, so it may + not capture the peak stack usage. If a function never returns, e.g. + Main__(), then this value is approximate.) (The self_time is the "cost" used for the original listing.) Note that if a function does not make any function calls, total_time will be the same as self_time (and total_ops the same as self_ops). -Two special function entries may be included. The function with address -"1" (which is not a legal Glulx function address) represents time spent -in @glk opcode calls. This will typically have a large self_time, -because it includes all the time spent waiting for input. +Some of the function entries refer to special interpreter operations. +(These have high addresses, outside the range of normal game files.) +Functions with addresses in the 0xE0000000 range are the interpreter's +output opcodes: @streamchar, @streamunichar, @streamnum, @streamstr. -The function with address "2" represents the time spent printing string -data (the @streamchar, @streamunichar, @streamnum, and @streamstr -opcodes). +Functions with addresses in the 0xF0000000 range are @glk opcode calls. +The number in the lower bits specifies which Glk function was called. +You will always see a large self_time for function 0xF00000C0; this +represents all the time spent waiting for input in glk_select(). -(Both "1" and "2" represent time spent in the Glk library, but they -get there by different code paths.) +(Both the 0xE0000000 and 0xF0000000 entries represent time spent in the +Glk library, but they get there by different code paths.) -The function with the lowest address (ignoring "1" and "2") is the -top-level Main__() function generated by the compiler. Its total_time -is the running time of the entire program. +The function with the lowest address is the top-level Main__() +function generated by the compiler. Its total_time is the running time +of the entire program; its total_ops is the number of opcodes executed +by the entire program; its max_depth is zero. """ @@ -117,8 +136,14 @@ import sys, os.path import xml.sax from struct import unpack +dumb_frotz_mode = False + +if ('--dumbfrotz' in sys.argv): + sys.argv.remove('--dumbfrotz') + dumb_frotz_mode = True + if (len(sys.argv) < 2): - print "Usage: profile-analyze.py profile-raw [ gameinfo.dbg | game.asm ]" + print "Usage: profile-analyze.py [--dumbfrotz] profile-raw [ gameinfo.dbg | game.asm ] [ dispatch_dump.xml ]" sys.exit(1) profile_raw = sys.argv[1] @@ -133,10 +158,21 @@ if (len(sys.argv) >= 3): print 'File not readable:', game_asm sys.exit(1) +dispatch_dump = None +if (len(sys.argv) >= 4): + dispatch_dump = sys.argv[3] + if (not os.path.exists(dispatch_dump)): + print 'File not readable:', dispatch_dump + sys.exit(1) + special_functions = { - 1: 'glk', 2: 'streamout' + 0xE0000001: 'streamchar', + 0xE0000002: 'streamunichar', + 0xE0000003: 'streamnum', + 0xE0000004: 'streamstr', } -max_special_functions = max(special_functions.keys()) + +glk_functions = {} functions = None sourcemap = None @@ -146,7 +182,14 @@ class Function: self.addr = addr self.hexaddr = hexaddr val = special_functions.get(addr) - if (val is None): + if (addr >= 0xF0000000): + name = glk_functions.get(addr-0xF0000000) + if (not name): + name = hex(addr-0xF0000000)[2:] + name = '$' + name.replace('L', '') + self.name = '<@glk_' + name + '>' + self.special = True + elif (val is None): self.name = '' self.special = False else: @@ -161,6 +204,10 @@ class Function: self.total_time = float(attrs['total_time']) self.self_ops = int(attrs['self_ops']) self.self_time = float(attrs['self_time']) + if (attrs.has_key('max_depth')): + self.max_depth = int(attrs['max_depth']) + if (attrs.has_key('max_stack_use')): + self.max_stack_use = int(attrs['max_stack_use']) def __repr__(self): return '' @@ -174,6 +221,23 @@ class Function: print ' %.6f sec (%d ops) spent executing' % (self.self_time, self.self_ops) print ' %.6f sec (%d ops) including child calls' % (self.total_time, self.total_ops) + def dump_dumbfrotz_style(self): + percent1 = ' ' + percent2 = ' ' + pc1 = int(100*(float(self.self_ops)/float(ops_executed))) + pc2 = int(100*(float(self.total_ops)/float(ops_executed))) + if (pc1 > 0): + percent1 = "%3d%%" % pc1 + if (pc2 > 0): + percent2 = "%3d%%" % pc2 + print '%-36s %s %-10lu %s %-10lu %-10lu %-4d' % (self.name, percent1, self.self_ops, percent2, self.total_ops, self.call_count, self.max_depth) + +class DispatchDumpHandler(xml.sax.handler.ContentHandler): + def startElement(self, name, attrs): + if (name == 'function'): + addr = int(attrs['id']) + glk_functions[addr] = str(attrs['name']) + class ProfileRawHandler(xml.sax.handler.ContentHandler): def startElement(self, name, attrs): global functions @@ -304,7 +368,7 @@ class DebugFile: dat = fl.read(1) num = unpack('>B', dat)[0] name = self.read_string(fl) - self.arrays[num] = name + self.globals[num] = name def read_array_rec(self, fl): dat = fl.read(2) @@ -416,19 +480,25 @@ class DebugFile: # Begin the work +if (dispatch_dump): + xml.sax.parse(dispatch_dump, DispatchDumpHandler()) + xml.sax.parse(profile_raw, ProfileRawHandler()) source_start = min([ func.addr for func in functions.values() if not func.special ]) -print 'Code segment begins at', hex(source_start) -print len(functions), 'called functions found in', profile_raw +if (not dumb_frotz_mode): + print 'Code segment begins at', hex(source_start) + print len(functions), 'called functions found in', profile_raw if (game_asm): fl = open(game_asm, 'rb') val = fl.read(2) fl.close() - if (val == '\xde\xbf'): + if (not val): + pass + elif (val == '\xde\xbf'): fl = open(game_asm, 'rb') debugfile = DebugFile(fl) fl.close() @@ -454,8 +524,9 @@ if (sourcemap): func.name = funcname func.linenum = linenum - if (badls): - print len(badls), 'functions from', profile_raw, 'did not appear in asm (veneer functions)' + if (not dumb_frotz_mode): + if (badls): + print len(badls), 'functions from', profile_raw, 'did not appear in asm (veneer functions)' function_names = {} for func in functions.values(): @@ -463,11 +534,29 @@ if (sourcemap): if (sourcemap): uncalled_funcs = [ funcname for (addr, (linenum, funcname)) in sourcemap.items() if (addr+source_start) not in functions ] - print len(uncalled_funcs), 'functions found in', game_asm, 'were never called' - -print 'Functions that consumed the most time (excluding children):' -ls = functions.values() -ls.sort(lambda x1, x2: cmp(x2.self_time, x1.self_time)) -for func in ls[:10]: - func.dump() - + if (not dumb_frotz_mode): + print len(uncalled_funcs), 'functions found in', game_asm, 'were never called' + +if (dumb_frotz_mode): + ls = functions.values() + ls.sort(lambda x1, x2: cmp(x2.total_ops, x1.total_ops)) + ops_executed = 0 + routine_calls = 0 + max_stack_use = max([func.max_stack_use for func in ls]) + for func in ls: + if (func.total_ops > ops_executed): + ops_executed = func.total_ops + routine_calls = routine_calls + func.call_count + print 'Total opcodes: %lu' % ops_executed + print 'Total routine calls: %lu' % routine_calls + print 'Max. stack usage: %li' % max_stack_use + print '' + print '%-35s %-10s %-10s %-10s %-4s' % ('Routine', 'Ops', 'Ops(+Subs)', 'Calls', 'Nest') + for func in ls: + func.dump_dumbfrotz_style() +else: + print 'Functions that consumed the most time (excluding children):' + ls = functions.values() + ls.sort(lambda x1, x2: cmp(x2.self_time, x1.self_time)) + for func in ls[:10]: + func.dump()