#!/bin/rc
# Dave Eckhardt, 2009-04-12
#
# Report which symbols (or which object files/libraries) account for the
# bytes of an executable.
#
# Usage: prog [-b] [-f] executable-file [object-file|library...]
#
#	-b	also count BSS symbols (nm kinds "b" and "B"), which are
#		skipped by default
#	-f	accumulate sizes per originating file instead of printing
#		one line per symbol
#
# Output, one line per entity: "size symbol [origin]", or "size file"
# with -f.  The origin column is printed only when at least one object
# file or library supplied symbol origins.  Symbols whose origin is
# unknown are attributed to "???".  Entities of 64 bytes or fewer are
# ignored.

rfork ne

# Print the usage message on standard error and exit with it as status.
fn usage {
	msg='usage: '^`{basename $0}^' [-b] [-f] executable-file [object-file|library...]'
	echo $msg >[1=2]
	echo ' -b includes BSS (present in memory but absent from executable file)' >[1=2]
	echo ' -f report blame by file, not by symbol (you must provide objs/libs)' >[1=2]
	exit $msg
}

# Complain on standard error that file $1 is unreadable, then exit.
fn missing {
	msg=$1^': not found'
	echo $msg >[1=2]
	exit $msg
}

~ $#* 0 && usage

# Plan
#
# Parse command line into flags, objects, libraries, and one executable.
# Inventory objects and libraries into a symbol->origin hash.
# For each executable symbol, compute size and origin, building blame hash.
# Print blame hash.

# Clear every variable we later test with $#, so that values imported
# from the environment cannot masquerade as command-line flags.
libs=()
objs=()
execf=()
dobss=()
blamefiles=()

for (arg) {
	switch ($arg) {
	case -b
		dobss=1
	case -f
		blamefiles=1
	case -*
		usage
	case *.a
		test -r $arg || missing $arg
		libs=($libs $arg)
	case *.? *.a?
		test -r $arg || missing $arg
		objs=($objs $arg)
	case *
		test -r $arg || missing $arg
		execf=($execf $arg)
	}
}

# Exactly one executable file.
~ $#execf 1 || usage

# If we have only one intermediate file, nm will helpfully
# omit to print the file name in its output.  So double it.
~ $#objs 1 && objs=($objs $objs)

# The flags are handed to awk as extra BEGIN rules spliced into the program.
BSSawk='BEGIN { dobss = 0 }'
~ $#dobss 1 && BSSawk='BEGIN { dobss = 1}'

BLAMEawk='BEGIN { blamefiles = 0 }'
~ $#blamefiles 1 && BLAMEawk='BEGIN { blamefiles = 1 }'

# Ok, enough meditation.  Let's get to work.

# The seventh word of the size output is taken as the total.
sizeoutput=(`{size $execf})
size=$sizeoutput(7)

# Without a total the awk program below cannot even be assembled
# (rc refuses to concatenate with an empty list), so bail out cleanly.
~ $#size 1 || {
	msg='cannot determine size of '^$execf
	echo $msg >[1=2]
	exit $msg
}

# We run nm manually multiple times so we can decorate
# member names with the name of the library.  Note that
# we trim out symbol references here via grep to save awk
# some work.  Also, the order of these steps matters: the
# executable must be last and sorted numerically.
#
# NOTE(review): the last symbol is closed off against the total from
# size, converted to hex.  That total is a byte count rather than an
# address, so for an executable not loaded at address 0 the final
# entry presumably comes out non-positive and is dropped -- confirm.
{
	for (lib in $libs) {
		# Instead of saying that chatty9p comes from "thread.8",
		# rewrite to say it comes from "lib9p(thread.8)".
		# Strip only the trailing ".a" from the library name.
		b=`{basename $lib | sed -e 's/\.a$//'}
		nm $lib | sed -e 's/(^[^:]*):/'^$b^'(\1):/'
	}
	~ $#objs 0 || nm $objs
	nm -n $execf
} | grep -v ' U ' | \
awk 'BEGIN { totalsize = sprintf("%x",'^$size^') }' ^ ' ' ^ $"BSSawk ^ $"BLAMEawk ^ '
BEGIN {
	# Map each hex digit, in either case, to its value.
	for (i=0; i<16; i++)
		_unhex[sprintf("%x", i)] = _unhex[sprintf("%X", i)] = i
	got1origin = 0;
	donefirst = 0;
}

# Convert the hex string s to a number.
function unhex(s,    i, v) {
	v = 0
	for (i=1; i <= length(s); i++)
		v = v*16 + _unhex[substr(s,i,1)]
	return v
}

# Record that symbol sym is defined in file.
function originates(sym, file) {
	origin[sym] = file;
	got1origin = 1;
}

# Account for symbol sym of the given nm kind occupying [start, end),
# both given as hex strings: either print it or add it to the per-file blame.
function entity(sym, start, end, kind,    mysize, myfile) {
	# Ignore "small" things - ints in header files are "defined" multiply, but are only noise for our purposes
	if ((mysize = unhex(end) - unhex(start)) <= 64)
		return;
	if (!dobss && (kind == "b" || kind == "B"))
		return;
	if ((myfile = origin[sym]) == "") {
		myfile = "???"
	}
	if (blamefiles) {
		blame[myfile] += mysize
	} else {
		if (got1origin)
			print mysize, sym, myfile;
		else
			print mysize, sym;
	}
}

# First we expect to see lines like this:
#	xalloc.8: T xsummary
# We key off the colon.
(($1 ~ /^..*:$/) && (NF == 3)) {
	originates($3, substr($1,1,length($1)-1));
}

# Then we expect lines like this:
#	f0100020 T _startKADDR
# The size of a symbol is the distance to the next address, so each
# symbol is reported only once its successor has been read.
(($1 ~ /^[a-z0-9][a-z0-9]*$/) && (NF == 3)) {
	if (donefirst)
		entity(oldsymbol, oldhex, $1, oldkind);
	# "fill pipe"
	oldhex = $1;
	oldkind = $2;
	oldsymbol = $3;
	donefirst = 1;
}

END {
	# first flush last symbol from pipe, if there ever was one
	if (donefirst)
		entity(oldsymbol, oldhex, totalsize, oldkind);
	if (blamefiles) {
		for (file in blame) {
			print blame[file], file;
		}
	}
}
'
# awk code for unhex due to Russ Cox

exit ''