Added 'profilehooks' profiling module
authorThierry Florac <tflorac@ulthar.net>
Wed, 02 Dec 2009 23:11:49 +0100
changeset 26 06aa7b6a131c
parent 25 7d4d896fd1ce
child 27 5581b89ad027
Added 'profilehooks' profiling module
ztfy/utils/profilehooks.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ztfy/utils/profilehooks.py	Wed Dec 02 23:11:49 2009 +0100
@@ -0,0 +1,599 @@
+"""
+Profiling hooks
+
+This module contains a couple of decorators (`profile` and `coverage`) that
+can be used to wrap functions and/or methods to produce profiles and line
+coverage reports.
+
+Usage example (Python 2.4 or newer):
+
+    from profilehooks import profile, coverage
+
+    @profile    # or @coverage
+    def fn(n):
+        if n < 2: return 1
+        else: return n * fn(n-1)
+
+    print fn(42)
+
+Usage example (Python 2.3 or older):
+
+    from profilehooks import profile, coverage
+
+    def fn(n):
+        if n < 2: return 1
+        else: return n * fn(n-1)
+
+    # Now wrap that function in a decorator
+    fn = profile(fn) # or coverage(fn)
+
+    print fn(42)
+
+Reports for all thusly decorated functions will be printed to sys.stdout
+on program termination.  You can alternatively request for immediate
+reports for each call by passing immediate=True to the profile decorator.
+
+There's also a @timecall decorator for printing the time to sys.stderr
+every time a function is called, when you just want to get a rough measure
+instead of a detailed (but costly) profile.
+
+Caveats
+
+  A thread on python-dev convinced me that hotshot produces bogus numbers.
+  See http://mail.python.org/pipermail/python-dev/2005-November/058264.html
+
+  I don't know what will happen if a decorated function will try to call
+  another decorated function.  All decorators probably need to explicitly
+  support nested profiling (currently TraceFuncCoverage is the only one that
+  supports this, while HotShotFuncProfile has support for recursive functions.)
+
+  Profiling with hotshot creates temporary files (*.prof and *.prof.pickle for
+  profiling, *.cprof for coverage) in the current directory.  These files are
+  not cleaned up.  (In fact, the *.pickle versions are intentionally written
+  out in case you want to look at the profiler results without having to parse
+  the big *.prof file with hotshot.stats.load, which takes ages.  Just unpickle
+  the file and you'll get a pstats object.)
+
+  Coverage analysis with hotshot seems to miss some executions resulting in
+  lower line counts and some lines errorneously marked as never executed.  For
+  this reason coverage analysis now uses trace.py which is slower, but more
+  accurate.
+
+  Decorating functions causes doctest.testmod() to ignore doctests in those
+  functions.
+
+Copyright (c) 2004--2006 Marius Gedminas <marius@pov.lt>
+
+Released under the MIT licence since December 2006:
+
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the "Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in
+    all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS IN THE SOFTWARE.
+
+(Previously it was distributed under the GNU General Public Licence.)
+"""
+# $Id: profilehooks.py,v 1.2 2008/07/18 14:51:08 tflorac Exp $
+
+__author__ = "Marius Gedminas (marius@gedmin.as)"
+__copyright__ = "Copyright 2004-2006, Marius Gedminas"
+__license__ = "MIT"
+__version__ = "1.0"
+__date__ = "2006-12-06"
+
+
+import atexit
+import inspect
+import sys
+import re
+import cPickle as pickle
+
+# For profiling
+from profile import Profile
+import pstats
+
+# For hotshot profiling (inaccurate!)
+import hotshot
+import hotshot.stats
+
+# For trace.py coverage
+import trace
+
+# For hotshot coverage (inaccurate!; uses undocumented APIs; might break)
+import _hotshot
+import hotshot.log
+
+# For timecall
+import time
+
+
+def profile(fn=None, skip=0, filename=None, immediate=False):
+    """Mark `fn` for profiling.
+
+    If `immediate` is False, profiling results will be printed to sys.stdout on
+    program termination.  Otherwise results will be printed after each call.
+
+    If `skip` is > 0, first `skip` calls to `fn` will not be profiled.
+
+    If `filename` is specified, the profile stats will be pickled and stored in
+    a file.
+
+    Usage:
+
+        def fn(...):
+            ...
+        fn = profile(fn, skip=1)
+
+    If you are using Python 2.4, you should be able to use the decorator
+    syntax:
+
+        @profile(skip=3)
+        def fn(...):
+            ...
+
+    or just
+
+        @profile
+        def fn(...):
+            ...
+
+    """
+    if fn is None: # @profile() syntax -- we are a decorator maker
+        def decorator(fn):
+            return profile(fn, skip=skip, filename=filename,
+                           immediate=immediate)
+        return decorator
+    # @profile syntax -- we are a decorator.
+    fp = FuncProfile(fn, skip=skip, filename=filename, immediate=immediate)
+         # or HotShotFuncProfile
+    # We cannot return fp or fp.__call__ directly as that would break method
+    # definitions, instead we need to return a plain function.
+    def new_fn(*args, **kw):
+        return fp(*args, **kw)
+    new_fn.__doc__ = fn.__doc__
+    return new_fn
+
+
+def coverage(fn):
+    """Mark `fn` for line coverage analysis.
+
+    Results will be printed to sys.stdout on program termination.
+
+    Usage:
+
+        def fn(...):
+            ...
+        fn = coverage(fn)
+
+    If you are using Python 2.4, you should be able to use the decorator
+    syntax:
+
+        @coverage
+        def fn(...):
+            ...
+
+    """
+    fp = TraceFuncCoverage(fn) # or HotShotFuncCoverage
+    # We cannot return fp or fp.__call__ directly as that would break method
+    # definitions, instead we need to return a plain function.
+    def new_fn(*args, **kw):
+        return fp(*args, **kw)
+    new_fn.__doc__ = fn.__doc__
+    return new_fn
+
+
+def coverage_with_hotshot(fn):
+    """Mark `fn` for line coverage analysis.
+
+    Uses the 'hotshot' module for fast coverage analysis.
+
+    BUG: Produces inaccurate results.
+
+    See the docstring of `coverage` for usage examples.
+    """
+    fp = HotShotFuncCoverage(fn)
+    # We cannot return fp or fp.__call__ directly as that would break method
+    # definitions, instead we need to return a plain function.
+    def new_fn(*args, **kw):
+        return fp(*args, **kw)
+    new_fn.__doc__ = fn.__doc__
+    return new_fn
+
+
+class FuncProfile:
+    """Profiler for a function (uses profile)."""
+
+    # This flag is shared between all instances
+    in_profiler = False
+
+    def __init__(self, fn, skip=0, filename=None, immediate=False):
+        """Creates a profiler for a function.
+
+        Every profiler has its own log file (the name of which is derived from
+        the function name).
+
+        HotShotFuncProfile registers an atexit handler that prints profiling
+        information to sys.stderr when the program terminates.
+
+        The log file is not removed and remains there to clutter the current
+        working directory.
+        """
+        self.fn = fn
+        self.filename = filename
+        self.immediate = immediate
+        self.stats = pstats.Stats(Profile())
+        self.ncalls = 0
+        self.skip = skip
+        self.skipped = 0
+        atexit.register(self.atexit)
+
+    def __call__(self, *args, **kw):
+        """Profile a singe call to the function."""
+        self.ncalls += 1
+        if self.skip > 0:
+            self.skip -= 1
+            self.skipped += 1
+            return self.fn(*args, **kw)
+        if FuncProfile.in_profiler:
+            # handle recursive calls
+            return self.fn(*args, **kw)
+        # You cannot reuse the same profiler for many calls and accumulate
+        # stats that way.  :-/
+        profiler = Profile()
+        try:
+            FuncProfile.in_profiler = True
+            return profiler.runcall(self.fn, *args, **kw)
+        finally:
+            FuncProfile.in_profiler = False
+            self.stats.add(profiler)
+            if self.immediate:
+                self.print_stats()
+                self.reset_stats()
+
+    def print_stats(self):
+        """Print profile information to sys.stdout."""
+        funcname = self.fn.__name__
+        filename = self.fn.func_code.co_filename
+        lineno = self.fn.func_code.co_firstlineno
+        print
+        print "*** PROFILER RESULTS ***"
+        print "%s (%s:%s)" % (funcname, filename, lineno)
+        print "function called %d times" % self.ncalls,
+        if self.skipped:
+            print "(%d calls not profiled)" % self.skipped
+        else:
+            print
+        print
+        stats = self.stats
+        if self.filename:
+            pickle.dump(stats, file(self.filename, 'w'))
+        stats.strip_dirs()
+        stats.sort_stats('cumulative', 'time', 'calls')
+        stats.print_stats(40)
+
+    def reset_stats(self):
+        """Reset accumulated profiler statistics."""
+        self.stats = pstats.Stats(Profile())
+        self.ncalls = 0
+        self.skipped = 0
+
+    def atexit(self):
+        """Stop profiling and print profile information to sys.stdout.
+
+        This function is registered as an atexit hook.
+        """
+        if not self.immediate:
+            self.print_stats()
+
+
+class HotShotFuncProfile:
+    """Profiler for a function (uses hotshot)."""
+
+    # This flag is shared between all instances
+    in_profiler = False
+
+    def __init__(self, fn, skip=0, filename=None):
+        """Creates a profiler for a function.
+
+        Every profiler has its own log file (the name of which is derived from
+        the function name).
+
+        HotShotFuncProfile registers an atexit handler that prints profiling
+        information to sys.stderr when the program terminates.
+
+        The log file is not removed and remains there to clutter the current
+        working directory.
+        """
+        self.fn = fn
+        self.filename = filename
+        if self.filename:
+            self.logfilename = filename + ".raw"
+        else:
+            self.logfilename = fn.__name__ + ".prof"
+        self.profiler = hotshot.Profile(self.logfilename)
+        self.ncalls = 0
+        self.skip = skip
+        self.skipped = 0
+        atexit.register(self.atexit)
+
+    def __call__(self, *args, **kw):
+        """Profile a singe call to the function."""
+        self.ncalls += 1
+        if self.skip > 0:
+            self.skip -= 1
+            self.skipped += 1
+            return self.fn(*args, **kw)
+        if HotShotFuncProfile.in_profiler:
+            # handle recursive calls
+            return self.fn(*args, **kw)
+        try:
+            HotShotFuncProfile.in_profiler = True
+            return self.profiler.runcall(self.fn, *args, **kw)
+        finally:
+            HotShotFuncProfile.in_profiler = False
+
+    def atexit(self):
+        """Stop profiling and print profile information to sys.stderr.
+
+        This function is registered as an atexit hook.
+        """
+        self.profiler.close()
+        funcname = self.fn.__name__
+        filename = self.fn.func_code.co_filename
+        lineno = self.fn.func_code.co_firstlineno
+        print
+        print "*** PROFILER RESULTS ***"
+        print "%s (%s:%s)" % (funcname, filename, lineno)
+        print "function called %d times" % self.ncalls,
+        if self.skipped:
+            print "(%d calls not profiled)" % self.skipped
+        else:
+            print
+        print
+        stats = hotshot.stats.load(self.logfilename)
+        # hotshot.stats.load takes ages, and the .prof file eats megabytes, but
+        # a pickled stats object is small and fast
+        if self.filename:
+            pickle.dump(stats, file(self.filename, 'w'))
+        # it is best to pickle before strip_dirs
+        stats.strip_dirs()
+        stats.sort_stats('cumulative', 'time', 'calls')
+        stats.print_stats(200)
+
+
+class HotShotFuncCoverage:
+    """Coverage analysis for a function (uses _hotshot).
+
+    HotShot coverage is reportedly faster than trace.py, but it appears to
+    have problems with exceptions; also line counts in coverage reports
+    are generally lower from line counts produced by TraceFuncCoverage.
+    Is this my bug, or is it a problem with _hotshot?
+    """
+
+    def __init__(self, fn):
+        """Creates a profiler for a function.
+
+        Every profiler has its own log file (the name of which is derived from
+        the function name).
+
+        HotShotFuncCoverage registers an atexit handler that prints profiling
+        information to sys.stderr when the program terminates.
+
+        The log file is not removed and remains there to clutter the current
+        working directory.
+        """
+        self.fn = fn
+        self.logfilename = fn.__name__ + ".cprof"
+        self.profiler = _hotshot.coverage(self.logfilename)
+        self.ncalls = 0
+        atexit.register(self.atexit)
+
+    def __call__(self, *args, **kw):
+        """Profile a singe call to the function."""
+        self.ncalls += 1
+        return self.profiler.runcall(self.fn, args, kw)
+
+    def atexit(self):
+        """Stop profiling and print profile information to sys.stderr.
+
+        This function is registered as an atexit hook.
+        """
+        self.profiler.close()
+        funcname = self.fn.__name__
+        filename = self.fn.func_code.co_filename
+        lineno = self.fn.func_code.co_firstlineno
+        print
+        print "*** COVERAGE RESULTS ***"
+        print "%s (%s:%s)" % (funcname, filename, lineno)
+        print "function called %d times" % self.ncalls
+        print
+        fs = FuncSource(self.fn)
+        reader = hotshot.log.LogReader(self.logfilename)
+        for what, (filename, lineno, funcname), tdelta in reader:
+            if filename != fs.filename:
+                continue
+            if what == hotshot.log.LINE:
+                fs.mark(lineno)
+            if what == hotshot.log.ENTER:
+                # hotshot gives us the line number of the function definition
+                # and never gives us a LINE event for the first statement in
+                # a function, so if we didn't perform this mapping, the first
+                # statement would be marked as never executed
+                if lineno == fs.firstlineno:
+                    lineno = fs.firstcodelineno
+                fs.mark(lineno)
+        reader.close()
+        print fs
+
+
+class TraceFuncCoverage:
+    """Coverage analysis for a function (uses trace module).
+
+    HotShot coverage analysis is reportedly faster, but it appears to have
+    problems with exceptions.
+    """
+
+    # Shared between all instances so that nested calls work
+    tracer = trace.Trace(count=True, trace=False,
+                         ignoredirs=[sys.prefix, sys.exec_prefix])
+
+    # This flag is also shared between all instances
+    tracing = False
+
+    def __init__(self, fn):
+        """Creates a profiler for a function.
+
+        Every profiler has its own log file (the name of which is derived from
+        the function name).
+
+        TraceFuncCoverage registers an atexit handler that prints profiling
+        information to sys.stderr when the program terminates.
+
+        The log file is not removed and remains there to clutter the current
+        working directory.
+        """
+        self.fn = fn
+        self.logfilename = fn.__name__ + ".cprof"
+        self.ncalls = 0
+        atexit.register(self.atexit)
+
+    def __call__(self, *args, **kw):
+        """Profile a singe call to the function."""
+        self.ncalls += 1
+        if TraceFuncCoverage.tracing:
+            return self.fn(*args, **kw)
+        try:
+            TraceFuncCoverage.tracing = True
+            return self.tracer.runfunc(self.fn, *args, **kw)
+        finally:
+            TraceFuncCoverage.tracing = False
+
+    def atexit(self):
+        """Stop profiling and print profile information to sys.stderr.
+
+        This function is registered as an atexit hook.
+        """
+        funcname = self.fn.__name__
+        filename = self.fn.func_code.co_filename
+        lineno = self.fn.func_code.co_firstlineno
+        print
+        print "*** COVERAGE RESULTS ***"
+        print "%s (%s:%s)" % (funcname, filename, lineno)
+        print "function called %d times" % self.ncalls
+        print
+        fs = FuncSource(self.fn)
+        for (filename, lineno), count in self.tracer.counts.items():
+            if filename != fs.filename:
+                continue
+            fs.mark(lineno, count)
+        print fs
+        never_executed = fs.count_never_executed()
+        if never_executed:
+            print "%d lines were not executed." % never_executed
+
+
+class FuncSource:
+    """Source code annotator for a function."""
+
+    blank_rx = re.compile(r"^\s*finally:\s*(#.*)?$")
+
+    def __init__(self, fn):
+        self.fn = fn
+        self.filename = inspect.getsourcefile(fn)
+        self.source, self.firstlineno = inspect.getsourcelines(fn)
+        self.sourcelines = {}
+        self.firstcodelineno = self.firstlineno
+        self.find_source_lines()
+
+    def find_source_lines(self):
+        """Mark all executable source lines in fn as executed 0 times."""
+        strs = trace.find_strings(self.filename)
+        lines = trace.find_lines_from_code(self.fn.func_code, strs)
+        self.firstcodelineno = sys.maxint
+        for lineno in lines:
+            self.firstcodelineno = min(self.firstcodelineno, lineno)
+            self.sourcelines.setdefault(lineno, 0)
+        if self.firstcodelineno == sys.maxint:
+            self.firstcodelineno = self.firstlineno
+
+    def mark(self, lineno, count=1):
+        """Mark a given source line as executed count times.
+
+        Multiple calls to mark for the same lineno add up.
+        """
+        self.sourcelines[lineno] = self.sourcelines.get(lineno, 0) + count
+
+    def count_never_executed(self):
+        """Count statements that were never executed."""
+        lineno = self.firstlineno
+        counter = 0
+        for line in self.source:
+            if self.sourcelines.get(lineno) == 0:
+                if not self.blank_rx.match(line):
+                    counter += 1
+            lineno += 1
+        return counter
+
+    def __str__(self):
+        """Return annotated source code for the function."""
+        lines = []
+        lineno = self.firstlineno
+        for line in self.source:
+            counter = self.sourcelines.get(lineno)
+            if counter is None:
+                prefix = ' ' * 7
+            elif counter == 0:
+                if self.blank_rx.match(line):
+                    prefix = ' ' * 7
+                else:
+                    prefix = '>' * 6 + ' '
+            else:
+                prefix = '%5d: ' % counter
+            lines.append(prefix + line)
+            lineno += 1
+        return ''.join(lines)
+
+
+def timecall(fn):
+    """Wrap `fn` and print its execution time.
+
+    Example:
+
+        @timecall
+        def somefunc(x, y):
+            time.sleep(x * y)
+
+        somefunc(2, 3)
+
+    will print
+
+        somefunc: 6.0 seconds
+
+    """
+    def new_fn(*args, **kw):
+        try:
+            start = time.time()
+            return fn(*args, **kw)
+        finally:
+            duration = time.time() - start
+            funcname = fn.__name__
+            filename = fn.func_code.co_filename
+            lineno = fn.func_code.co_firstlineno
+            print >> sys.stderr, "\n  %s (%s:%s):\n    %.3f seconds\n" % (
+                                        funcname, filename, lineno, duration)
+    new_fn.__doc__ = fn.__doc__
+    return new_fn
+