7.2. Micro-benchmarking

We should forget about small efficiencies, say about 97% of the time: premature optimization is the root of all evil -- Donald Knuth

7.2.1. Evaluation

  • Fresh start of Python process

  • Clean memory before start

  • Same data

  • Same start conditions, CPU load, RAM usage, iostat

  • Do not measure how long Python wakes up

  • Check what you measure

7.2.2. timeit

7.2.2.1. Programmatic use

Listing 7.15. Timeit simple statement
from timeit import timeit


setup = """name = 'José Jiménez'"""
stmt = """result = f'My name... {name}'"""

duration = timeit(stmt, setup, number=10000)

print(duration)
# 0.0005737080000000061
Listing 7.16. Timeit multiple statements with setup code
from timeit import timeit


setup = """
firstname = 'José'
lastname = 'Jiménez'
"""

TEST = dict()
TEST[0] = 'name = f"{firstname} {lastname}"'
TEST[1] = 'name = "{0} {1}".format(firstname, lastname)'
TEST[2] = 'name = firstname + " " + lastname'
TEST[3] = 'name = " ".join([firstname, lastname])'


for stmt in TEST.values():
    duration = timeit(stmt, setup, number=10000)
    print(f'{duration:.5}\t{stmt}')

# 0.00071559    name = f"{firstname} {lastname}"
# 0.0026514     name = "{0} {1}".format(firstname, lastname)
# 0.001015      name = firstname + " " + lastname
# 0.0013494     name = " ".join([firstname, lastname])
Listing 7.17. Timeit with globals()
from timeit import timeit


def factorial(n: int) -> int:
    if n == 0:
        return 1
    else:
        return n * factorial(n-1)


duration = timeit(
    stmt='factorial(500); factorial(400); factorial(450)',
    globals=globals(),
    number=10000,
)

duration = round(duration, 6)

print(f'factorial time: {duration} seconds')
# factorial time: 2.845382 seconds

7.2.2.2. Console use

Listing 7.18. Timeit
python3 -m timeit -n100000 -r100 --setup='name="Jose Jimenez"' 'output = f"My name... {name}"'
# 100000 loops, best of 100: 55.9 nsec per loop

python3 -m timeit -n100000 -r100 --setup='name="Jose Jimenez"' 'output = "My name... {name}".format(name=name)'
# 100000 loops, best of 100: 327 nsec per loop

python3 -m timeit -n100000 -r100 --setup='name="Jose Jimenez"' 'output = "My name... %s" % name'
# 100000 loops, best of 100: 124 nsec per loop
-n N, --number=N
how many times to execute ‘statement’

-r N, --repeat=N
how many times to repeat the timer (default 5)

-s S, --setup=S
statement to be executed once initially (default pass)

-p, --process
measure process time, not wallclock time, using time.process_time() instead of time.perf_counter(), which is the default

-u, --unit=U
specify a time unit for timer output; can select nsec, usec, msec, or sec

-v, --verbose
print raw timing results; repeat for more digits precision

-h, --help
print a short usage message and exit

7.2.3. String Concatenation

from time import time


class Timeit:
    def __init__(self, name):
        self.name = name

    def __enter__(self):
        self.start = time()
        return self

    def __exit__(self, *arg):
        end = time()
        print(f'Duration of {self.name} is {end-self.start:.2f} second')


a = 1
b = 2
repetitions = int(1e7)


with Timeit('f-string'):
    for _ in range(repetitions):
        f'{a}{b}'

with Timeit('string concat'):
    for _ in range(repetitions):
        a + b

with Timeit('str.format()'):
    for _ in range(repetitions):
        '{0}{1}'.format(a, b)

with Timeit('str.format()'):
    for _ in range(repetitions):
        '{}{}'.format(a, b)

with Timeit('str.format()'):
    for _ in range(repetitions):
        '{a}{b}'.format(a=a, b=b)

with Timeit('%-style'):
    for _ in range(repetitions):
        '%s%s' % (a, b)

with Timeit('%-style'):
    for _ in range(repetitions):
        '%d%d' % (a, b)

with Timeit('%-style'):
    for _ in range(repetitions):
        '%f%f' % (a, b)

# Duration of f-string is 2.70 second
# Duration of string concat is 0.68 second
# Duration of str.format() is 3.46 second
# Duration of str.format() is 3.37 second
# Duration of str.format() is 4.85 second
# Duration of %-style is 2.59 second
# Duration of %-style is 2.59 second
# Duration of %-style is 3.82 second

7.2.4. Case Studies - Unique Keys

  • Runtime: Jupyter %%timeit

Listing 7.19. Setup code used for all examples
DATA = [
    {'Sepal length': 5.1, 'Sepal width': 3.5, 'Species': 'setosa'},
    {'Petal length': 4.1, 'Petal width': 1.3, 'Species': 'versicolor'},
    {'Sepal length': 6.3, 'Petal width': 1.8, 'Species': 'virginica'},
    {'Sepal length': 5.0, 'Petal width': 0.2, 'Species': 'setosa'},
    {'Sepal width': 2.8, 'Petal length': 4.1, 'Species': 'versicolor'},
    {'Sepal width': 2.9, 'Petal width': 1.8, 'Species': 'virginica'},
]
Listing 7.20. Append if object not in the list
%%timeit -r 10 -n 1000000

fieldnames = list()

for row in DATA:
    for key in row.keys():
        if key not in fieldnames:
            fieldnames.append(key)

# 2.16 µs ± 26.5 ns per loop (mean ± std. dev. of 10 runs, 1000000 loops each)
Listing 7.21. Append to list and deduplicate at the end
%%timeit -r 10 -n 1000000

fieldnames = list()

for row in DATA:
    for key in row.keys():
        fieldnames.append(key)

set(fieldnames)

# 2.5 µs ± 32.9 ns per loop (mean ± std. dev. of 10 runs, 1000000 loops each)
Listing 7.22. Add to set
%%timeit -r 10 -n 1000000

fieldnames = set()

for row in DATA:
    for key in row.keys():
        fieldnames.add(key)

# 2.12 µs ± 32.4 ns per loop (mean ± std. dev. of 10 runs, 1000000 loops each)
Listing 7.23. Update set
%%timeit -r 10 -n 1000000

unique_keys = set()

for row in DATA:
    unique_keys.update(row.keys())

# 1.57 µs ± 26.7 ns per loop (mean ± std. dev. of 10 runs, 1000000 loops each)
Listing 7.24. Set Comprehension
%%timeit -r 10 -n 1000000

fieldnames = set(key
    for record in DATA
        for key in record.keys())

# 2.06 µs ± 79.7 ns per loop (mean ± std. dev. of 10 runs, 1000000 loops each)
Listing 7.25. Add to Set Comprehension. Code appends generator object not values, this is why it is so fast!
%%timeit -r 10 -n 1000000

fieldnames = set()

fieldnames.add(key
    for record in DATA
       for key in record.keys())

# 447 ns ± 9.52 ns per loop (mean ± std. dev. of 10 runs, 1000000 loops each)
Listing 7.26. Update Set Comprehension
%%timeit -r 10 -n 1000000

fieldnames = set()
fieldnames.update(tuple(x.keys()) for x in DATA)

# 2.06 µs ± 45.9 ns per loop (mean ± std. dev. of 10 runs, 1000000 loops each)
%%timeit -r 10 -n 1000000

unique_keys = set()

for row in DATA:
    unique_keys.update(tuple(row))

# 2.09 µs ± 16.1 ns per loop (mean ± std. dev. of 10 runs, 1000000 loops each)
%%timeit -r 10 -n 1000000

unique_keys = set()

for row in DATA:
    unique_keys.update(list(row))

# 2.33 µs ± 30.2 ns per loop (mean ± std. dev. of 10 runs, 1000000 loops each)
%%timeit -r 10 -n 1000000

unique_keys = set()

for row in DATA:
    unique_keys.update(set(row))

# 1.71 µs ± 54 ns per loop (mean ± std. dev. of 10 runs, 1000000 loops each)

7.2.5. Case Study - Factorial

Listing 7.27. Recap information about factorial (n!)
"""
5! = 5 * 4!
4! = 4 * 3!
3! = 3 * 2!
2! = 2 * 1!
1! = 1 * 0!
0! = 1
"""

factorial(5)                                    # = 120
    return 5 * factorial(4)                     # 5 * 24 = 120
        return 4 * factorial(3)                 # 4 * 6 = 24
            return 3 * factorial(2)             # 3 * 2 = 6
                return 2 * factorial(1)         # 2 * 1 = 2
                    return 1 * factorial(0)     # 1 * 1 = 1
                        return 1                # 1
Listing 7.28. Cache with global scope
_cache = {}

def cache(func):
    def wrapper(n):
        if n not in _cache:
            _cache[n] = func(n)
        return _cache[n]
    return wrapper


@cache
def factorial(n):
    if n == 0:
        return 1
    else:
        return n * factorial(n-1)


factorial(500)
factorial(400)
factorial(450)
Listing 7.29. Cache with local scope
def cache(func):
    _cache = {}
    def wrapper(n):
        if n not in _cache:
            _cache[n] = func(n)
        return _cache[n]
    return wrapper


@cache
def factorial(n):
    if n == 0:
        return 1
    else:
        return n * factorial(n-1)


factorial(500)
factorial(400)
factorial(450)
Listing 7.30. Cache with embedded scope
def cache(func):
    def wrapper(n):
        if n not in wrapper._cache:
            wrapper._cache[n] = func(n)
        return wrapper._cache[n]
    if not hasattr(wrapper, '_cache'):
        setattr(wrapper, '_cache', {})
    return wrapper


@cache
def factorial(n: int) -> int:
    if n == 0:
        return 1
    else:
        return n * factorial(n-1)


factorial(500)
factorial(400)
factorial(450)
Listing 7.31. Without cache
%%timeit

def factorial(n: int) -> int:
    if n == 0:
        return 1
    else:
        return n * factorial(n-1)


factorial(500)
factorial(400)
factorial(450)

# 283 µs ± 6.63 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
Listing 7.32. Cache contains
%%timeit

_cache = {}

def factorial(n: int) -> int:
    if n in _cache:
        return _cache[n]

    if n == 0:
        return 1
    else:
        result = _cache[n] = n * factorial(n-1)
        return result


factorial(500)
factorial(400)
factorial(450)

# 153 µs ± 2.49 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
Listing 7.33. Cache get
%%timeit

_cache = {}

def factorial(n: int) -> int:
    result = _cache.get(n)

    if result:
        return result

    if n == 0:
        return 1
    else:
        result = _cache[n] = n * factorial(n-1)
        return result


factorial(500)
factorial(400)
factorial(450)

# 181 µs ± 10.3 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
Listing 7.34. Cache contains with exceptions
%%timeit

_cache = {}

def factorial(n: int) -> int:
    if n == 0:
        return 1

    try:
        return _cache[n]
    except KeyError:
        _cache[n] = result = n * factorial(n-1)
        return result


factorial(500)
factorial(400)
factorial(450)

# 618 µs ± 6.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
Listing 7.35. Adding cache layer
%%timeit

_cache = {}

def fac(n: int) -> int:

    def factorial(n: int) -> int:
        if n == 0:
            return 1
        return n * factorial(n-1)

    if not n in _cache:
        _cache[n] = factorial(n)

    return _cache[n]


fac(500)
fac(400)
fac(450)

# 283 µs ± 6.44 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
Listing 7.36. Get from cache
%%timeit

_cache = {}

def factorial(n: int) -> int:
    if n == 0:
        return 1

    if n in _cache:
        return _cache[n]

    result = _cache[n] = n * factorial(n-1)
    return result


factorial(500)
factorial(400)
factorial(450)

# 153 µs ± 9.64 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
_cache = {}


def factorial(n):
    if n == 0:
        return 1

    if (result := _cache.get(n)):
        return result

    result = n * factorial(n-1)
    _cache[n] = result
    return result