8.2. Functional Map¶
Map (convert) elements in sequence
Lazily evaluated: returns an iterator (a map object, which is not a generator)
map(callable, *iterables)
Parameters:
callable - function applied to each element (required)
iterables - 1 or many sequence or iterator objects (required)
>>> from inspect import isgeneratorfunction, isgenerator
>>>
>>>
>>> isgeneratorfunction(map)  # map is not a generator function
False
>>>
>>> result = map(float, [1,2,3])
>>> isgenerator(result)  # the object returned by map() is not a generator either
False
8.2.1. Example¶
>>> result = (float(x) for x in range(0,5))  # generator expression converting each number to float
>>>
>>> list(result)
[0.0, 1.0, 2.0, 3.0, 4.0]
>>> result = map(float, range(0,5))  # the same conversion expressed with map()
>>>
>>> list(result)
[0.0, 1.0, 2.0, 3.0, 4.0]
8.2.2. Problem¶
>>> data = [1, 2, 3]
>>> result = []
>>>
>>> for x in data:
... result.append(float(x))
>>>
>>> print(result)
[1.0, 2.0, 3.0]
8.2.3. Solution¶
>>> data = [1, 2, 3]
>>> result = map(float, data)  # same conversion as the loop version, expressed with map()
>>>
>>> list(result)  # consuming the map object yields the converted values
[1.0, 2.0, 3.0]
8.2.4. Lazy Evaluation¶
>>> data = [1, 2, 3]
>>> result = map(float, data)
>>>
>>> next(result)  # each next() call returns one more converted element
1.0
>>> next(result)
2.0
>>> next(result)
3.0
>>> next(result)  # all three elements were consumed, so StopIteration is raised
Traceback (most recent call last):
StopIteration
8.2.5. Multi Parameters¶
>>> def myfunc(x):
... return sum(x)
>>>
>>>
>>> DATA = [
... (1,2),
... (3,4),
... ]
>>>
>>> result = map(myfunc, DATA)
>>> print(list(result))
[3, 7]
8.2.6. Starmap¶
>>> from itertools import starmap
>>>
>>>
>>> DATA = [
... (3.1415, 3),
... (2.71828, 2)]
>>>
>>> result = starmap(round, DATA) # each tuple is unpacked into arguments: round(number=3.1415, ndigits=3)
>>> print(list(result))
[3.142, 2.72]
8.2.7. Partial¶
>>> from functools import partial
>>>
>>>
>>> myround = partial(round, ndigits=1)  # round() with ndigits pre-set to 1
>>> DATA = [1.111, 2.222, 3.333]
>>>
>>> result = map(myround, DATA) # round(number=1.111, ndigits=1)
>>> print(list(result))
[1.1, 2.2, 3.3]
8.2.8. Performance¶
>>> def even(x):
... return x % 2 == 0
>>>
... %%timeit -r 1000 -n 1000
... result = [float(x) for x in data if even(x)]
1.9 µs ± 206 ns per loop (mean ± std. dev. of 1000 runs, 1,000 loops each)
>>>
... %%timeit -r 1000 -n 1000
... result = list(map(float, filter(parzysta, data)))
1.66 µs ± 175 ns per loop (mean ± std. dev. of 1000 runs, 1,000 loops each)
8.2.9. Use Case - 0x01¶
Built-in functions:
>>> DATA = [1, 2, 3]
>>> result = map(float, DATA)
>>>
>>> tuple(map(float, DATA))
(1.0, 2.0, 3.0)
>>> DATA = [1, 2, 3]
>>> result = map(float, DATA)
>>>
>>> set(map(float, DATA))
{1.0, 2.0, 3.0}
>>> DATA = [1, 2, 3]
>>> result = (float(x) for x in DATA)
>>>
>>> list(result)
[1.0, 2.0, 3.0]
>>> DATA = [1.1, 2.2, 3.3]
>>> result = map(round, DATA)
>>>
>>> list(result)
[1, 2, 3]
8.2.10. Use Case - 0x02¶
>>> def square(x):
... return x ** 2
>>>
>>>
>>> DATA = [1, 2, 3]
>>> result = map(square, DATA)
>>>
>>> list(result)
[1, 4, 9]
8.2.11. Use Case - 0x03¶
>>> def increment(x):
... return x + 1
>>>
>>>
>>> DATA = [1, 2, 3, 4]
>>> result = map(increment, DATA)
>>>
>>> list(result)
[2, 3, 4, 5]
8.2.12. Use Case - 0x04¶
>>> def translate(letter):
... return PL.get(letter, letter)
>>>
>>>
>>> DATA = 'zażółć gęślą jaźń'
>>> PL = {'ą': 'a', 'ć': 'c', 'ę': 'e',
... 'ł': 'l', 'ń': 'n', 'ó': 'o',
... 'ś': 's', 'ż': 'z', 'ź': 'z'}
>>>
>>> result = map(translate, DATA)
>>> ''.join(result)
'zazolc gesla jazn'
8.2.13. Use Case - 0x05¶
Standard input:
>>> import sys
>>>
>>>
... print(sum(map(int, sys.stdin)))  # convert every line read from stdin to int and print the total
$ cat ~/.profile |grep addnum
alias addnum='python -c"import sys; print(sum(map(int, sys.stdin)))"'
8.2.14. Use Case - 0x06¶
>>> import httpx
>>>
>>> url = 'https://python3.info/_static/iris-dirty.csv'
>>>
>>> data = httpx.get(url).text
>>> header, *rows = data.splitlines()
>>> nrows, nfeatures, *class_labels = header.strip().split(',')
>>> label_encoder = dict(enumerate(class_labels))
>>> result = []
>>> for row in rows:
... *features, species = row.strip().split(',')
... features = map(float, features)
... species = label_encoder[int(species)]
... row = tuple(features) + (species,)
... result.append(row)
>>> def decode(row):
... *features, species = row.strip().split(',')
... features = map(float, features)
... species = label_encoder[int(species)]
... return tuple(features) + (species,)
>>>
>>> result = map(decode, rows)
>>> def decode(row):
... *features, species = row.strip().split(',')
... features = map(float, features)
... species = label_encoder[int(species)]
... return tuple(features) + (species,)
>>>
>>> with open('/tmp/myfile.csv') as file:
... header = file.readline()
... for line in map(decode, file):
... print(line)
8.2.15. Use Case - 0x07¶
SetUp:
>>> from doctest import testmod as run_tests
Data [1]:
>>> DATA = """150,4,setosa,versicolor,virginica
... 5.1,3.5,1.4,0.2,0
... 7.0,3.2,4.7,1.4,1
... 6.3,3.3,6.0,2.5,2
... 4.9,3.0,1.4,0.2,0
... 6.4,3.2,4.5,1.5,1
... 5.8,2.7,5.1,1.9,2"""
Definition:
>>> def get_labelencoder(header: str) -> dict[int, str]:
... """
... >>> get_labelencoder('150,4,setosa,versicolor,virginica')
... {0: 'setosa', 1: 'versicolor', 2: 'virginica'}
... """
... nrows, nfeatures, *class_labels = header.split(',')
... return dict(enumerate(class_labels))
>>>
>>> run_tests()
TestResults(failed=0, attempted=1)
>>> def get_data(line: str) -> tuple:
... """
... >>> convert('5.1,3.5,1.4,0.2,0')
... (5.1, 3.5, 1.4, 0.2, 'setosa')
... >>> convert('7.0,3.2,4.7,1.4,1')
... (7.0, 3.2, 4.7, 1.4, 'versicolor')
... >>> convert('6.3,3.3,6.0,2.5,2')
... (6.3, 3.3, 6.0, 2.5, 'virginica')
... """
... *values, species = line.split(',')
... values = map(float, values)
... species = label_encoder[int(species)]
... return tuple(values) + (species,)
>>>
>>> run_tests()
TestResults(failed=0, attempted=3)
>>> header, *lines = DATA.splitlines()  # first line is the header, the rest are data rows
>>> label_encoder = get_labelencoder(header)  # class index -> class name, read by get_data()
>>> result = map(get_data, lines)  # decode every data row
>>> list(result)
[(5.1, 3.5, 1.4, 0.2, 'setosa'),
(7.0, 3.2, 4.7, 1.4, 'versicolor'),
(6.3, 3.3, 6.0, 2.5, 'virginica'),
(4.9, 3.0, 1.4, 0.2, 'setosa'),
(6.4, 3.2, 4.5, 1.5, 'versicolor'),
(5.8, 2.7, 5.1, 1.9, 'virginica')]
8.2.16. Use Case - 0x08¶
>>>
... import pandas as pd
...
...
... DATA = 'https://python3.info/_static/phones-pl.csv'
...
... result = (
... pd
... .read_csv(DATA, parse_dates=['datetime'])
... .set_index('datetime', drop=True)
... .drop(columns=['id'])
... .loc['2000-01-01':'2000-03-01']
... .query('item == "sms"')
... .groupby(['period','item'])
... .agg(
... duration_count = ('duration', 'count'),
... duration_sum = ('duration', 'sum'),
... duration_median = ('duration', 'median'),
... duration_mean = ('duration', 'mean'),
... duration_std = ('duration', 'std'),
... duration_var = ('duration', 'var'),
... value = ('duration', lambda column: column.mean().astype(int))
... )
... )