3.3. Serialization Pickle

3.3.1. What is pickle?

  • Python object serialization format

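  • pickle vs. cPickle (cPickle was the faster C implementation in Python 2; in Python 3 `pickle` uses its C accelerator automatically)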

3.3.2. Serialize data types

3.3.2.1. Dump to string

import pickle


# pickle.dumps() returns the serialized object as `bytes`; the comment under
# each call shows a sample result.
# NOTE: every sample starts with b'\x80\x03', the marker for pickle
# protocol 3 — an interpreter using a different default protocol prints
# different bytes for the same value.
pickle.dumps('Jan Twardowski')
# b'\x80\x03X\x0e\x00\x00\x00Jan Twardowskiq\x00.'

pickle.dumps(1)
# b'\x80\x03K\x01.'

pickle.dumps(1.0)
# b'\x80\x03G?\xf0\x00\x00\x00\x00\x00\x00.'

pickle.dumps(1.2)
# b'\x80\x03G?\xf3333333.'

pickle.dumps(1.5)
# b'\x80\x03G?\xf8\x00\x00\x00\x00\x00\x00.'

3.3.2.2. Load from string

import pickle


# pickle.loads() rebuilds an object from its serialized bytes; the comment
# under each call shows the value it returns.
# Only unpickle bytes that come from a trusted source: a crafted pickle can
# execute arbitrary code while it is being loaded.
pickle.loads(b'\x80\x03X\x0e\x00\x00\x00Jan Twardowskiq\x00.')
# 'Jan Twardowski'

pickle.loads(b'\x80\x03K\x01.')
# 1

pickle.loads(b'\x80\x03G?\xf0\x00\x00\x00\x00\x00\x00.')
# 1.0

pickle.loads(b'\x80\x03G?\xf3333333.')
# 1.2

pickle.loads(b'\x80\x03G?\xf8\x00\x00\x00\x00\x00\x00.')
# 1.5

3.3.3. Serialize sequences

3.3.3.1. Dump to string

import pickle


# Containers are serialized together with their items; the comment under each
# call shows a sample result (pickle protocol 3).
pickle.dumps([1, 2, 3])
# b'\x80\x03]q\x00(K\x01K\x02K\x03e.'

pickle.dumps((1, 2, 3))
# b'\x80\x03K\x01K\x02K\x03\x87q\x00.'

# The sample for a set references `builtins.set` by name.
pickle.dumps({1, 2, 3})
# b'\x80\x03cbuiltins\nset\nq\x00]q\x01(K\x01K\x02K\x03e\x85q\x02Rq\x03.'

pickle.dumps({'a': 1, 'b': 2, 'c': 3})
# b'\x80\x03}q\x00(X\x01\x00\x00\x00aq\x01K\x01X\x01\x00\x00\x00bq\x02K\x02X\x01\x00\x00\x00cq\x03K\x03u.'

3.3.3.2. Load from string

import pickle


# pickle.loads() rebuilds each container from its serialized bytes; the
# comment under each call shows the value it returns.
# Only unpickle bytes that come from a trusted source.
pickle.loads(b'\x80\x03]q\x00(K\x01K\x02K\x03e.')
# [1, 2, 3]

pickle.loads(b'\x80\x03K\x01K\x02K\x03\x87q\x00.')
# (1, 2, 3)

pickle.loads(b'\x80\x03cbuiltins\nset\nq\x00]q\x01(K\x01K\x02K\x03e\x85q\x02Rq\x03.')
# {1, 2, 3}

pickle.loads(b'\x80\x03}q\x00(X\x01\x00\x00\x00aq\x01K\x01X\x01\x00\x00\x00bq\x02K\x02X\x01\x00\x00\x00cq\x03K\x03u.')
# {'a': 1, 'b': 2, 'c': 3}

3.3.4. Serialize Dates and Datetimes

import pickle
from datetime import datetime


# Serialize a naive datetime (no tzinfo) to bytes; the comment shows a
# sample result (pickle protocol 3).
dt = datetime(1969, 7, 21, 2, 56, 15)
pickle.dumps(dt)
# b'\x80\x03cdatetime\ndatetime\nq\x00C\n\x07\xb1\x07\x15\x028\x0f\x00\x00\x00q\x01\x85q\x02Rq\x03.'

import pickle


# Rebuild the datetime from its serialized bytes. Loading needs no explicit
# datetime import: the pickle names `datetime.datetime` itself.
pickle.loads(b'\x80\x03cdatetime\ndatetime\nq\x00C\n\x07\xb1\x07\x15\x028\x0f\x00\x00\x00q\x01\x85q\x02Rq\x03.')
# datetime.datetime(1969, 7, 21, 2, 56, 15)

3.3.5. Serialize and deserialize objects

import pickle


class Astronaut:
    """Plain example class whose instances are pickled further down."""

    def __init__(self, firstname: str, lastname: str) -> None:
        # Both values are stored as ordinary instance attributes.
        self.firstname = firstname
        self.lastname = lastname

jan = Astronaut('Jan', 'Twardowski')

# The sample output refers to the class by module and name
# (`__main__.Astronaut`) and lists the instance attributes; the class body
# itself is not part of the bytes, so the class must be defined wherever the
# bytes are loaded.
pickle.dumps(jan)
# b'\x80\x03c__main__\nAstronaut\nq\x00)\x81q\x01}q\x02(X\t\x00\x00\x00firstnameq\x03X\x03\x00\x00\x00Janq\x04X\x08\x00\x00\x00lastnameq\x05X\n\x00\x00\x00Twardowskiq\x06ub.'

# Every string is prefixed with its byte length (9 for 'firstname', 8 for
# 'lastname'); a wrong length makes the bytes impossible to load.
pickle.loads(b'\x80\x03c__main__\nAstronaut\nq\x00)\x81q\x01}q\x02(X\t\x00\x00\x00firstnameq\x03X\x03\x00\x00\x00Janq\x04X\x08\x00\x00\x00lastnameq\x05X\n\x00\x00\x00Twardowskiq\x06ub.')
# <__main__.Astronaut object at 0x10585f8d0>

3.3.6. Serialize and deserialize to file

  • Conventional file extension: `.pkl`

3.3.6.1. Dump to file

Dump to file:

import pickle


# Sample payload to store on disk.
DATA = [1, 2, 3]

# Pickle output is binary, hence the 'wb' mode.
with open('filename.pkl', mode='wb') as stream:
    pickle.dump(DATA, file=stream)

3.3.6.2. Load from file

Load from file:

import pickle


# Pickle data is binary, so the file is opened in 'rb' mode.
# Only load pickle files you trust: unpickling can execute arbitrary code.
with open('filename.pkl', mode='rb') as file:
    result = pickle.load(file)

print(result)

3.3.7. Examples

Advanced Example:

import pickle
from datetime import datetime, timezone, timedelta


def month_ago(dt):
    """Return *dt* moved 30 days into the past (a fixed-length "month")."""
    thirty_days = timedelta(days=30)
    return dt - thirty_days


class Astronaut:
    """Example class with one class-level and one per-instance attribute."""

    # Class attribute: defined on the class and shared by all instances.
    agency = 'NASA'

    def __init__(self, name: str) -> None:
        self.name = name


# NOTE(review): the sample output shown after this list spells the name as
# ASCII 'Jose Jimenez' (12 bytes), so it was not generated from this exact
# literal — confirm which spelling is intended.
jose = Astronaut(name='José Jiménez')
# Timezone-aware current time in UTC; its value differs on every run.
now = datetime.now(tz=timezone.utc)


# One list mixing several kinds of objects to pickle in a single call.
DATA = [
    jose,  # instance of a user-defined class
    Astronaut,  # the class object itself
    month_ago(now),  # timezone-aware datetime, 30 days before `now`
    # The next four entries are equivalent ways of formatting `now` as text.
    str(now),
    now.__str__(),
    '{}'.format(now),
    f'{now}',
    # Non-ASCII text; the keys are Polish for "first name" / "last name".
    {'imie': 'Иван', 'nazwisko': 'Иванович'},
    {10, 20, 30},
    (1,),
    10,
    10.5,
]

# The bytes shown below are a sample: DATA embeds the current time, so every
# run produces a different result.
pickle.dumps(DATA)
# b'\x80\x03]q\x00(c__main__\nAstronaut\nq\x01)\x81q\x02}q\x03X\x04\x00\x00\x00nameq\x04X\x0c\x00\x00\x00Jose Jimenezq\x05sbh\x01cdatetime\ndatetime\nq\x06C\n\x07\xe2\t\x0b\r\n\x05\x04\xa9\xfdq\x07cdatetime\ntimezone\nq\x08cdatetime\ntimedelta\nq\tK\x00K\x00K\x00\x87q\nRq\x0b\x85q\x0cRq\r\x86q\x0eRq\x0fX \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x10X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x11X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x12X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x13}q\x14(X\x04\x00\x00\x00imieq\x15X\x08\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbdq\x16X\x08\x00\x00\x00nazwiskoq\x17X\x10\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbd\xd0\xbe\xd0\xb2\xd0\xb8\xd1\x87q\x18ucbuiltins\nset\nq\x19]q\x1a(K\nK\x14K\x1ee\x85q\x1bRq\x1cK\x01\x85q\x1dK\nG@%\x00\x00\x00\x00\x00\x00e.'

pickle.loads(b'\x80\x03]q\x00(c__main__\nAstronaut\nq\x01)\x81q\x02}q\x03X\x04\x00\x00\x00nameq\x04X\x0c\x00\x00\x00Jose Jimenezq\x05sbh\x01cdatetime\ndatetime\nq\x06C\n\x07\xe2\t\x0b\r\n\x05\x04\xa9\xfdq\x07cdatetime\ntimezone\nq\x08cdatetime\ntimedelta\nq\tK\x00K\x00K\x00\x87q\nRq\x0b\x85q\x0cRq\r\x86q\x0eRq\x0fX \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x10X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x11X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x12X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x13}q\x14(X\x04\x00\x00\x00imieq\x15X\x08\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbdq\x16X\x08\x00\x00\x00nazwiskoq\x17X\x10\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbd\xd0\xbe\xd0\xb2\xd0\xb8\xd1\x87q\x18ucbuiltins\nset\nq\x19]q\x1a(K\nK\x14K\x1ee\x85q\x1bRq\x1cK\x01\x85q\x1dK\nG@%\x00\x00\x00\x00\x00\x00e.')
# [
#   <__main__.Astronaut object at 0x10585f850>,
#   <class '__main__.Astronaut'>,
#   datetime.datetime(2018, 9, 11, 13, 10, 5, 305661, tzinfo=datetime.timezone.utc),
#   '2018-10-11 13:10:05.305661+00:00',
#   '2018-10-11 13:10:05.305661+00:00',
#   '2018-10-11 13:10:05.305661+00:00',
#   '2018-10-11 13:10:05.305661+00:00',
#   {'imie': 'Иван', 'nazwisko': 'Иванович'},
#   {10, 20, 30},
#   (1,),
#   10,
#   10.5
# ]

3.3.8. Assignments

Code 3.30. Solution
"""
* Assignment: Pickle Serialization
* Filename: serialization_pickle_dump_load.py
* Complexity: easy
* Lines of code: 4 lines
* Time: 5 min

English:
    1. Use data from "Given" section (see below)
    2. Using `pickle` save data structure to file
    3. Recreate data structure from file
    4. Compare result with "Tests" section (see below)

Polish:
    1. Użyj danych z sekcji "Given" (patrz poniżej)
    2. Za pomocą `pickle` zapisz strukturę danych do pliku
    3. Odtwórz strukturę danych na podstawie danych z pliku
    4. Porównaj wyniki z sekcją "Tests" (patrz poniżej)

Tests:
    >>> result  # doctest: +NORMALIZE_WHITESPACE
    [Astronaut(name='Jan Twardowski', missions=[Mission(year=1969, name='Apollo 18'), Mission(year=2024, name='Artemis 3')]),
     Astronaut(name='Mark Watney', missions=[Mission(year=2035, name='Ares 3')]),
     Astronaut(name='Melissa Lewis', missions=[])]
    >>> from os import remove
    >>> remove(FILE)
"""


# Given
import pickle
from dataclasses import dataclass, field

FILE = r'_temporary.pkl'


@dataclass
class Astronaut:
    """Astronaut record: a name plus a list of missions."""

    name: str
    # default_factory gives every instance its own new, empty list.
    missions: list = field(default_factory=list)


@dataclass
class Mission:
    """Mission record: a year and a mission name."""

    year: int
    name: str


# Sample data for the assignment: three astronauts with two, one and no
# missions respectively.
CREW = [
    Astronaut(
        'Jan Twardowski',
        missions=[
            Mission(1969, 'Apollo 18'),
            Mission(2024, 'Artemis 3'),
        ],
    ),
    Astronaut('Mark Watney', missions=[Mission(2035, 'Ares 3')]),
    Astronaut('Melissa Lewis'),
]