2.1. Serialization Pickle

2.1.1. What is pickle?

  • Python object serialization format

  • pickle vs. cPickle

2.1.2. Serialize data types

2.1.2.1. Dump to string

import pickle


pickle.dumps('Jan Twardowski')
# b'\x80\x03X\x0e\x00\x00\x00Jan Twardowskiq\x00.'

pickle.dumps(1)
# b'\x80\x03K\x01.'

pickle.dumps(1.0)
# b'\x80\x03G?\xf0\x00\x00\x00\x00\x00\x00.'

pickle.dumps(1.2)
# b'\x80\x03G?\xf3333333.'

pickle.dumps(1.5)
# b'\x80\x03G?\xf8\x00\x00\x00\x00\x00\x00.'

2.1.2.2. Load from string

import pickle


pickle.loads(b'\x80\x03X\x0e\x00\x00\x00Jan Twardowskiq\x00.')
# 'Jan Twardowski'

pickle.loads(b'\x80\x03K\x01.')
# 1

pickle.loads(b'\x80\x03G?\xf0\x00\x00\x00\x00\x00\x00.')
# 1.0

pickle.loads(b'\x80\x03G?\xf3333333.')
# 1.2

pickle.loads(b'\x80\x03G?\xf8\x00\x00\x00\x00\x00\x00.')
# 1.5

2.1.3. Serialize sequences

2.1.3.1. Dump to string

import pickle


pickle.dumps([1, 2, 3])
# b'\x80\x03]q\x00(K\x01K\x02K\x03e.

pickle.dumps((1, 2, 3))
# b'\x80\x03K\x01K\x02K\x03\x87q\x00.'

pickle.dumps({1, 2, 3})
# b'\x80\x03cbuiltins\nset\nq\x00]q\x01(K\x01K\x02K\x03e\x85q\x02Rq\x03.'

pickle.dumps({'a': 1, 'b': 2, 'c': 3})
# b'\x80\x03}q\x00(X\x01\x00\x00\x00aq\x01K\x01X\x01\x00\x00\x00bq\x02K\x02X\x01\x00\x00\x00cq\x03K\x03u.'

2.1.3.2. Load from string

import pickle


pickle.loads(b'\x80\x03]q\x00(K\x01K\x02K\x03e.)
# [1, 2, 3]

pickle.loads(b'\x80\x03K\x01K\x02K\x03\x87q\x00.')
# (1, 2, 3)

pickle.loads(b'\x80\x03cbuiltins\nset\nq\x00]q\x01(K\x01K\x02K\x03e\x85q\x02Rq\x03.')
# {1, 2, 3}

pickle.loads(b'\x80\x03}q\x00(X\x01\x00\x00\x00aq\x01K\x01X\x01\x00\x00\x00bq\x02K\x02X\x01\x00\x00\x00cq\x03K\x03u.')
# {'a': 1, 'b': 2, 'c': 3}

2.1.4. Serialize Dates and Datetimes

import pickle


dt = datetime(1969, 7, 21, 2, 56, 15)
pickle.dumps(dt)
# b'\x80\x03cdatetime\ndatetime\nq\x00C\n\x07\xb1\x07\x15\x028\x0f\x00\x00\x00q\x01\x85q\x02Rq\x03.'
import pickle


pickle.loads(b'\x80\x03cdatetime\ndatetime\nq\x00C\n\x07\xb1\x07\x15\x028\x0f\x00\x00\x00q\x01\x85q\x02Rq\x03.')
# datetime.datetime(1969, 7, 21, 2, 56, 15)

2.1.5. Serialize and deserialize objects

import pickle


class Astronaut:
    def __init__(self, first_name, last_name):
        self.first_name = first_name
        self.last_name = last_name

jan = Astronaut('Jan', 'Twardowski')

pickle.dumps(jan)
# b'\x80\x03c__main__\nAstronaut\nq\x00)\x81q\x01}q\x02(X\n\x00\x00\x00first_nameq\x03X\x03\x00\x00\x00Janq\x04X\t\x00\x00\x00last_nameq\x05X\n\x00\x00\x00Twardowskiq\x06ub.'

pickle.loads(b'\x80\x03c__main__\nAstronaut\nq\x00)\x81q\x01}q\x02(X\n\x00\x00\x00first_nameq\x03X\x03\x00\x00\x00Janq\x04X\t\x00\x00\x00last_nameq\x05X\n\x00\x00\x00Twardowskiq\x06ub.')
# <__main__.Astronaut object at 0x10585f8d0>

2.1.6. Serialize and deserialize to file

  • File extension pkl

2.1.6.1. Dump to file

Listing 787. Dump to file
import pickle


INPUT = [1, 2, 3]

with open('filename.pkl', mode='wb') as file:
    pickle.dump(INPUT, file)

2.1.6.2. Load from file

Listing 788. Load from file
import pickle


with open('filename.pkl', mode='rb') as file:
    output = pickle.load(file)

print(output)

2.1.7. Examples

Listing 789. Advanced Example
import pickle
from datetime import datetime, timezone, timedelta


def month_ago(dt):
    return dt - timedelta(days=30)


class Astronaut:
    agency = 'NASA'

    def __init__(self, name):
        self.name = name


jose = Astronaut(name='Jose Jimenez')
now = datetime.now(tz=timezone.utc)


INPUT = [
    jose,
    Astronaut,
    month_ago(now),
    str(now),
    now.__str__(),
    '{}'.format(now),
    f'{now}',
    {'imie': 'Иван', 'nazwisko': 'Иванович'},
    {10, 20, 30},
    (1,),
    10,
    10.5,
]

pickle.dumps(INPUT)
# b'\x80\x03]q\x00(c__main__\nAstronaut\nq\x01)\x81q\x02}q\x03X\x04\x00\x00\x00nameq\x04X\x0c\x00\x00\x00Jose Jimenezq\x05sbh\x01cdatetime\ndatetime\nq\x06C\n\x07\xe2\t\x0b\r\n\x05\x04\xa9\xfdq\x07cdatetime\ntimezone\nq\x08cdatetime\ntimedelta\nq\tK\x00K\x00K\x00\x87q\nRq\x0b\x85q\x0cRq\r\x86q\x0eRq\x0fX \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x10X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x11X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x12X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x13}q\x14(X\x04\x00\x00\x00imieq\x15X\x08\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbdq\x16X\x08\x00\x00\x00nazwiskoq\x17X\x10\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbd\xd0\xbe\xd0\xb2\xd0\xb8\xd1\x87q\x18ucbuiltins\nset\nq\x19]q\x1a(K\nK\x14K\x1ee\x85q\x1bRq\x1cK\x01\x85q\x1dK\[email protected]%\x00\x00\x00\x00\x00\x00e.'

pickle.loads(b'\x80\x03]q\x00(c__main__\nAstronaut\nq\x01)\x81q\x02}q\x03X\x04\x00\x00\x00nameq\x04X\x0c\x00\x00\x00Jose Jimenezq\x05sbh\x01cdatetime\ndatetime\nq\x06C\n\x07\xe2\t\x0b\r\n\x05\x04\xa9\xfdq\x07cdatetime\ntimezone\nq\x08cdatetime\ntimedelta\nq\tK\x00K\x00K\x00\x87q\nRq\x0b\x85q\x0cRq\r\x86q\x0eRq\x0fX \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x10X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x11X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x12X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x13}q\x14(X\x04\x00\x00\x00imieq\x15X\x08\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbdq\x16X\x08\x00\x00\x00nazwiskoq\x17X\x10\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbd\xd0\xbe\xd0\xb2\xd0\xb8\xd1\x87q\x18ucbuiltins\nset\nq\x19]q\x1a(K\nK\x14K\x1ee\x85q\x1bRq\x1cK\x01\x85q\x1dK\n[email protected]%\x00\x00\x00\x00\x00\x00e.')
# [
#   <__main__.Astronaut object at 0x10585f850>,
#   <class '__main__.Astronaut'>,
#   datetime.datetime(2018, 9, 11, 13, 10, 5, 305661, tzinfo=datetime.timezone.utc),
#   '2018-10-11 13:10:05.305661+00:00',
#   '2018-10-11 13:10:05.305661+00:00',
#   '2018-10-11 13:10:05.305661+00:00',
#   '2018-10-11 13:10:05.305661+00:00',
#   {'imie': 'Иван', 'nazwisko': 'Иванович'},
#   {10, 20, 30},
#   (1,),
#   10,
#   10.5
# ]

2.1.8. Assignments

2.1.8.1. Pickle serialization

English
  1. Use data from input section (see below)

  2. Using pickle save data structure to file

  3. Recreate data structure from file

Polish
  1. Użyj danych z sekcji input (see below)

  2. Za pomocą pickle zapisz strukturę danych do pliku

  3. Odtwórz strukturę danych na podstawie danych z pliku

Input
class Astronaut:
    def __init__(self, name, missions=()):
        self.name = name
        self.missions = missions

    def __repr__(self):
        return f'\n\nAstronaut(name="{self.name}", missions={self.missions})'


class Mission:
    def __init__(self, year, name):
        self.year = year
        self.name = name

    def __repr__(self):
        return f'\n\tMission(year={self.year}, name="{self.name}")'


CREW = [
    Astronaut('Jan Twardowski', missions=(
        Mission(1969, 'Apollo 18'),
        Mission(2024, 'Artemis 3'))),

    Astronaut('Mark Watney', missions=(
        Mission(2035, 'Ares 3'))),

    Astronaut('Melissa Lewis'),
]
Output
output: list[Astronaut]
# [
#   Astronaut(name='Jan Twardowski', missions=[
#       Mission(year=1969, name='Apollo 11'),
#       Mission(year=2024, name='Artemis 3')]),
#
#   Astronaut(name='Mark Watney', missions=[
#       Mission(year=2035, name='Ares 3')]),
#
#   Astronaut(name='Melissa Lewis', missions=[])
# ]