9.3. Serialization Pickle

9.3.1. What is pickle?

  • Python object serialization format

  • pickle vs. cPickle

9.3.2. Serialize data types

9.3.2.1. Dump to string

import pickle


pickle.dumps('Jan Twardowski')
# b'\x80\x03X\x0e\x00\x00\x00Jan Twardowskiq\x00.'

pickle.dumps(1)
# b'\x80\x03K\x01.'

pickle.dumps(1.0)
# b'\x80\x03G?\xf0\x00\x00\x00\x00\x00\x00.'

pickle.dumps(1.2)
# b'\x80\x03G?\xf3333333.'

pickle.dumps(1.5)
# b'\x80\x03G?\xf8\x00\x00\x00\x00\x00\x00.'

9.3.2.2. Load from string

import pickle


pickle.loads(b'\x80\x03X\x0e\x00\x00\x00Jan Twardowskiq\x00.')
# 'Jan Twardowski'

pickle.loads(b'\x80\x03K\x01.')
# 1

pickle.loads(b'\x80\x03G?\xf0\x00\x00\x00\x00\x00\x00.')
# 1.0

pickle.loads(b'\x80\x03G?\xf3333333.')
# 1.2

pickle.loads(b'\x80\x03G?\xf8\x00\x00\x00\x00\x00\x00.')
# 1.5

9.3.3. Serialize sequences

9.3.3.1. Dump to string

import pickle


pickle.dumps([1, 2, 3])
# b'\x80\x03]q\x00(K\x01K\x02K\x03e.

pickle.dumps((1, 2, 3))
# b'\x80\x03K\x01K\x02K\x03\x87q\x00.'

pickle.dumps({1, 2, 3})
# b'\x80\x03cbuiltins\nset\nq\x00]q\x01(K\x01K\x02K\x03e\x85q\x02Rq\x03.'

pickle.dumps({'a': 1, 'b': 2, 'c': 3})
# b'\x80\x03}q\x00(X\x01\x00\x00\x00aq\x01K\x01X\x01\x00\x00\x00bq\x02K\x02X\x01\x00\x00\x00cq\x03K\x03u.'

9.3.3.2. Load from string

import pickle


pickle.loads(b'\x80\x03]q\x00(K\x01K\x02K\x03e.)
# [1, 2, 3]

pickle.loads(b'\x80\x03K\x01K\x02K\x03\x87q\x00.')
# (1, 2, 3)

pickle.loads(b'\x80\x03cbuiltins\nset\nq\x00]q\x01(K\x01K\x02K\x03e\x85q\x02Rq\x03.')
# {1, 2, 3}

pickle.loads(b'\x80\x03}q\x00(X\x01\x00\x00\x00aq\x01K\x01X\x01\x00\x00\x00bq\x02K\x02X\x01\x00\x00\x00cq\x03K\x03u.')
# {'a': 1, 'b': 2, 'c': 3}

9.3.4. Serialize Dates and Datetimes

import pickle


dt = datetime(1969, 7, 21, 2, 56, 15)
pickle.dumps(dt)
# b'\x80\x03cdatetime\ndatetime\nq\x00C\n\x07\xb1\x07\x15\x028\x0f\x00\x00\x00q\x01\x85q\x02Rq\x03.'
import pickle


pickle.loads(b'\x80\x03cdatetime\ndatetime\nq\x00C\n\x07\xb1\x07\x15\x028\x0f\x00\x00\x00q\x01\x85q\x02Rq\x03.')
# datetime.datetime(1969, 7, 21, 2, 56, 15)

9.3.5. Serialize and deserialize objects

import pickle


class Astronaut:
    def __init__(self, first_name, last_name):
        self.first_name = first_name
        self.last_name = last_name

jan = Astronaut('Jan', 'Twardowski')

pickle.dumps(jan)
# b'\x80\x03c__main__\nAstronaut\nq\x00)\x81q\x01}q\x02(X\n\x00\x00\x00first_nameq\x03X\x03\x00\x00\x00Janq\x04X\t\x00\x00\x00last_nameq\x05X\n\x00\x00\x00Twardowskiq\x06ub.'

pickle.loads(b'\x80\x03c__main__\nAstronaut\nq\x00)\x81q\x01}q\x02(X\n\x00\x00\x00first_nameq\x03X\x03\x00\x00\x00Janq\x04X\t\x00\x00\x00last_nameq\x05X\n\x00\x00\x00Twardowskiq\x06ub.')
# <__main__.Astronaut object at 0x10585f8d0>

9.3.6. Serialize and deserialize to file

  • File extension pkl

9.3.6.1. Dump to file

Listing 355. Dump to file
import pickle


INPUT = [1, 2, 3]

with open('filename.pkl', mode='wb') as file:
    pickle.dump(INPUT, file)

9.3.6.2. Load from file

Listing 356. Load from file
import pickle


with open('filename.pkl', mode='rb') as file:
    OUTPUT = pickle.load(file)

print(OUTPUT)

9.3.7. Advanced Example

Listing 357. Advanced Example
import pickle
from datetime import datetime, timezone, timedelta


def month_ago(dt):
    return dt - timedelta(days=30)


class Astronaut:
    agency = 'NASA'

    def __init__(self, name):
        self.name = name


jose = Astronaut(name='Jose Jimenez')
now = datetime.now(tz=timezone.utc)


INPUT = [
    jose,
    Astronaut,
    month_ago(now),
    str(now),
    now.__str__(),
    '{}'.format(now),
    f'{now}',
    {'imie': 'Иван', 'nazwisko': 'Иванович'},
    {10, 20, 30},
    (1,),
    10,
    10.5,
]

pickle.dumps(INPUT)
# b'\x80\x03]q\x00(c__main__\nAstronaut\nq\x01)\x81q\x02}q\x03X\x04\x00\x00\x00nameq\x04X\x0c\x00\x00\x00Jose Jimenezq\x05sbh\x01cdatetime\ndatetime\nq\x06C\n\x07\xe2\t\x0b\r\n\x05\x04\xa9\xfdq\x07cdatetime\ntimezone\nq\x08cdatetime\ntimedelta\nq\tK\x00K\x00K\x00\x87q\nRq\x0b\x85q\x0cRq\r\x86q\x0eRq\x0fX \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x10X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x11X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x12X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x13}q\x14(X\x04\x00\x00\x00imieq\x15X\x08\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbdq\x16X\x08\x00\x00\x00nazwiskoq\x17X\x10\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbd\xd0\xbe\xd0\xb2\xd0\xb8\xd1\x87q\x18ucbuiltins\nset\nq\x19]q\x1a(K\nK\x14K\x1ee\x85q\x1bRq\x1cK\x01\x85q\x1dK\[email protected]%\x00\x00\x00\x00\x00\x00e.'

pickle.loads(b'\x80\x03]q\x00(c__main__\nAstronaut\nq\x01)\x81q\x02}q\x03X\x04\x00\x00\x00nameq\x04X\x0c\x00\x00\x00Jose Jimenezq\x05sbh\x01cdatetime\ndatetime\nq\x06C\n\x07\xe2\t\x0b\r\n\x05\x04\xa9\xfdq\x07cdatetime\ntimezone\nq\x08cdatetime\ntimedelta\nq\tK\x00K\x00K\x00\x87q\nRq\x0b\x85q\x0cRq\r\x86q\x0eRq\x0fX \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x10X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x11X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x12X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x13}q\x14(X\x04\x00\x00\x00imieq\x15X\x08\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbdq\x16X\x08\x00\x00\x00nazwiskoq\x17X\x10\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbd\xd0\xbe\xd0\xb2\xd0\xb8\xd1\x87q\x18ucbuiltins\nset\nq\x19]q\x1a(K\nK\x14K\x1ee\x85q\x1bRq\x1cK\x01\x85q\x1dK\n[email protected]%\x00\x00\x00\x00\x00\x00e.')
# [
#   <__main__.Astronaut object at 0x10585f850>,
#   <class '__main__.Astronaut'>,
#   datetime.datetime(2018, 9, 11, 13, 10, 5, 305661, tzinfo=datetime.timezone.utc),
#   '2018-10-11 13:10:05.305661+00:00',
#   '2018-10-11 13:10:05.305661+00:00',
#   '2018-10-11 13:10:05.305661+00:00',
#   '2018-10-11 13:10:05.305661+00:00',
#   {'imie': 'Иван', 'nazwisko': 'Иванович'},
#   {10, 20, 30},
#   (1,),
#   10,
#   10.5
# ]

9.3.8. Assignments

9.3.8.1. Pickle serialization

English
  1. Use data from input section (see below)

  2. Using pickle save data structure to file

  3. Recreate data structure from file

Polish
  1. Użyj danych z sekcji input (see below)

  2. Za pomocą pickle zapisz strukturę danych do pliku

  3. Odtwórz strukturę danych na podstawie danych z pliku

Input
from dataclasses import dataclass
from typing import List


@dataclass
class Mission:
    year: int
    name: str


@dataclass
class Astronaut:
    name: str
    missions: List[Mission] = ()


INPUT = [
    Astronaut('Jan Twardowski', missions=[
        Mission(1969, 'Apollo 11'),
        Mission(2024, 'Artemis 3')]),

    Astronaut('Mark Watney', missions=[
        Mission(2035, 'Ares 3')]),

    Astronaut('Melissa Lewis', missions=[]),
]
Output
OUTPUT: list[Astronaut]
# [
#   Astronaut(name='Jan Twardowski', missions=[
#       Mission(year=1969, name='Apollo 11'),
#       Mission(year=2024, name='Artemis 3')]),
#
#   Astronaut(name='Mark Watney', missions=[
#       Mission(year=2035, name='Ares 3')]),
#
#   Astronaut(name='Melissa Lewis', missions=[])
# ]