3. Pickle serialization

3.1. What is pickle?

  • Python object serialization format

  • pickle vs. cPickle

3.2. Serialize data types

3.2.1. Dump to string

import pickle


pickle.dumps('Jan Twardowski')
# b'\x80\x03X\x0e\x00\x00\x00Jan Twardowskiq\x00.'

pickle.dumps(1)
# b'\x80\x03K\x01.'

pickle.dumps(1.0)
# b'\x80\x03G?\xf0\x00\x00\x00\x00\x00\x00.'

pickle.dumps(1.2)
# b'\x80\x03G?\xf3333333.'

pickle.dumps(1.5)
# b'\x80\x03G?\xf8\x00\x00\x00\x00\x00\x00.'

3.2.2. Load from string

import pickle


pickle.loads(b'\x80\x03X\x0e\x00\x00\x00Jan Twardowskiq\x00.')
# 'Jan Twardowski'

pickle.dumps(b'\x80\x03K\x01.')
# 1

pickle.dumps(b'\x80\x03G?\xf0\x00\x00\x00\x00\x00\x00.')
# 1.0

pickle.dumps(b'\x80\x03G?\xf3333333.')
# 1.2

pickle.dumps(b'\x80\x03G?\xf8\x00\x00\x00\x00\x00\x00.')
# 1.5

3.3. Serialize sequences

3.3.1. Dump to string

import pickle


pickle.dumps([1, 2, 3])
# b'\x80\x03]q\x00(K\x01K\x02K\x03e.

pickle.dumps((1, 2, 3))
# b'\x80\x03K\x01K\x02K\x03\x87q\x00.'

pickle.dumps({1, 2, 3})
# b'\x80\x03cbuiltins\nset\nq\x00]q\x01(K\x01K\x02K\x03e\x85q\x02Rq\x03.'

pickle.dumps({'a': 1, 'b': 2, 'c': 3})
# b'\x80\x03}q\x00(X\x01\x00\x00\x00aq\x01K\x01X\x01\x00\x00\x00bq\x02K\x02X\x01\x00\x00\x00cq\x03K\x03u.'

3.3.2. Load from string

import pickle


pickle.dumps(b'\x80\x03]q\x00(K\x01K\x02K\x03e.)
# [1, 2, 3]

pickle.dumps(b'\x80\x03K\x01K\x02K\x03\x87q\x00.')
# (1, 2, 3)

pickle.dumps(b'\x80\x03cbuiltins\nset\nq\x00]q\x01(K\x01K\x02K\x03e\x85q\x02Rq\x03.')
# {1, 2, 3}

pickle.dumps(b'\x80\x03}q\x00(X\x01\x00\x00\x00aq\x01K\x01X\x01\x00\x00\x00bq\x02K\x02X\x01\x00\x00\x00cq\x03K\x03u.')
# {'a': 1, 'b': 2, 'c': 3}

3.4. Serialize Dates and Datetimes

import pickle


dt = datetime(1969, 7, 21, 2, 56, 15)
pickle.dumps(dt)
# b'\x80\x03cdatetime\ndatetime\nq\x00C\n\x07\xb1\x07\x15\x028\x0f\x00\x00\x00q\x01\x85q\x02Rq\x03.'
import pickle


pickle.loads(b'\x80\x03cdatetime\ndatetime\nq\x00C\n\x07\xb1\x07\x15\x028\x0f\x00\x00\x00q\x01\x85q\x02Rq\x03.')
# datetime.datetime(1969, 7, 21, 2, 56, 15)

3.5. Serialize and deserialize objects

import pickle


class Astronaut:
    def __init__(self, first_name, last_name):
        self.first_name = first_name
        self.last_name = last_name

jan = Astronaut('Jan', 'Twardowski')

pickle.dumps(jan)
# b'\x80\x03c__main__\nAstronaut\nq\x00)\x81q\x01}q\x02(X\n\x00\x00\x00first_nameq\x03X\x03\x00\x00\x00Janq\x04X\t\x00\x00\x00last_nameq\x05X\n\x00\x00\x00Twardowskiq\x06ub.'

pickle.loads(b'\x80\x03c__main__\nAstronaut\nq\x00)\x81q\x01}q\x02(X\n\x00\x00\x00first_nameq\x03X\x03\x00\x00\x00Janq\x04X\t\x00\x00\x00last_nameq\x05X\n\x00\x00\x00Twardowskiq\x06ub.')
# <__main__.Astronaut object at 0x10585f8d0>

3.6. Serialize and deserialize to file

  • File extension pkl

3.6.1. Dump to file

Listing 223. Dump to file
import pickle


INPUT = [1, 2, 3]

with open('filename.pkl', mode='wb') as file:
    pickle.dump(INPUT, file)

3.6.2. Load from file

Listing 224. Load from file
import pickle


with open('filename.pkl', mode='rb') as file:
    OUTPUT = pickle.load(file)

print(OUTPUT)

3.7. Advanced Example

Listing 225. Advanced Example
import pickle
from datetime import datetime, timezone, timedelta


def month_ago(dt):
    return dt - timedelta(days=30)


class Astronaut:
    agency = 'NASA'

    def __init__(self, name):
        self.name = name


jose = Astronaut(name='Jose Jimenez')
now = datetime.now(tz=timezone.utc)


INPUT = [
    jose,
    Astronaut,
    month_ago(now),
    str(now),
    now.__str__(),
    '{}'.format(now),
    f'{now}',
    {'imie': 'Иван', 'nazwisko': 'Иванович'},
    {10, 20, 30},
    (1,),
    10,
    10.5,
]

pickle.dumps(INPUT)
# b'\x80\x03]q\x00(c__main__\nAstronaut\nq\x01)\x81q\x02}q\x03X\x04\x00\x00\x00nameq\x04X\x0c\x00\x00\x00Jose Jimenezq\x05sbh\x01cdatetime\ndatetime\nq\x06C\n\x07\xe2\t\x0b\r\n\x05\x04\xa9\xfdq\x07cdatetime\ntimezone\nq\x08cdatetime\ntimedelta\nq\tK\x00K\x00K\x00\x87q\nRq\x0b\x85q\x0cRq\r\x86q\x0eRq\x0fX \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x10X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x11X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x12X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x13}q\x14(X\x04\x00\x00\x00imieq\x15X\x08\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbdq\x16X\x08\x00\x00\x00nazwiskoq\x17X\x10\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbd\xd0\xbe\xd0\xb2\xd0\xb8\xd1\x87q\x18ucbuiltins\nset\nq\x19]q\x1a(K\nK\x14K\x1ee\x85q\x1bRq\x1cK\x01\x85q\x1dK\[email protected]%\x00\x00\x00\x00\x00\x00e.'

pickle.loads(b'\x80\x03]q\x00(c__main__\nAstronaut\nq\x01)\x81q\x02}q\x03X\x04\x00\x00\x00nameq\x04X\x0c\x00\x00\x00Jose Jimenezq\x05sbh\x01cdatetime\ndatetime\nq\x06C\n\x07\xe2\t\x0b\r\n\x05\x04\xa9\xfdq\x07cdatetime\ntimezone\nq\x08cdatetime\ntimedelta\nq\tK\x00K\x00K\x00\x87q\nRq\x0b\x85q\x0cRq\r\x86q\x0eRq\x0fX \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x10X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x11X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x12X \x00\x00\x002018-10-11 13:10:05.305661+00:00q\x13}q\x14(X\x04\x00\x00\x00imieq\x15X\x08\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbdq\x16X\x08\x00\x00\x00nazwiskoq\x17X\x10\x00\x00\x00\xd0\x98\xd0\xb2\xd0\xb0\xd0\xbd\xd0\xbe\xd0\xb2\xd0\xb8\xd1\x87q\x18ucbuiltins\nset\nq\x19]q\x1a(K\nK\x14K\x1ee\x85q\x1bRq\x1cK\x01\x85q\x1dK\n[email protected]%\x00\x00\x00\x00\x00\x00e.')
# [
#   <__main__.Astronaut object at 0x10585f850>,
#   <class '__main__.Astronaut'>,
#   datetime.datetime(2018, 9, 11, 13, 10, 5, 305661, tzinfo=datetime.timezone.utc),
#   '2018-10-11 13:10:05.305661+00:00',
#   '2018-10-11 13:10:05.305661+00:00',
#   '2018-10-11 13:10:05.305661+00:00',
#   '2018-10-11 13:10:05.305661+00:00',
#   {'imie': 'Иван', 'nazwisko': 'Иванович'},
#   {10, 20, 30},
#   (1,),
#   10,
#   10.5
# ]

3.8. Assignments

3.8.1. Pickle serialization

  • Complexity level: easy

  • Lines of code to write: 10 lines

  • Estimated time of completion: 10 min

  • Filename: solution/pickle_dump.py

English
  1. Use data input addressbook

  2. Using pickle save data structure to file

  3. Recreate data structure from file

Polish
  1. Użyj obiektu książki adresowej z danych wejściowych

  2. Za pomocą pickle zapisz strukturę danych do pliku

  3. Odtwórz strukturę danych na podstawie danych z pliku

Input
class Contact:
    def __init__(self, first_name, last_name, addresses=()):
        self.first_name = first_name
        self.last_name = last_name
        self.addresses = addresses


class Address:
    def __init__(self, location, city):
        self.location = location
        self.city = city


INPUT = [
    Contact(first_name='Jan', last_name='Twardowski', addresses=(
        Address(location='Johnson Space Center', city='Houston, TX'),
        Address(location='Kennedy Space Center', city='Merritt Island, FL'),
        Address(location='Jet Propulsion Laboratory', city='Pasadena, CA'),
    )),
    Contact(first_name='Mark', last_name='Watney'),
    Contact(first_name='Melissa', last_name='Lewis', addresses=()),
]