4.2. CSV Read/Write

4.2.1. Rationale

  • csv.reader: list[tuple]

  • csv.writer: list[tuple]

  • csv.DictReader: list[dict]

  • csv.DictWriter: list[dict]

4.2.2. Reader -> list[tuple]

Read data from CSV file using csv.reader():

import csv

FILE = r'_temporary.csv'
# sepal_length,sepal_width,petal_length,petal_width,species
# 5.4,3.9,1.3,0.4,setosa
# 5.9,3.0,5.1,1.8,virginica
# 6.0,3.4,4.5,1.6,versicolor


with open(FILE) as file:
    result = csv.reader(file)

    for line in result:
        print(line)

# ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
# ['5.4', '3.9', '1.3', '0.4', 'setosa']
# ['5.9', '3.0', '5.1', '1.8', 'virginica']
# ['6.0', '3.4', '4.5', '1.6', 'versicolor']

4.2.3. Writer <- list[tuple]

Writing data to CSV file using csv.writer():

import csv

FILE = r'_temporary.csv'

DATA = [('Sepal length', 'Sepal width', 'Petal length', 'Petal width', 'Species'),
        (5.8, 2.7, 5.1, 1.9, 'virginica'),
        (5.1, 3.5, 1.4, 0.2, 'setosa'),
        (5.7, 2.8, 4.1, 1.3, 'versicolor')]

with open(FILE, mode='w') as file:
    result = csv.writer(file)
    result.writerows(DATA)

# Sepal length,Sepal width,Petal length,Petal width,Species
# 5.8,2.7,5.1,1.9,virginica
# 5.1,3.5,1.4,0.2,setosa
# 5.7,2.8,4.1,1.3,versicolor

4.2.4. DictReader -> list[dict]

import csv

FILE = r'_temporary.csv'
# sepal_length,sepal_width,petal_length,petal_width,species
# 5.4,3.9,1.3,0.4,setosa
# 5.9,3.0,5.1,1.8,virginica
# 6.0,3.4,4.5,1.6,versicolor


with open(FILE) as file:
    result = csv.DictReader(file)

    for line in result:
        print(line)

# {'sepal_length': '5.4', 'sepal_width': '3.9', 'petal_length': '1.3', 'petal_width': '0.4', 'species': 'setosa'}
# {'sepal_length': '5.9', 'sepal_width': '3.0', 'petal_length': '5.1', 'petal_width': '1.8', 'species': 'virginica'}
# {'sepal_length': '6.0', 'sepal_width': '3.4', 'petal_length': '4.5', 'petal_width': '1.6', 'species': 'versicolor'}

Read data from CSV file using csv.DictReader(). While giving custom names note, that first line (typically a header) will be treated like normal data. Therefore we skip it using header = file.readline():

import csv

FILE = r'_temporary.csv'
# sepal_length,sepal_width,petal_length,petal_width,species
# 5.4,3.9,1.3,0.4,setosa
# 5.9,3.0,5.1,1.8,virginica
# 6.0,3.4,4.5,1.6,versicolor

FIELDNAMES = [
    'Sepal Length',
    'Sepal Width',
    'Petal Length',
    'Petal Width',
    'Species',
]


with open(FILE) as file:
    result = csv.DictReader(file, fieldnames=FIELDNAMES, delimiter=',')
    header = file.readline()  # skip the first line

    for line in result:
        print(line)

# {'Sepal Length': '5.4', 'Sepal Width': '3.9', 'Petal Length': '1.3', 'Petal Width': '0.4', 'Species': 'setosa'}
# {'Sepal Length': '5.9', 'Sepal Width': '3.0', 'Petal Length': '5.1', 'Petal Width': '1.8', 'Species': 'virginica'}
# {'Sepal Length': '6.0', 'Sepal Width': '3.4', 'Petal Length': '4.5', 'Petal Width': '1.6', 'Species': 'versicolor'}

4.2.5. DictWriter <- list[dict]

  • Remember to add mode='w' to open() function

  • Default encoding is encoding='utf-8'

import csv

FILE = r'_temporary.csv'

DATA = [{'Sepal Length': 5.4, 'Sepal Width': 3.9, 'Petal Length': 1.3, 'Petal Width': 0.4, 'Species': 'setosa'},
        {'Sepal Length': 5.9, 'Sepal Width': 3.0, 'Petal Length': 5.1, 'Petal Width': 1.8, 'Species': 'virginica'},
        {'Sepal Length': 6.0, 'Sepal Width': 3.4, 'Petal Length': 4.5, 'Petal Width': 1.6, 'Species': 'versicolor'}]

header = DATA[0].keys()

with open(FILE, mode='w') as file:
    result = csv.DictWriter(file, fieldnames=header)
    result.writeheader()
    result.writerows(DATA)


# Sepal Length,Sepal Width,Petal Length,Petal Width,Species
# 5.4,3.9,1.3,0.4,setosa
# 5.9,3.0,5.1,1.8,virginica
# 6.0,3.4,4.5,1.6,versicolor

Write data to CSV file using csv.DictWriter():

import csv

FILE = r'_temporary.csv'

DATA = [{'sepal_length': 5.4, 'sepal_width': 3.9, 'petal_length': 1.3, 'petal_width': 0.4, 'species': 'setosa'},
        {'sepal_length': 5.9, 'sepal_width': 3.0, 'petal_length': 5.1, 'petal_width': 1.8, 'species': 'virginica'},
        {'sepal_length': 6.0, 'sepal_width': 3.4, 'petal_length': 4.5, 'petal_width': 1.6, 'species': 'versicolor'}]

FIELDNAMES = ['sepal_length', 'sepal_width', 'petal_length',
              'petal_width', 'species']

with open(FILE, mode='w', encoding='utf-8') as file:
    result = csv.DictWriter(
        f=file,
        fieldnames=FIELDNAMES,
        delimiter=',',
        quotechar='"',
        quoting=csv.QUOTE_ALL,
        lineterminator='\n')

    result.writeheader()
    result.writerows(DATA)

# "sepal_length","sepal_width","petal_length","petal_width","species"
# "5.4","3.9","1.3","0.4","setosa"
# "5.9","3.0","5.1","1.8","virginica"
# "6.0","3.4","4.5","1.6","versicolor"

4.2.6. Use Cases

import csv

FILE = r'_temporary.csv'
# 'sepal_length';'sepal_width';'petal_length';'petal_width';'species'
# '5,4';'3,9';'1,3';'0,4';'setosa'
# '5,9';'3,0';'5,1';'1,8';'virginica'
# '6,0';'3,4';'4,5';'1,6';'versicolor'


def isnumeric(value):
    try:
        float(value)
        return True
    except ValueError:
        return False


def clean(line):
    return {key: float(v) if isnumeric(v) else v
            for key, value in line.items()
            if (v := value.replace(',', '.'))}


with open(FILE) as file:
    result = csv.DictReader(file, delimiter=';', quotechar="'")

    for line in result:
        print(clean(line))


# {'sepal_length': 5.4, 'sepal_width': 3.9, 'petal_length': 1.3, 'petal_width': 0.4, 'species': 'setosa'}
# {'sepal_length': 5.9, 'sepal_width': 3.0, 'petal_length': 5.1, 'petal_width': 1.8, 'species': 'virginica'}
# {'sepal_length': 6.0, 'sepal_width': 3.4, 'petal_length': 4.5, 'petal_width': 1.6, 'species': 'versicolor'}
import csv

FILE = r'_temporary.csv'

total = 0
count = 0

with open(FILE) as file:
    data = csv.reader(file)
    next(data)

    for line in data:
        total += float(line[1])
        count += 1

mean = total / count
print(mean)

4.2.7. Assignments

Code 4.6. Solution
"""
* Assignment: CSV Read/Write Writer
* Complexity: easy
* Lines of code: 4 lines
* Time: 5 min

English:
    1. Using `csv.writer()` save `DATA` to file
    2. Use Unix `\n` line terminator
    3. Run doctests - all must succeed

Polish:
    1. Za pomocą `csv.writer()` zapisz `DATA` do pliku
    2. Użyj zakończenia linii Unix `\n`
    3. Uruchom doctesty - wszystkie muszą się powieść

Hint:
    * For Python before 3.8: `dict(OrderedDict)`

Tests:
    >>> import sys; sys.tracebacklimit = 0

    >>> result = open(FILE).read()
    >>> print(result)
    Sepal length,Sepal width,Petal length,Petal width,Species
    5.8,2.7,5.1,1.9,virginica
    5.1,3.5,1.4,0.2,setosa
    5.7,2.8,4.1,1.3,versicolor
    6.3,2.9,5.6,1.8,virginica
    6.4,3.2,4.5,1.5,versicolor
    4.7,3.2,1.3,0.2,setosa
    7.0,3.2,4.7,1.4,versicolor
    7.6,3.0,6.6,2.1,virginica
    4.9,3.0,1.4,0.2,setosa
    <BLANKLINE>
    >>> from os import remove
    >>> remove(FILE)
"""

FILE = r'_temporary.csv'

DATA = [('Sepal length', 'Sepal width', 'Petal length', 'Petal width', 'Species'),
        (5.8, 2.7, 5.1, 1.9, 'virginica'),
        (5.1, 3.5, 1.4, 0.2, 'setosa'),
        (5.7, 2.8, 4.1, 1.3, 'versicolor'),
        (6.3, 2.9, 5.6, 1.8, 'virginica'),
        (6.4, 3.2, 4.5, 1.5, 'versicolor'),
        (4.7, 3.2, 1.3, 0.2, 'setosa'),
        (7.0, 3.2, 4.7, 1.4, 'versicolor'),
        (7.6, 3.0, 6.6, 2.1, 'virginica'),
        (4.9, 3.0, 1.4, 0.2, 'setosa'),]


Code 4.7. Solution
"""
* Assignment: CSV Read/Write DictWriter
* Complexity: easy
* Lines of code: 10 lines
* Time: 8 min

English:
    1. Using `csv.DictWriter()` save `DATA` to file
    2. Open file in your spreadsheet program like Microsoft Excel / Libre Office / Numbers etc.
    3. Open file in simple in your IDE and simple text editor (like Notepad, vim, gedit)
    4. Non functional requirements:
        a. All fields must be enclosed by double quote `"` character
        b. Use `,` to separate columns
        d. Use Unix `\n` line terminator
    5. Run doctests - all must succeed

Polish:
    1. Za pomocą `csv.DictWriter()` zapisz `DATA` do pliku
    2. Spróbuj otworzyć plik w arkuszu kalkulacyjnym tj. Microsoft Excel / Libre Office / Numbers itp
    3. Spróbuj otworzyć plik w IDE i prostym edytorze tekstu tj. Notepad, vim lub gedit
    4. Wymagania niefunkcjonalne:
        a. Wszystkie pola muszą być otoczone znakiem cudzysłowu `"`
        b. Użyj `,` do oddzielenia kolumn
        d. Użyj zakończenia linii Unix `\n`
    5. Uruchom doctesty - wszystkie muszą się powieść

Hint:
    * For Python before 3.8: `dict(OrderedDict)`

Tests:
    >>> import sys; sys.tracebacklimit = 0

    >>> result = open(FILE).read()
    >>> print(result)   # doctest: +NORMALIZE_WHITESPACE
    "firstname","lastname"
    "Jan","Twardowski"
    "Rick","Martinez"
    "Mark","Watney"
    "Ivan","Ivanovic"
    "Melissa","Lewis"
    >>> from os import remove
    >>> remove(FILE)
"""

FILE = r'_temporary.csv'

DATA = [{'firstname': 'Jan', 'lastname': 'Twardowski'},
        {'firstname': 'Rick', 'lastname': 'Martinez'},
        {'firstname': 'Mark', 'lastname': 'Watney'},
        {'firstname': 'Ivan', 'lastname': 'Ivanovic'},
        {'firstname': 'Melissa', 'lastname': 'Lewis'}]


Code 4.8. Solution
"""
* Assignment: CSV Read/Write DictReader
* Complexity: easy
* Lines of code: 10 lines
* Time: 8 min

English:
    1. Using `csv.DictReader` read the `FILE` content
    2. Use explicit `encoding`, `delimiter` and `quotechar`
    3. Replace column names with `FIELDNAMES`
    4. Skip the first line (header)
    5. Add rows to `result: list[dict]`
    6. Run doctests - all must succeed

Polish:
    1. Korzystając z `csv.DictReader` wczytaj zawartość pliku `FILE`
    2. Podaj jawnie `encoding`, `delimiter` oraz `quotechar`
    3. Podmień nazwy kolumn na `FIELDNAMES`
    4. Pomiń pierwszą linię (nagłówek)
    5. Dodaj wiersze do `result: list[dict]`
    6. Uruchom doctesty - wszystkie muszą się powieść

Hint:
    * For Python before 3.8: `dict(OrderedDict)`

Tests:
    >>> import sys; sys.tracebacklimit = 0

    >>> type(result)
    <class 'list'>
    >>> result  # doctest: +NORMALIZE_WHITESPACE
    [{'Sepal Length': '5.8', 'Sepal Width': '2.7', 'Petal Length': '5.1', 'Petal Width': '1.9', 'Species': 'virginica'},
     {'Sepal Length': '5.1', 'Sepal Width': '3.5', 'Petal Length': '1.4', 'Petal Width': '0.2', 'Species': 'setosa'},
     {'Sepal Length': '5.7', 'Sepal Width': '2.8', 'Petal Length': '4.1', 'Petal Width': '1.3', 'Species': 'versicolor'}]
    >>> from os import remove
    >>> remove(FILE)
"""


FILE = r'_temporary.csv'

FIELDNAMES = ['Sepal Length', 'Sepal Width',
              'Petal Length', 'Petal Width', 'Species']

DATA = """sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor"""


with open(FILE, mode='w') as file:
    file.write(DATA)


result: list = []


Code 4.9. Solution
"""
* Assignment: CSV Read/Write Schemaless
* Complexity: medium
* Lines of code: 7 lines
* Time: 8 min

English:
    1. Using `csv.DictWriter()` write variable schema data to CSV file
    2. `fieldnames` must be automatically generated from `DATA`
    3. Non functional requirements:
        a. All fields must be enclosed by double quote `"` character
        b. Use `;` to separate columns
        c. Use `utf-8` encoding
        d. Use Unix `\n` line terminator
        e. Sort `fieldnames` using `sorted()`
    4. Run doctests - all must succeed

Polish:
    1. Za pomocą `csv.DictWriter()` zapisz do pliku CSV dane o zmiennej strukturze
    2. `fieldnames` musi być generowane automatycznie na podstawie `DATA`
    3. Wymagania niefunkcjonalne:
        a. Wszystkie pola muszą być otoczone znakiem cudzysłowu `"`
        b. Użyj `,` do oddzielenia kolumn
        c. Użyj kodowania `utf-8`
        d. Użyj zakończenia linii Unix `\n`
        e. Posortuj `fieldnames` używając `sorted()`
    4. Uruchom doctesty - wszystkie muszą się powieść

Hint:
    * For Python before 3.8: `dict(OrderedDict)`

Tests:
    >>> import sys; sys.tracebacklimit = 0

    >>> result = open(FILE).read()
    >>> print(result)
    "Petal length","Petal width","Sepal length","Sepal width","Species"
    "","","5.1","3.5","setosa"
    "4.1","1.3","","","versicolor"
    "","1.8","6.3","","virginica"
    "","0.2","5.0","","setosa"
    "4.1","","","2.8","versicolor"
    "","1.8","","2.9","virginica"
    <BLANKLINE>
    >>> from os import remove
    >>> remove(FILE)
"""

FILE = r'_temporary.csv'

DATA = [{'Sepal length': 5.1, 'Sepal width': 3.5, 'Species': 'setosa'},
        {'Petal length': 4.1, 'Petal width': 1.3, 'Species': 'versicolor'},
        {'Sepal length': 6.3, 'Petal width': 1.8, 'Species': 'virginica'},
        {'Sepal length': 5.0, 'Petal width': 0.2, 'Species': 'setosa'},
        {'Sepal width': 2.8, 'Petal length': 4.1, 'Species': 'versicolor'},
        {'Sepal width': 2.9, 'Petal width': 1.8, 'Species': 'virginica'}]


Code 4.10. Solution
"""
* Assignment: CSV Read/Write Objects
* Complexity: medium
* Lines of code: 6 lines
* Time: 8 min

English:
    1. Using `csv.DictWriter()` save data to CSV file
    2. Non functional requirements:
        a. All fields must be enclosed by double quote `"` character
        b. Use `,` to separate columns
        c. Use `utf-8` encoding
        d. Use Unix `\n` line terminator
    3. Run doctests - all must succeed

Polish:
    1. Za pomocą `csv.DictWriter()` zapisz dane do pliku CSV
    2. Wymagania niefunkcjonalne:
        a. Wszystkie pola muszą być otoczone znakiem cudzysłowu `"`
        b. Użyj `,` do oddzielenia kolumn
        c. Użyj kodowania `utf-8`
        d. Użyj zakończenia linii Unix `\n`
    3. Uruchom doctesty - wszystkie muszą się powieść

Hints:
    * `vars(obj)`
    * For Python before 3.8: `dict(OrderedDict)`

Tests:
    >>> import sys; sys.tracebacklimit = 0

    >>> result = open(FILE).read()
    >>> print(result)
    sepal_length,sepal_width,petal_length,petal_width,species
    5.1,3.5,1.4,0.2,setosa
    5.8,2.7,5.1,1.9,virginica
    5.1,3.5,1.4,0.2,setosa
    5.7,2.8,4.1,1.3,versicolor
    6.3,2.9,5.6,1.8,virginica
    6.4,3.2,4.5,1.5,versicolor
    <BLANKLINE>
    >>> from os import remove
    >>> remove(FILE)
"""

from csv import DictWriter


class Iris:
    def __init__(self, sepal_length, sepal_width,
                 petal_length, petal_width, species):
        self.sepal_length = sepal_length
        self.sepal_width = sepal_width
        self.petal_length = petal_length
        self.petal_width = petal_width
        self.species = species


FILE = r'_temporary.txt'

DATA = [Iris(5.1, 3.5, 1.4, 0.2, 'setosa'),
        Iris(5.8, 2.7, 5.1, 1.9, 'virginica'),
        Iris(5.1, 3.5, 1.4, 0.2, 'setosa'),
        Iris(5.7, 2.8, 4.1, 1.3, 'versicolor'),
        Iris(6.3, 2.9, 5.6, 1.8, 'virginica'),
        Iris(6.4, 3.2, 4.5, 1.5, 'versicolor')]