11.5. CSV DictReader

    • Reads a CSV file row by row, yielding one dict per row; list(reader) gives list[dict]

  • csv.DictReader()

11.5.1. SetUp

>>> import csv
>>> from pathlib import Path
>>> from pprint import pprint

11.5.2. Minimal

Data:

sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor

SetUp:

>>> DATA = """sepal_length,sepal_width,petal_length,petal_width,species
... 5.8,2.7,5.1,1.9,virginica
... 5.1,3.5,1.4,0.2,setosa
... 5.7,2.8,4.1,1.3,versicolor
... """
>>>
>>> _ = Path('/tmp/myfile.csv').write_text(DATA)

Usage:

>>> with open('/tmp/myfile.csv') as file:
...     reader = csv.DictReader(file)
...     result = list(reader)
>>>
>>> pprint(result, sort_dicts=False)
[{'sepal_length': '5.8',
  'sepal_width': '2.7',
  'petal_length': '5.1',
  'petal_width': '1.9',
  'species': 'virginica'},
 {'sepal_length': '5.1',
  'sepal_width': '3.5',
  'petal_length': '1.4',
  'petal_width': '0.2',
  'species': 'setosa'},
 {'sepal_length': '5.7',
  'sepal_width': '2.8',
  'petal_length': '4.1',
  'petal_width': '1.3',
  'species': 'versicolor'}]

11.5.3. Parametrized

Data:

"sepal_length";"sepal_width";"petal_length";"petal_width";"species"
"5.8";"2.7";"5.1";"1.9";"virginica"
"5.1";"3.5";"1.4";"0.2";"setosa"
"5.7";"2.8";"4.1";"1.3";"versicolor"

SetUp:

>>> DATA = '''"sepal_length";"sepal_width";"petal_length";"petal_width";"species"
... "5.8";"2.7";"5.1";"1.9";"virginica"
... "5.1";"3.5";"1.4";"0.2";"setosa"
... "5.7";"2.8";"4.1";"1.3";"versicolor"
... '''
>>>
>>> _ = Path('/tmp/myfile.csv').write_text(DATA)

Usage:

>>> with open('/tmp/myfile.csv', mode='r', encoding='utf-8') as file:
...     reader = csv.DictReader(file, quotechar='"', delimiter=';', quoting=csv.QUOTE_ALL)
...     result = list(reader)
>>>
>>> pprint(result, sort_dicts=False)
[{'sepal_length': '5.8',
  'sepal_width': '2.7',
  'petal_length': '5.1',
  'petal_width': '1.9',
  'species': 'virginica'},
 {'sepal_length': '5.1',
  'sepal_width': '3.5',
  'petal_length': '1.4',
  'petal_width': '0.2',
  'species': 'setosa'},
 {'sepal_length': '5.7',
  'sepal_width': '2.8',
  'petal_length': '4.1',
  'petal_width': '1.3',
  'species': 'versicolor'}]

11.5.4. Custom Header

Read data from a CSV file using csv.DictReader(). When passing custom field names via fieldnames, note that the first line (typically a header) will be treated like normal data. Therefore we skip it using old_header = file.readline():

Data:

sl,sw,pl,pw,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor

SetUp:

>>> DATA = """sl,sw,pl,pw,species
... 5.8,2.7,5.1,1.9,virginica
... 5.1,3.5,1.4,0.2,setosa
... 5.7,2.8,4.1,1.3,versicolor
... """
>>>
>>> _ = Path('/tmp/myfile.csv').write_text(DATA)

Usage:

>>> FIELDNAMES = [
...     'sepal_length',
...     'sepal_width',
...     'petal_length',
...     'petal_width',
...     'species',
... ]
>>>
>>> with open('/tmp/myfile.csv') as file:
...     old_header = file.readline()  # skip the first line (old header)
...     reader = csv.DictReader(file, fieldnames=FIELDNAMES)
...     result = list(reader)
>>>
>>> pprint(result, sort_dicts=False)
[{'sepal_length': '5.8',
  'sepal_width': '2.7',
  'petal_length': '5.1',
  'petal_width': '1.9',
  'species': 'virginica'},
 {'sepal_length': '5.1',
  'sepal_width': '3.5',
  'petal_length': '1.4',
  'petal_width': '0.2',
  'species': 'setosa'},
 {'sepal_length': '5.7',
  'sepal_width': '2.8',
  'petal_length': '4.1',
  'petal_width': '1.3',
  'species': 'versicolor'}]

11.5.5. Use Case - 0x01

sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor
>>> import csv
>>> from pathlib import Path
>>> from pprint import pprint
>>>
>>>
>>> DATA = """sepal_length,sepal_width,petal_length,petal_width,species
... 5.8,2.7,5.1,1.9,virginica
... 5.1,3.5,1.4,0.2,setosa
... 5.7,2.8,4.1,1.3,versicolor
... """
>>>
>>> _ = Path('/tmp/myfile.csv').write_text(DATA)
>>>
>>>
>>> def clean(row: dict) -> dict:
...     return {
...         'sepal_length': float(row['sepal_length']),
...         'sepal_width': float(row['sepal_width']),
...         'petal_length': float(row['petal_length']),
...         'petal_width': float(row['petal_width']),
...         'species': row['species']
...     }
>>>
>>>
>>> with open('/tmp/myfile.csv') as file:
...     reader = csv.DictReader(file)
...     result = map(clean, reader)
...     result = list(result)
>>>
>>> pprint(result, sort_dicts=False)
[{'sepal_length': 5.8,
  'sepal_width': 2.7,
  'petal_length': 5.1,
  'petal_width': 1.9,
  'species': 'virginica'},
 {'sepal_length': 5.1,
  'sepal_width': 3.5,
  'petal_length': 1.4,
  'petal_width': 0.2,
  'species': 'setosa'},
 {'sepal_length': 5.7,
  'sepal_width': 2.8,
  'petal_length': 4.1,
  'petal_width': 1.3,
  'species': 'versicolor'}]

11.5.6. Assignments

"""
* Assignment: CSV DictReader Iris
* Complexity: easy
* Lines of code: 5 lines
* Time: 5 min

English:
    1. Define `result: list[dict]`
    2. To `result` add data read from `FILE`
    3. Use `csv.DictReader` to parse file
    4. Do not convert values to float, leave as str
    5. Run doctests - all must succeed

Polish:
    1. Zdefiniuj `result: list[dict]`
    2. Do `result` dodaj wczytane dane z pliku `FILE`
    3. Użyj `csv.DictReader` do sparsowania pliku
    4. Nie konwertuj wartości na floaty, pozostaw jako str
    5. Uruchom doctesty - wszystkie muszą się powieść

Tests:
    >>> import sys; sys.tracebacklimit = 0
    >>> from pprint import pprint
    >>> from os import remove
    >>> remove(FILE)

    >>> assert result is not Ellipsis, \
    'Assign result to variable: `result`'
    >>> assert type(result) is list, \
    'Variable `result` has invalid type, should be list'
    >>> assert all(type(x) is dict for x in result), \
    'All rows in `result` should be dict'

    >>> pprint(result, sort_dicts=False)
    [{'sepal_length': '5.8',
      'sepal_width': '2.7',
      'petal_length': '5.1',
      'petal_width': '1.9',
      'species': 'virginica'},
     {'sepal_length': '5.1',
      'sepal_width': '3.5',
      'petal_length': '1.4',
      'petal_width': '0.2',
      'species': 'setosa'},
     {'sepal_length': '5.7',
      'sepal_width': '2.8',
      'petal_length': '4.1',
      'petal_width': '1.3',
      'species': 'versicolor'}]
"""

import csv


# Sample dataset: one header row followed by three records. There is no
# trailing newline after the last record.
DATA = """sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor"""

# Scratch CSV file used by the exercise; a relative path, so it is created
# in the current working directory.
FILE = r'_temporary.csv'

# Write the sample dataset to disk so there is a file to parse below.
with open(FILE, mode='wt', encoding='utf-8') as file:
    file.write(DATA)

# Define `result: list[dict]`
# To `result` add data read from `FILE`
# Use `csv.DictReader` to parse file
# Do not convert values to float, leave as str
# type: list[dict]
result = ...

# `newline=''` leaves line-ending handling to the csv module, which is what
# the csv documentation asks for on files passed to its readers.
with open(FILE, mode='rt', encoding='utf-8', newline='') as file:
    ...