17.5. CSV DictReader

- Reads CSV file to list[dict]
csv.DictReader()

17.5.1. SetUp

>>> import csv
>>> from pprint import pprint

>>> DATA = """
...
... "firstname","lastname","age"
... "Mark","Watney","42"
... "Melissa","Lewis","41"
... "Rick","Martinez","40"
... "Alex","Vogel","42"
... "Beth","Johanssen","29"
... "Chris","Beck","36"
...
... """
>>>
>>> with open('/tmp/myfile.csv', mode='wt') as file:
...     file.write(DATA.strip())
159

17.5.2. Minimal

Data:

$ cat /tmp/myfile.csv
"firstname","lastname","age"
"Mark","Watney","42"
"Melissa","Lewis","41"
"Rick","Martinez","40"
"Alex","Vogel","42"
"Beth","Johanssen","29"
"Chris","Beck","36"

Usage:

>>> with open('/tmp/myfile.csv', mode='rt') as file:
...     reader = csv.DictReader(file)
...     result = list(reader)

Result:

>>> pprint(result, sort_dicts=False)
[{'firstname': 'Mark', 'lastname': 'Watney', 'age': '42'},
 {'firstname': 'Melissa', 'lastname': 'Lewis', 'age': '41'},
 {'firstname': 'Rick', 'lastname': 'Martinez', 'age': '40'},
 {'firstname': 'Alex', 'lastname': 'Vogel', 'age': '42'},
 {'firstname': 'Beth', 'lastname': 'Johanssen', 'age': '29'},
 {'firstname': 'Chris', 'lastname': 'Beck', 'age': '36'}]

17.5.3. Parametrized

Data:

$ cat /tmp/myfile.csv
"firstname","lastname","age"
"Mark","Watney","42"
"Melissa","Lewis","41"
"Rick","Martinez","40"
"Alex","Vogel","42"
"Beth","Johanssen","29"
"Chris","Beck","36"

Usage:

>>> with open('/tmp/myfile.csv', mode='rt', encoding='utf-8') as file:
...     reader = csv.DictReader(file, delimiter=',', quoting=csv.QUOTE_ALL, quotechar='"', lineterminator='\n')
...     result = list(reader)

Result:

>>> pprint(result, sort_dicts=False)
[{'firstname': 'Mark', 'lastname': 'Watney', 'age': '42'},
 {'firstname': 'Melissa', 'lastname': 'Lewis', 'age': '41'},
 {'firstname': 'Rick', 'lastname': 'Martinez', 'age': '40'},
 {'firstname': 'Alex', 'lastname': 'Vogel', 'age': '42'},
 {'firstname': 'Beth', 'lastname': 'Johanssen', 'age': '29'},
 {'firstname': 'Chris', 'lastname': 'Beck', 'age': '36'}]

17.5.4. Custom Header

Read data from a CSV file using csv.DictReader(). When you pass custom field names, note that the first line of the file (typically the header) is treated like normal data. Therefore we skip it by consuming the first row with old_header = next(reader):

>>> fieldnames = ['fname', 'lname', 'age']
>>>
>>> with open('/tmp/myfile.csv', mode='rt') as file:
...     reader = csv.DictReader(file, fieldnames)
...     old_header = next(reader)
...     result = list(reader)

Result:

>>> pprint(result, sort_dicts=False)
[{'fname': 'Mark', 'lname': 'Watney', 'age': '42'},
 {'fname': 'Melissa', 'lname': 'Lewis', 'age': '41'},
 {'fname': 'Rick', 'lname': 'Martinez', 'age': '40'},
 {'fname': 'Alex', 'lname': 'Vogel', 'age': '42'},
 {'fname': 'Beth', 'lname': 'Johanssen', 'age': '29'},
 {'fname': 'Chris', 'lname': 'Beck', 'age': '36'}]

17.5.5. Use Case - 1

sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor

>>> import csv
>>> from pathlib import Path
>>> from pprint import pprint
>>>
>>>
>>> DATA = """sepal_length,sepal_width,petal_length,petal_width,species
... 5.8,2.7,5.1,1.9,virginica
... 5.1,3.5,1.4,0.2,setosa
... 5.7,2.8,4.1,1.3,versicolor
... """
>>>
>>> _ = Path('/tmp/myfile.csv').write_text(DATA)
>>>
>>>
>>> def clean(row: dict) -> dict:
...     return {
...         'sepal_length': float(row['sepal_length']),
...         'sepal_width': float(row['sepal_width']),
...         'petal_length': float(row['petal_length']),
...         'petal_width': float(row['petal_width']),
...         'species': row['species']
...     }
>>>
>>>
>>> with open('/tmp/myfile.csv') as file:
...     reader = csv.DictReader(file)
...     result = map(clean, reader)
...     result = list(result)
>>>
>>> pprint(result, sort_dicts=False)
[{'sepal_length': 5.8,
  'sepal_width': 2.7,
  'petal_length': 5.1,
  'petal_width': 1.9,
  'species': 'virginica'},
 {'sepal_length': 5.1,
  'sepal_width': 3.5,
  'petal_length': 1.4,
  'petal_width': 0.2,
  'species': 'setosa'},
 {'sepal_length': 5.7,
  'sepal_width': 2.8,
  'petal_length': 4.1,
  'petal_width': 1.3,
  'species': 'versicolor'}]

17.5.6. Assignments

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: CSV DictReader Iris
# - Difficulty: easy
# - Lines: 5
# - Minutes: 5

# %% English
# 1. Define `result: list[dict]`
# 2. To `result` add data read from `FILE`
# 3. Use `csv.DictReader` to parse file
# 4. Do not convert values to `int`, leave as `str`
# 5. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj `result: list[dict]`
# 2. Do `result` dodaj wczytane dane z pliku `FILE`
# 3. Użyj `csv.DictReader` do sparsowania pliku
# 4. Nie konwertuj wartości na `int`, pozostaw jako `str`
# 5. Uruchom doctesty - wszystkie muszą się powieść

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is dict for x in result), \
'All rows in `result` should be dict'

>>> from os import remove
>>> remove(FILE)

>>> from pprint import pprint
>>> pprint(result, sort_dicts=False)
[{'firstname': 'Mark', 'lastname': 'Watney', 'age': '42'},
 {'firstname': 'Melissa', 'lastname': 'Lewis', 'age': '41'},
 {'firstname': 'Rick', 'lastname': 'Martinez', 'age': '40'},
 {'firstname': 'Alex', 'lastname': 'Vogel', 'age': '42'},
 {'firstname': 'Beth', 'lastname': 'Johanssen', 'age': '29'},
 {'firstname': 'Chris', 'lastname': 'Beck', 'age': '36'}]
"""

import csv


# Scratch CSV file, created relative to the current working directory.
# The doctests in the `# %% Tests` section delete it via `remove(FILE)`.
FILE = r'_temporary.csv'

# CSV fixture: one header row followed by six data rows.
# The literal opens with a newline for readability only.
DATA = """
firstname,lastname,age
Mark,Watney,42
Melissa,Lewis,41
Rick,Martinez,40
Alex,Vogel,42
Beth,Johanssen,29
Chris,Beck,36
"""

# Write the fixture to disk. `lstrip()` removes the leading newline so the
# header becomes the first line of the file; the trailing newline is kept.
with open(FILE, mode='wt', encoding='utf-8') as file:
    file.write(DATA.lstrip())

# Exercise placeholder: replace `...` below with your solution.
# The doctests fail while `result` is still `Ellipsis`.
# Define `result: list[dict]`
# To `result` add data read from `FILE`
# Use `csv.DictReader` to parse file
# Do not convert values to `int`, leave as `str`
# type: list[dict]
with open(FILE, mode='rt', encoding='utf-8') as file:
    result = ...

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: CSV DictReader Iris
# - Difficulty: easy
# - Lines: 5
# - Minutes: 5

# %% English
# 1. Define `result: list[dict]`
# 2. To `result` add data read from `FILE`
# 3. Use `csv.DictReader` to parse file
# 4. Convert `age` values to `int` (leave names as `str`)
# 5. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj `result: list[dict]`
# 2. Do `result` dodaj wczytane dane z pliku `FILE`
# 3. Użyj `csv.DictReader` do sparsowania pliku
# 4. Skonwertuj wartości na `int`
# 5. Uruchom doctesty - wszystkie muszą się powieść

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is dict for x in result), \
'All rows in `result` should be dict'

>>> from os import remove
>>> remove(FILE)

>>> from pprint import pprint
>>> pprint(result, sort_dicts=False)
[{'firstname': 'Mark', 'lastname': 'Watney', 'age': 42},
 {'firstname': 'Melissa', 'lastname': 'Lewis', 'age': 41},
 {'firstname': 'Rick', 'lastname': 'Martinez', 'age': 40},
 {'firstname': 'Alex', 'lastname': 'Vogel', 'age': 42},
 {'firstname': 'Beth', 'lastname': 'Johanssen', 'age': 29},
 {'firstname': 'Chris', 'lastname': 'Beck', 'age': 36}]
"""

import csv


# Scratch CSV file, created relative to the current working directory.
# The doctests in the `# %% Tests` section delete it via `remove(FILE)`.
FILE = r'_temporary.csv'

# CSV fixture: one header row followed by six data rows.
# The literal opens with a newline for readability only.
DATA = """
firstname,lastname,age
Mark,Watney,42
Melissa,Lewis,41
Rick,Martinez,40
Alex,Vogel,42
Beth,Johanssen,29
Chris,Beck,36
"""

# Write the fixture to disk. `lstrip()` removes the leading newline so the
# header becomes the first line of the file; the trailing newline is kept.
with open(FILE, mode='wt', encoding='utf-8') as file:
    file.write(DATA.lstrip())

# Exercise placeholder: replace `...` below with your solution.
# The doctests fail while `result` is still `Ellipsis`.
# Define `result: list[dict]`
# To `result` add data read from `FILE`
# Use `csv.DictReader` to parse file
# Convert `age` values to `int`; the expected output keeps
# `firstname` and `lastname` as `str`
# type: list[dict]
with open(FILE, mode='rt', encoding='utf-8') as file:
    result = ...