15.7. CSV Recap
15.7.1. Assignments
# %% About
# - Name: CSV Recap Iris
# - Difficulty: easy
# - Lines: 3
# - Minutes: 5
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Using `csv.writer()` save `DATA` to file
# 2. Use Unix `\n` line terminator
# 3. Run doctests - all must succeed
# %% Polish
# 1. Za pomocą `csv.writer()` zapisz `DATA` do pliku
# 2. Użyj zakończenia linii Unix `\n`
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> result = open(FILE).read()
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is str, \
'Variable `result` has invalid type, should be str'
>>> from os import remove
>>> remove(FILE)
>>> print(result)
firstname,lastname,age
Mark,Watney,42
Melissa,Lewis,41
Rick,Martinez,40
Alex,Vogel,42
Beth,Johanssen,29
Chris,Beck,36
<BLANKLINE>
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% Imports
import csv
# %% Types
# %% Data
# Scratch file the exercise writes to; the doctest removes it after reading.
FILE = r'_temporary.csv'

# Rows to export: one dict per person, every dict has the same three keys.
DATA = [
    {'firstname': 'Mark', 'lastname': 'Watney', 'age': 42},
    {'firstname': 'Melissa', 'lastname': 'Lewis', 'age': 41},
    {'firstname': 'Rick', 'lastname': 'Martinez', 'age': 40},
    {'firstname': 'Alex', 'lastname': 'Vogel', 'age': 42},
    {'firstname': 'Beth', 'lastname': 'Johanssen', 'age': 29},
    {'firstname': 'Chris', 'lastname': 'Beck', 'age': 36},
]

# %% Result
with open(FILE, mode='wt', encoding='utf-8') as file:
    # Exercise placeholder: replace `...` with the `csv.writer()` code that
    # writes the header and the rows of DATA. Until then the file is
    # created empty and the doctest fails.
    ...
# %% About
# - Name: CSV Recap Iris
# - Difficulty: easy
# - Lines: 3
# - Minutes: 5
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Using `csv.writer()` save `DATA` to file
# 2. Use Unix `\n` line terminator
# 3. Run doctests - all must succeed
# %% Polish
# 1. Za pomocą `csv.writer()` zapisz `DATA` do pliku
# 2. Użyj zakończenia linii Unix `\n`
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> result = open(FILE).read()
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is str, \
'Variable `result` has invalid type, should be str'
>>> from os import remove
>>> remove(FILE)
>>> print(result)
firstname,lastname,age
Mark,Watney,40
Melissa,Lewis,41
Rick,Martinez,39
Alex,Vogel,42
Beth,Johanssen,29
Chris,Beck,36
<BLANKLINE>
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% Imports
import csv
# %% Types
# %% Data
# Scratch file the exercise writes to; the doctest removes it after reading.
FILE = r'_temporary.csv'
class User:
    """Person record holding a first name, a last name and an age."""

    def __init__(self, firstname: str, lastname: str, age: int) -> None:
        # Plain instance attributes, so `vars(user)` exposes all three fields.
        self.firstname = firstname
        self.lastname = lastname
        self.age = age
# Users to export; the values match the rows expected by the doctest.
DATA = [
    User('Mark', 'Watney', age=40),
    User('Melissa', 'Lewis', age=41),
    User('Rick', 'Martinez', age=39),
    User('Alex', 'Vogel', age=42),
    User('Beth', 'Johanssen', age=29),
    User('Chris', 'Beck', age=36),
]

# %% Result
with open(FILE, mode='wt', encoding='utf-8') as file:
    # Exercise placeholder: replace `...` with the `csv.writer()` code that
    # writes the header and one row per User. Until then the file is
    # created empty and the doctest fails.
    ...
# %% About
# - Name: CSV Recap Syntax
# - Difficulty: easy
# - Lines: 4
# - Minutes: 5
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Using `csv.reader()` read data from `FILE`
# 2. Define `result: list[tuple]` with converted data
# 3. Use Unix `\n` line terminator
# 4. Convert values to proper types, e.g. str, int, float
# 5. Run doctests - all must succeed
# %% Polish
# 1. Używając `csv.reader()` wczytaj dane z `FILE`
# 2. Zdefiniuj `result: list[tuple]` z przekonwertowanymi danymi
# 3. Użyj zakończenia linii Unix `\n`
# 4. Przekonwertuj wartości do odpowiednich typów, np. str, int, float
# 5. Uruchom doctesty - wszystkie muszą się powieść
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is tuple for x in result), \
'All rows in `result` should be tuple'
>>> from os import remove
>>> remove(FILE)
>>> from pprint import pprint
>>> pprint(result)
[('firstname', 'lastname', 'age'),
('Mark', 'Watney', 42),
('Melissa', 'Lewis', 41),
('Rick', 'Martinez', 40),
('Alex', 'Vogel', 42),
('Beth', 'Johanssen', 29),
('Chris', 'Beck', 36)]
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% Imports
import csv
# %% Types
# Written as a string so the `str | int` union is never evaluated at import
# time: that operator exists only on Python 3.10+, while the doctest accepts 3.9.
result: 'list[tuple[str | int, ...]]'

# %% Data
FILE = r'_temporary.csv'

# Input for the exercise: a header line followed by one record per line.
DATA = """
firstname,lastname,age
Mark,Watney,42
Melissa,Lewis,41
Rick,Martinez,40
Alex,Vogel,42
Beth,Johanssen,29
Chris,Beck,36
"""

# Create the input file; lstrip() drops the newline right after the opening
# quotes so the file starts with the header line.
with open(FILE, mode='wt', encoding='utf-8') as file:
    file.write(DATA.lstrip())

# %% Result
with open(FILE, mode='rt', encoding='utf-8') as file:
    # Exercise placeholder: replace `...` with the `csv.reader()` code that
    # builds the list of tuples shown in the doctest.
    result = ...
# %% About
# - Name: CSV Recap Enumerate
# - Difficulty: medium
# - Lines: 8
# - Minutes: 8
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Using `csv.reader()` read data from `FILE`
# 2. Define `result: list[tuple]` with converted data
# 3. Use Unix `\n` line terminator
# 4. Run doctests - all must succeed
# %% Polish
# 1. Za pomocą `csv.reader()` wczytaj dane z `FILE`
# 2. Zdefiniuj `result: list[tuple]` z przekonwertowanymi danymi
# 3. Użyj zakończenia linii Unix `\n`
# 4. Uruchom doctesty - wszystkie muszą się powieść
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is tuple for x in result), \
'All rows in `result` should be tuple'
>>> from os import remove
>>> remove(FILE)
>>> from pprint import pprint
>>> pprint(result)
[('Mark', 'Watney', 'staff'),
('Melissa', 'Lewis', 'admins'),
('Rick', 'Martinez', 'staff'),
('Alex', 'Vogel', 'users'),
('Beth', 'Johanssen', 'staff'),
('Chris', 'Beck', 'staff')]
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% Imports
import csv
# %% Types
result: list[tuple[str, str, str]]

# %% Data
FILE = r'_temporary.csv'

# The first line is a header, not a record. Going by the expected doctest
# output, the names after the first two fields are the labels that the
# numeric last column of each record indexes (0 -> users, 1 -> staff,
# 2 -> admins). The leading `6,2` presumably gives the record and feature
# counts -- TODO confirm.
DATA = """
6,2,users,staff,admins
Mark,Watney,1
Melissa,Lewis,2
Rick,Martinez,1
Alex,Vogel,0
Beth,Johanssen,1
Chris,Beck,1
"""

# Create the input file; lstrip() drops the newline right after the opening
# quotes so the file starts with the header line.
with open(FILE, mode='wt', encoding='utf-8') as file:
    file.write(DATA.lstrip())

# %% Result
with open(FILE, mode='rt', encoding='utf-8') as file:
    # Exercise placeholder: replace `...` with the `csv.reader()` code that
    # builds the list of tuples shown in the doctest.
    result = ...
# %% About
# - Name: CSV Recap Schemaless
# - Difficulty: medium
# - Lines: 7
# - Minutes: 5
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Using `csv.DictWriter()` write variable schema data to `FILE`
# 2. `fieldnames` must be automatically generated from `DATA`
# 3. Non functional requirements:
# - All fields must be enclosed by double quote `"` character
# - Use `,` to separate columns
# - Use `utf-8` encoding
# - Use Unix `\n` line terminator
# - Sort `fieldnames` using `sorted()`
# 4. Run doctests - all must succeed
# %% Polish
# 1. Za pomocą `csv.DictWriter()` zapisz dane o zmiennej strukturze do `FILE`
# 2. `fieldnames` musi być generowane automatycznie na podstawie `DATA`
# 3. Wymagania niefunkcjonalne:
# - Wszystkie pola muszą być otoczone znakiem cudzysłowu `"`
# - Użyj `,` do oddzielenia kolumn
# - Użyj kodowania `utf-8`
# - Użyj zakończenia linii Unix `\n`
# - Posortuj `fieldnames` używając `sorted()`
# 4. Uruchom doctesty - wszystkie muszą się powieść
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> result = open(FILE).read()
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is str, \
'Variable `result` has invalid type, should be str'
>>> from os import remove
>>> remove(FILE)
>>> print(result)
"age","firstname","lastname"
"","Mark","Watney"
"41","Melissa",""
"","Rick","Martinez"
"42","","Vogel"
"29","Beth",""
"36","","Beck"
<BLANKLINE>
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% Imports
import csv
# %% Types
# %% Data
# Scratch file the exercise writes to; the doctest removes it after reading.
FILE = r'_temporary.csv'

# Records with a variable schema: each dict carries only some of the keys
# `firstname`, `lastname` and `age`, in no fixed order.
DATA = [
    {'firstname': 'Mark', 'lastname': 'Watney'},
    {'firstname': 'Melissa', 'age': 41},
    {'lastname': 'Martinez', 'firstname': 'Rick'},
    {'lastname': 'Vogel', 'age': 42},
    {'age': 29, 'firstname': 'Beth'},
    {'age': 36, 'lastname': 'Beck'},
]

# %% Result
with open(FILE, mode='wt', encoding='utf-8') as file:
    # Exercise placeholder: replace `...` with the `csv.DictWriter()` code
    # whose sorted fieldnames are derived from DATA. Until then the file is
    # created empty and the doctest fails.
    ...
# %% About
# - Name: CSV Recap Iris
# - Difficulty: easy
# - Lines: 5
# - Minutes: 5
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Define `result: list[dict]`
# 2. To `result` add data read from `FILE`
# 3. Use `csv.DictReader` to parse file
# 4. Convert values to `int`
# 5. Run doctests - all must succeed
# %% Polish
# 1. Zdefiniuj `result: list[dict]`
# 2. Do `result` dodaj wczytane dane z pliku `FILE`
# 3. Użyj `csv.DictReader` do sparsowania pliku
# 4. Skonwertuj wartości na `int`
# 5. Uruchom doctesty - wszystkie muszą się powieść
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is dict for x in result), \
'All rows in `result` should be dict'
>>> from os import remove
>>> remove(FILE)
>>> from pprint import pprint
>>> pprint(result, sort_dicts=False)
[{'firstname': 'Mark', 'lastname': 'Watney', 'age': 42},
{'firstname': 'Melissa', 'lastname': 'Lewis', 'age': 41},
{'firstname': 'Rick', 'lastname': 'Martinez', 'age': 40},
{'firstname': 'Alex', 'lastname': 'Vogel', 'age': 42},
{'firstname': 'Beth', 'lastname': 'Johanssen', 'age': 29},
{'firstname': 'Chris', 'lastname': 'Beck', 'age': 36}]
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% Imports
import csv
# %% Types
# Each row maps a column name (str) to a value that is either str or int.
# `dict` takes exactly two type arguments (key, value), so the value type is
# a union. The annotation is written as a string so `str | int` is never
# evaluated at import time (that operator needs Python 3.10+, the doctest
# accepts 3.9).
result: 'list[dict[str, str | int]]'

# %% Data
FILE = r'_temporary.csv'

# Input for the exercise: a header line followed by one record per line.
DATA = """
firstname,lastname,age
Mark,Watney,42
Melissa,Lewis,41
Rick,Martinez,40
Alex,Vogel,42
Beth,Johanssen,29
Chris,Beck,36
"""

# Create the input file; lstrip() drops the newline right after the opening
# quotes so the file starts with the header line.
with open(FILE, mode='wt', encoding='utf-8') as file:
    file.write(DATA.lstrip())

# %% Result
with open(FILE, mode='rt', encoding='utf-8') as file:
    # Exercise placeholder: replace `...` with the `csv.DictReader` code that
    # builds the list of dicts shown in the doctest.
    result = ...
# %% About
# - Name: CSV Relations Nested
# - Difficulty: hard
# - Lines: 14
# - Minutes: 13
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Convert `DATA` to a format with one column per attribute, for example:
# - `group1_gid`, `group2_gid`,
# - `group1_name`, `group2_name`
# 2. Note that enumeration starts at one
# 3. Sort `fieldnames`
# 4. Save data to `FILE`
# 5. Run doctests - all must succeed
# %% Polish
# 1. Przekonwertuj `DATA` do formatu z jedną kolumną dla każdego atrybutu, np:
# - `group1_gid`, `group2_gid`,
# - `group1_name`, `group2_name`
# 2. Zwróć uwagę, że enumeracja zaczyna się od jeden
# 3. Posortuj `fieldnames`
# 4. Zapisz dane do `FILE`
# 5. Uruchom doctesty - wszystkie muszą się powieść
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> result = open(FILE).read()
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is str, \
'Variable `result` has invalid type, should be str'
>>> from os import remove
>>> remove(FILE)
>>> print(result)
"firstname","group1_gid","group1_name","group2_gid","group2_name","lastname"
"Mark","1","staff","","","Watney"
"Melissa","1","staff","2","admins","Lewis"
"Rick","","","","","Martinez"
<BLANKLINE>
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% Imports
import csv
# %% Types
# %% Data
# Scratch file the exercise writes to; the doctest removes it after reading.
FILE = r'_temporary.csv'

# Each user carries a nested `groups` list holding zero or more group dicts.
DATA = [
    {
        "firstname": "Mark",
        "lastname": "Watney",
        "groups": [
            {"gid": 1, "name": "staff"},
        ],
    },
    {
        "firstname": "Melissa",
        "lastname": "Lewis",
        "groups": [
            {"gid": 1, "name": "staff"},
            {"gid": 2, "name": "admins"},
        ],
    },
    {
        "firstname": "Rick",
        "lastname": "Martinez",
        "groups": [],
    },
]
# %% Result
# %% About
# - Name: CSV Relations Join
# - Difficulty: hard
# - Lines: 11
# - Minutes: 13
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Using `csv.DictWriter()` save `DATA` to `FILE`
# 2. Non-functional requirements:
# - All fields must be enclosed by double quote `"` character
# - Use `,` to separate group fields
# - Use `;` to separate groups
# - Use Unix `\n` newline
# - Sort `fieldnames` using `sorted()`
# 3. Run doctests - all must succeed
# %% Polish
# 1. Za pomocą `csv.DictWriter()` zapisz `DATA` do `FILE`
# 2. Wymagania niefunkcjonalne:
# - Wszystkie pola muszą być otoczone znakiem cudzysłowu `"`
# - Użyj `,` do oddzielania pól grupy
# - Użyj `;` do oddzielenia grup
# - Użyj zakończenia linii Unix `\n`
# - Posortuj `fieldnames` używając `sorted()`
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Hints
# - `vars(obj)`
# - Nested `for`
# - `str.join(';', sequence)`
# - `str.join(',', sequence)`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> result = open(FILE).read()
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is str, \
'Variable `result` has invalid type, should be str'
>>> from os import remove
>>> remove(FILE)
>>> print(result)
"firstname","groups","lastname"
"Mark","1,users","Watney"
"Melissa","1,users;2,admins","Lewis"
"Rick","","Martinez"
<BLANKLINE>
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% Imports
import csv
# %% Types
# %% Data
# Scratch file the exercise writes to; the doctest removes it after reading.
FILE = r'_temporary.csv'
class Group:
    """Group a user can belong to, described by a numeric id and a name."""

    gid: int
    name: str

    def __init__(self, gid: int, name: str) -> None:
        # Plain instance attributes, so `vars(group)` exposes both fields.
        self.gid = gid
        self.name = name
class User:
    """Person with a first name, a last name and a list of groups."""

    firstname: str
    lastname: str
    groups: list[Group]

    def __init__(self, firstname, lastname, groups=None):
        self.firstname = firstname
        self.lastname = lastname
        # Copy into a fresh list so the instance never aliases the caller's
        # list; None or an empty sequence becomes an empty list.
        self.groups = list(groups) if groups else []
# Users to export; every Group is a separate instance, even where the gid
# and name repeat between users.
DATA = [
    User(
        'Mark',
        'Watney',
        groups=[Group(gid=1, name='users')],
    ),
    User(
        'Melissa',
        'Lewis',
        groups=[
            Group(gid=1, name='users'),
            Group(gid=2, name='admins'),
        ],
    ),
    User(
        'Rick',
        'Martinez',
        groups=[],
    ),
]
# %% Result