14.5. Serialization Load
load(file) -> object
14.5.1. SetUp
>>> with open('/tmp/myfile.txt', mode='wt') as file:
...     file.write('Mark,Watney,41\n')
15
14.5.2. Problem
>>> with open('/tmp/myfile.txt', mode='rt') as file:
...     content = file.read()
>>>
>>> result = content.strip().split(',')
>>> result
['Mark', 'Watney', '41']
14.5.3. Solution
>>> with open('/tmp/myfile.txt', mode='rt') as file:
...     content = file.read()
>>>
>>> firstname, lastname, age = content.strip().split(',')
>>> result = [firstname, lastname, int(age)]
>>> result
['Mark', 'Watney', 41]
14.5.4. Case Study
from pprint import pprint
def load(filename):
    """Read a comma-separated text file into a list of tuples.

    The first non-blank line is the header; it is returned as a tuple of
    column names.  For every following line, all fields except the last are
    converted to ``float`` and the last field (the label) is kept as ``str``.

    Args:
        filename: Path of the text file to read.

    Returns:
        ``[header, row, ...]`` where every element is a tuple, or an empty
        list when the file contains no data at all.

    Raises:
        ValueError: If a non-label field cannot be converted to ``float``.
    """
    with open(filename, mode='rt') as file:
        data = file.read()

    # Blank lines carry no record; dropping them avoids bogus ``('',)`` rows
    # and lets an empty file fall through to the guard below.
    lines = [line for line in data.strip().splitlines() if line.strip()]
    if not lines:
        # Star-unpacking an empty list would raise ValueError, so bail out.
        return []

    header, *records = lines
    rows = [tuple(header.split(','))]
    for record in records:
        # Everything but the trailing label is numeric.
        *values, species = record.split(',')
        rows.append((*map(float, values), species))
    return rows
# Write the sample CSV (one header line followed by nine data rows) that
# `load` is demonstrated on.  The literal starts right after the opening
# quotes and its rows stay at column 0 so that no stray whitespace ends up
# in the file.
with open('/tmp/myfile.csv', mode='wt') as file:
    file.write("""sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor
6.3,2.9,5.6,1.8,virginica
6.4,3.2,4.5,1.5,versicolor
4.7,3.2,1.3,0.2,setosa
7.0,3.2,4.7,1.4,versicolor
7.6,3.0,6.6,2.1,virginica
4.6,3.1,1.5,0.2,setosa""")

# Parse the file back and pretty-print the header followed by the typed rows.
result = load('/tmp/myfile.csv')
pprint(result)
# [('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
#  (5.8, 2.7, 5.1, 1.9, 'virginica'),
#  (5.1, 3.5, 1.4, 0.2, 'setosa'),
#  (5.7, 2.8, 4.1, 1.3, 'versicolor'),
#  (6.3, 2.9, 5.6, 1.8, 'virginica'),
#  (6.4, 3.2, 4.5, 1.5, 'versicolor'),
#  (4.7, 3.2, 1.3, 0.2, 'setosa'),
#  (7.0, 3.2, 4.7, 1.4, 'versicolor'),
#  (7.6, 3.0, 6.6, 2.1, 'virginica'),
#  (4.6, 3.1, 1.5, 0.2, 'setosa')]
14.5.5. Assignments
# FIXME: Zmienić dane na Userów
# FIXME: Uspójnić zadania z pickle, json, toml
# FIXME: zdefiniuj funkcję load, która czyta z pliku ...
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% About
# - Name: Serialization Load String
# - Difficulty: easy
# - Lines: 1
# - Minutes: 3
# %% English
# 1. Convert `DATA` to `result: list[tuple[str]]`
# 2. Do not convert numeric values to `float`, leave them as `str`
# 3. Run doctests - all must succeed
# %% Polish
# 1. Przekonwertuj `DATA` do `result: list[tuple[str]]`
# 2. Nie konwertuj wartości numerycznych do `float`, zostaw jako `str`
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Hints
# - `tuple()`
# - `str.splitlines()`
# - `str.split()`
# %% Tests
# The docstring below holds the doctests that check the learner's `result`.
# NOTE(review): the expected `pprint` output has its continuation lines flush
# left, while `pprint` indents them; doctest matches whitespace exactly unless
# NORMALIZE_WHITESPACE is enabled - confirm against the original file.
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> result = list(result)
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is tuple for x in result), \
'All rows in `result` should be tuple'
>>> from pprint import pprint
>>> pprint(result)
[('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
('5.8', '2.7', '5.1', '1.9', 'virginica'),
('5.1', '3.5', '1.4', '0.2', 'setosa'),
('5.7', '2.8', '4.1', '1.3', 'versicolor')]
"""
# Input: CSV text - one header line followed by three data rows.
DATA = """sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor"""
# Placeholder for the learner's solution: `DATA` parsed into one tuple per
# line (header included), every field left as `str` (see the expected output
# in the doctests).
# type: list[tuple]
result = ...
# FIXME: Zmienić dane na Userów
# FIXME: Uspójnić zadania z pickle, json, toml
# FIXME: zdefiniuj funkcję load, która czyta z pliku ...
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% About
# - Name: Serialization Load TypeCast
# - Difficulty: easy
# - Lines: 9
# - Minutes: 8
# %% English
# 1. Convert `DATA` to `result: list[tuple]`
# 2. Convert numeric values to `float`
# 3. Run doctests - all must succeed
# %% Polish
# 1. Przekonwertuj `DATA` do `result: list[tuple]`
# 2. Przekonwertuj wartości numeryczne do `float`
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Hints
# - `a, *b = ...`
# - `str.splitlines()`
# - `str.split()`
# - `dict.get()`
# - `float()`
# - `tuple()`
# - `tuple() + tuple()`
# - `list.append()`
# %% Tests
# The docstring below holds the doctests that check the learner's `result`.
# NOTE(review): the expected `pprint` output has its continuation lines flush
# left, while `pprint` indents them; doctest matches whitespace exactly unless
# NORMALIZE_WHITESPACE is enabled - confirm against the original file.
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> result = list(result) # expand map object
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is tuple for x in result), \
'All rows in `result` should be tuple'
>>> from pprint import pprint
>>> pprint(result)
[('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
(5.8, 2.7, 5.1, 1.9, 'virginica'),
(5.1, 3.5, 1.4, 0.2, 'setosa'),
(5.7, 2.8, 4.1, 1.3, 'versicolor')]
"""
# Input: CSV text - one header line followed by three data rows.
DATA = """sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor"""
# Placeholder for the learner's solution: `DATA` parsed into one tuple per
# line; the header stays a tuple of `str`, while in data rows the numeric
# fields become `float` and the trailing label stays `str` (see the expected
# output in the doctests).
# type: list[tuple]
result = ...
# FIXME: Wywalić, funkcja jest zbyt specyficzna tylko dla jednego usecase
# FIXME: Uspójnić zadania z pickle, json, toml
# FIXME: zdefiniuj funkcję load, która czyta z pliku ...
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% About
# - Name: Serialization Load Switch
# - Difficulty: easy
# - Lines: 6
# - Minutes: 5
# %% English
# 1. Convert `DATA` to `result: list[tuple[str]]`
# 2. Substitute last element (label) with value from `ENCODER`
# 3. Run doctests - all must succeed
# %% Polish
# 1. Przekonwertuj `DATA` do `result: list[tuple[str]]`
# 2. Podmień ostatni element (etykietę) z wartością z `ENCODER`
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Hints
# - `str.splitlines()`
# - `str.split()`
# - `dict.get()`
# - `tuple()`
# - `tuple() + tuple()`
# - `list.append()`
# %% Tests
# The docstring below holds the doctests that check the learner's `result`.
# NOTE(review): the expected `pprint` output has its continuation lines flush
# left, while `pprint` indents them; doctest matches whitespace exactly unless
# NORMALIZE_WHITESPACE is enabled - confirm against the original file.
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> result = list(result) # expand map object
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is tuple for x in result), \
'All rows in `result` should be tuple'
>>> from pprint import pprint
>>> pprint(result)
[('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
('5.8', '2.7', '5.1', '1.9', 'virginica'),
('5.1', '3.5', '1.4', '0.2', 'setosa'),
('5.7', '2.8', '4.1', '1.3', 'versicolor')]
"""
# Input: CSV text - one header line followed by three data rows whose last
# field is a label code ('0', '1' or '2') instead of a species name.
DATA = """sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,0
5.1,3.5,1.4,0.2,1
5.7,2.8,4.1,1.3,2"""
# Lookup table: label code (as `str`) -> species name.
ENCODER = {
'0': 'virginica',
'1': 'setosa',
'2': 'versicolor',
}
# Placeholder for the learner's solution: `DATA` parsed into one tuple of
# `str` per line (header included), with the label code in each data row
# replaced by its name from `ENCODER` (see the expected output in the
# doctests).
# type: list[tuple]
result = ...
# FIXME: Wywalić, funkcja jest zbyt specyficzna tylko dla jednego usecase
# FIXME: Uspójnić zadania z pickle, json, toml
# FIXME: zdefiniuj funkcję load, która czyta z pliku ...
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% About
# - Name: Serialization Load Encoder
# - Difficulty: medium
# - Lines: 10
# - Minutes: 13
# %% English
# 1. Convert `DATA` to `result: list[tuple[str]]`
# 2. Generate `ENCODER: dict[int,str]` from `header: list[str]`
# 3. Substitute last element (label) with value from `ENCODER`
# 4. Run doctests - all must succeed
# %% Polish
# 1. Przekonwertuj `DATA` do `result: list[tuple[str]]`
# 2. Wygeneruj `ENCODER: dict[int,str]` z `header: list[str]`
# 3. Podmień ostatni element (etykietę) z wartością z `ENCODER`
# 4. Uruchom doctesty - wszystkie muszą się powieść
# %% Hints
# - `a, *b = ...`
# - `dict(enumerate())`
# - `str.splitlines()`
# - `str.split()`
# - `dict.get()`
# - `int()`
# - `tuple()`
# - `tuple() + tuple()`
# - `list.append()`
# %% Tests
# The docstring below holds the doctests that check the learner's `result`.
# NOTE(review): the expected `pprint` output has its continuation lines flush
# left, while `pprint` indents them; doctest matches whitespace exactly unless
# NORMALIZE_WHITESPACE is enabled - confirm against the original file.
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> result = list(result) # expand map object
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is tuple for x in result), \
'All rows in `result` should be tuple'
>>> from pprint import pprint
>>> pprint(result)
[('5.8', '2.7', '5.1', '1.9', 'virginica'),
('5.1', '3.5', '1.4', '0.2', 'setosa'),
('5.7', '2.8', '4.1', '1.3', 'versicolor')]
"""
# Input: the first line is not a column header; per the task statement it is
# the source for `ENCODER`. The expected output is consistent with the
# trailing names (setosa, virginica, versicolor) being numbered from 0.
# NOTE(review): the leading `3,4` presumably are row/feature counts - confirm.
# Data rows end with a numeric label code.
DATA = """3,4,setosa,virginica,versicolor
5.8,2.7,5.1,1.9,1
5.1,3.5,1.4,0.2,0
5.7,2.8,4.1,1.3,2"""
# Placeholder for the learner's solution: the data rows of `DATA` (first line
# excluded) as tuples of `str`, with the label code replaced by the species
# name (see the expected output in the doctests).
# type: list[tuple]
result = ...
# FIXME: Zmienić dane na Userów
# FIXME: Uspójnić zadania z pickle, json, toml
# FIXME: zdefiniuj funkcję load, która czyta z pliku ...
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% About
# - Name: Serialization Load FixedHeader
# - Difficulty: easy
# - Lines: 5
# - Minutes: 5
# %% English
# 1. Convert `DATA` to `result: list[dict]`
# 2. Use `HEADER` as dict keys
# 3. Do not convert numeric values to `float`, leave them as `str`
# 4. Run doctests - all must succeed
# %% Polish
# 1. Przekonwertuj `DATA` do `result: list[dict]`
# 2. Użyj `HEADER` jako kluczy dictów
# 3. Nie konwertuj wartości numerycznych do `float`, pozostaw je jako `str`
# 4. Uruchom doctesty - wszystkie muszą się powieść
# %% Hints
# - `str.splitlines()`
# - `str.split()`
# - `list.append()`
# %% Tests
# The docstring below holds the doctests that check the learner's `result`.
# NOTE(review): the expected `pprint` output has its continuation lines flush
# left, while `pprint` indents them; doctest matches whitespace exactly unless
# NORMALIZE_WHITESPACE is enabled - confirm against the original file.
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> result = list(result) # expand map object
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is dict for x in result), \
'All rows in `result` should be dict'
>>> from pprint import pprint
>>> pprint(result)
[{'petal_length': '5.1',
'petal_width': '1.9',
'sepal_length': '5.8',
'sepal_width': '2.7',
'species': 'virginica'},
{'petal_length': '1.4',
'petal_width': '0.2',
'sepal_length': '5.1',
'sepal_width': '3.5',
'species': 'setosa'},
{'petal_length': '4.1',
'petal_width': '1.3',
'sepal_length': '5.7',
'sepal_width': '2.8',
'species': 'versicolor'}]
"""
# Input: CSV text with three data rows and no header line.
DATA = """5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor"""
# Column names, in the same order as the fields of each `DATA` row.
HEADER = [
'sepal_length',
'sepal_width',
'petal_length',
'petal_width',
'species',
]
# Placeholder for the learner's solution: one dict per `DATA` row, keyed by
# the names in `HEADER`, with every value left as `str` (see the expected
# output in the doctests).
# type: list[dict[str,str]]
result = ...
# FIXME: Zmienić dane na Userów
# FIXME: Uspójnić zadania z pickle, json, toml
# FIXME: zdefiniuj funkcję load, która czyta z pliku ...
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% About
# - Name: Serialization Load GenerateHeader
# - Difficulty: hard
# - Lines: 7
# - Minutes: 8
# %% English
# 1. Generate `header: list[str]` from the first line of `DATA`
# 2. Convert `DATA` to `result: list[dict]`
# 3. Use `header` as keys
# 4. Convert numeric values to `float`
# 5. Run doctests - all must succeed
# %% Polish
# 1. Wygeneruj `header: list[str]` z pierwszej linii `DATA`
# 2. Przekonwertuj `DATA` do `result: list[dict]`
# 3. Użyj nagłówka jako kluczy
# 4. Przekonwertuj wartości numeryczne do `float`
# 5. Uruchom doctesty - wszystkie muszą się powieść
# %% Hints
# - `str.split()`
# - `list() + list()`
# - `list.append()`
# - `tuple()`
# %% Tests
# The docstring below holds the doctests that check the learner's `result`.
# NOTE(review): the expected `pprint` output has its continuation lines flush
# left, while `pprint` indents them; doctest matches whitespace exactly unless
# NORMALIZE_WHITESPACE is enabled - confirm against the original file.
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> result = list(result)
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is dict for x in result), \
'All rows in `result` should be dict'
>>> from pprint import pprint
>>> pprint(result)
[{'petal_length': 5.1,
'petal_width': 1.9,
'sepal_length': 5.8,
'sepal_width': 2.7,
'species': 'virginica'},
{'petal_length': 1.4,
'petal_width': 0.2,
'sepal_length': 5.1,
'sepal_width': 3.5,
'species': 'setosa'},
{'petal_length': 4.1,
'petal_width': 1.3,
'sepal_length': 5.7,
'sepal_width': 2.8,
'species': 'versicolor'}]
"""
# Input: CSV text - one header line followed by three data rows.
DATA = """sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor"""
# Placeholder for the learner's solution: one dict per data row, keyed by the
# column names taken from the first line of `DATA`; numeric fields become
# `float`, the species label stays `str` (see the expected output in the
# doctests).
# type: list[dict]
result = ...