14.5. Serialization Load

load(file) -> object

14.5.1. SetUp

>>> with open('/tmp/myfile.txt', mode='wt') as file:
...     file.write('Mark,Watney,41\n')
15

14.5.2. Problem

>>> with open('/tmp/myfile.txt', mode='rt') as file:
...     content = file.read()
>>>
>>> result = content.strip().split(',')
>>> result
['Mark', 'Watney', '41']

14.5.3. Solution

>>> with open('/tmp/myfile.txt', mode='rt') as file:
...     content = file.read()
>>>
>>> firstname, lastname, age = content.strip().split(',')
>>> result = [firstname, lastname, int(age)]
>>> result
['Mark', 'Watney', 41]

14.5.4. Case Study

from pprint import pprint


def load(filename):
    """Read a comma-separated text file into a list of tuples.

    The first line is treated as the header and returned as a tuple of
    column names.  In every following line the last field is kept as
    ``str`` and all preceding fields are converted to ``float``.

    Leading and trailing whitespace of the file content is ignored,
    blank lines are skipped, and a file without any content yields an
    empty list.

    :param filename: path of the file to read
    :return: ``[header, row, row, ...]``, or ``[]`` for an empty file
    :raises ValueError: when a field that should be numeric cannot be
        converted to ``float``
    :raises OSError: when the file cannot be opened
    """
    with open(filename, mode='rt') as file:
        data = file.read()
    # Blank lines carry no record; without this filter they would be
    # turned into bogus ``('',)`` rows.
    lines = [line for line in data.strip().splitlines() if line.strip()]
    if not lines:
        # Nothing to unpack a header from: report "no data" instead of
        # failing with an unrelated unpacking error.
        return []
    header, *records = lines
    result = [tuple(header.split(','))]
    for record in records:
        # Only the last column is a text label; the rest are measurements.
        *values, species = record.split(',')
        result.append((*map(float, values), species))
    return result


# Create the sample file that `load` is demonstrated on: one header line
# followed by nine records, with no newline after the last record.
with open('/tmp/myfile.csv', mode='wt') as file:
    file.write(
        'sepal_length,sepal_width,petal_length,petal_width,species\n'
        '5.8,2.7,5.1,1.9,virginica\n'
        '5.1,3.5,1.4,0.2,setosa\n'
        '5.7,2.8,4.1,1.3,versicolor\n'
        '6.3,2.9,5.6,1.8,virginica\n'
        '6.4,3.2,4.5,1.5,versicolor\n'
        '4.7,3.2,1.3,0.2,setosa\n'
        '7.0,3.2,4.7,1.4,versicolor\n'
        '7.6,3.0,6.6,2.1,virginica\n'
        '4.6,3.1,1.5,0.2,setosa'
    )

# Read the file back and display the parsed rows.
result = load('/tmp/myfile.csv')
pprint(result)
# [('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
#  (5.8, 2.7, 5.1, 1.9, 'virginica'),
#  (5.1, 3.5, 1.4, 0.2, 'setosa'),
#  (5.7, 2.8, 4.1, 1.3, 'versicolor'),
#  (6.3, 2.9, 5.6, 1.8, 'virginica'),
#  (6.4, 3.2, 4.5, 1.5, 'versicolor'),
#  (4.7, 3.2, 1.3, 0.2, 'setosa'),
#  (7.0, 3.2, 4.7, 1.4, 'versicolor'),
#  (7.6, 3.0, 6.6, 2.1, 'virginica'),
#  (4.6, 3.1, 1.5, 0.2, 'setosa')]

14.5.5. Assignments

# FIXME: Zmienić dane na Userów
# FIXME: Uspójnić zadania z pikle, json, toml
# FIXME: zdefiniuj funkcję load, która czyta z pliku ...

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: Serialization Load String
# - Difficulty: easy
# - Lines: 1
# - Minutes: 3

# %% English
# 1. Convert `DATA` to `result: list[tuple[str]]`
# 2. Do not convert numeric values to `float`, leave them as `str`
# 3. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj `DATA` do `result: list[tuple[str]]`
# 2. Nie konwertuj wartości numerycznych do `float`, zostaw jako `str`
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `tuple()`
# - `str.splitlines()`
# - `str.split()`

# %% Tests
# Doctests that check the value assigned to `result` at the end of this
# exercise; they fail while `result` is still the `...` placeholder.
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> result = list(result)
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is tuple for x in result), \
'All rows in `result` should be tuple'

>>> from pprint import pprint
>>> pprint(result)
[('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
 ('5.8', '2.7', '5.1', '1.9', 'virginica'),
 ('5.1', '3.5', '1.4', '0.2', 'setosa'),
 ('5.7', '2.8', '4.1', '1.3', 'versicolor')]
"""

# Exercise input: comma-separated text whose first line holds the column names.
DATA = """sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor"""

# Exercise placeholder: replace `...` with the lines of `DATA` as a list of
# tuples, header row included and every field left as `str`.
# type: list[tuple]
result = ...

# FIXME: Zmienić dane na Userów
# FIXME: Uspójnić zadania z pikle, json, toml
# FIXME: zdefiniuj funkcję load, która czyta z pliku ...

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: Serialization Load TypeCast
# - Difficulty: easy
# - Lines: 9
# - Minutes: 8

# %% English
# 1. Convert `DATA` to `result: list[tuple]`
# 2. Convert numeric values to `float`
# 3. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj `DATA` do `result: list[tuple]`
# 2. Przekonwertuj wartości numeryczne do `float`
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `a, *b = ...`
# - `str.splitlines()`
# - `str.split()`
# - `dict.get()`
# - `float()`
# - `tuple()`
# - `tuple() + tuple()`
# - `list.append()`

# %% Tests
# Doctests that check the value assigned to `result` at the end of this
# exercise; they fail while `result` is still the `...` placeholder.
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> result = list(result)  # expand map object
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is tuple for x in result), \
'All rows in `result` should be tuple'

>>> from pprint import pprint
>>> pprint(result)
[('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
 (5.8, 2.7, 5.1, 1.9, 'virginica'),
 (5.1, 3.5, 1.4, 0.2, 'setosa'),
 (5.7, 2.8, 4.1, 1.3, 'versicolor')]
"""

# Exercise input: comma-separated text whose first line holds the column names.
DATA = """sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor"""

# Exercise placeholder: replace `...` with the lines of `DATA` as a list of
# tuples. The header row stays `str`; in the data rows the numeric fields
# become `float` and the trailing species name stays `str`.
# type: list[tuple]
result = ...

# FIXME: Wywalić, funkcja jest zbyt specyficzna tylko dla jednego usecase
# FIXME: Uspójnić zadania z pikle, json, toml
# FIXME: zdefiniuj funkcję load, która czyta z pliku ...

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: Serialization Load Switch
# - Difficulty: easy
# - Lines: 6
# - Minutes: 5

# %% English
# 1. Convert `DATA` to `result: list[tuple[str]]`
# 2. Substitute last element (label) with value from `ENCODER`
# 3. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj `DATA` do `result: list[tuple[str]]`
# 2. Podmień ostatni element (etykietę) z wartością z `ENCODER`
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `str.splitlines()`
# - `str.split()`
# - `dict.get()`
# - `tuple()`
# - `tuple() + tuple()`
# - `list.append()`

# %% Tests
# Doctests that check the value assigned to `result` at the end of this
# exercise; they fail while `result` is still the `...` placeholder.
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> result = list(result)  # expand map object
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is tuple for x in result), \
'All rows in `result` should be tuple'

>>> from pprint import pprint
>>> pprint(result)
[('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
 ('5.8', '2.7', '5.1', '1.9', 'virginica'),
 ('5.1', '3.5', '1.4', '0.2', 'setosa'),
 ('5.7', '2.8', '4.1', '1.3', 'versicolor')]
"""

# Exercise input: comma-separated text with a header line; the last field of
# each data line is a label code rather than the species name.
DATA = """sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,0
5.1,3.5,1.4,0.2,1
5.7,2.8,4.1,1.3,2"""

# Lookup table from a label code, as the `str` found in `DATA`, to the
# species name.
ENCODER = {
    '0': 'virginica',
    '1': 'setosa',
    '2': 'versicolor',
}

# Exercise placeholder: replace `...` with the lines of `DATA` as a list of
# tuples of `str`, header row included, with the label code of each data row
# swapped for its name from `ENCODER`.
# type: list[tuple]
result = ...

# FIXME: Wywalić, funkcja jest zbyt specyficzna tylko dla jednego usecase
# FIXME: Uspójnić zadania z pikle, json, toml
# FIXME: zdefiniuj funkcję load, która czyta z pliku ...

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: Serialization Load Encoder
# - Difficulty: medium
# - Lines: 10
# - Minutes: 13

# %% English
# 1. Convert `DATA` to `result: list[tuple[str]]`
# 2. Generate `ENCODER: dict[int,str]` from `header: list[str]`
# 3. Substitute last element (label) with value from `ENCODER`
# 4. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj `DATA` do `result: list[tuple[str]]`
# 2. Wygeneruj `ENCODER: dict[int,str]` z `header: list[str]`
# 3. Podmień ostatni element (etykietę) z wartością z `ENCODER`
# 4. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `a, *b = ...`
# - `dict(enumerate())`
# - `str.splitlines()`
# - `str.split()`
# - `dict.get()`
# - `int()`
# - `tuple()`
# - `tuple() + tuple()`
# - `list.append()`

# %% Tests
# Doctests that check the value assigned to `result` at the end of this
# exercise; they fail while `result` is still the `...` placeholder.
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> result = list(result)  # expand map object
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is tuple for x in result), \
'All rows in `result` should be tuple'

>>> from pprint import pprint
>>> pprint(result)
[('5.8', '2.7', '5.1', '1.9', 'virginica'),
 ('5.1', '3.5', '1.4', '0.2', 'setosa'),
 ('5.7', '2.8', '4.1', '1.3', 'versicolor')]
"""

# Exercise input. The first line is metadata rather than column names: per
# the task, the label names in it are the source for `ENCODER`.
# NOTE(review): the leading `3,4` looks like the row and feature counts of
# this sample - confirm.
# The last field of every following line is a numeric label code.
DATA = """3,4,setosa,virginica,versicolor
5.8,2.7,5.1,1.9,1
5.1,3.5,1.4,0.2,0
5.7,2.8,4.1,1.3,2"""

# Exercise placeholder: replace `...` with the data lines of `DATA` as a list
# of tuples of `str`, each label code swapped for its name. The doctest
# expects no row for the first line, and its output matches numbering the
# label names from 0 in the order they appear in that line.
# type: list[tuple]
result = ...

# FIXME: Zmienić dane na Userów
# FIXME: Uspójnić zadania z pikle, json, toml
# FIXME: zdefiniuj funkcję load, która czyta z pliku ...

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: Serialization Load FixedHeader
# - Difficulty: easy
# - Lines: 5
# - Minutes: 5

# %% English
# 1. Convert `DATA` to `result: list[dict]`
# 2. Use `HEADER` as dict keys
# 3. Do not convert numeric values to `float`, leave them as `str`
# 4. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj `DATA` do `result: list[dict]`
# 2. Użyj `HEADER` jako kluczy dictów
# 3. Nie konwertuj wartości numerycznych do `float`, pozostaw je jako `str`
# 4. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `str.splitlines()`
# - `str.split()`
# - `list.append()`

# %% Tests
# Doctests that check the value assigned to `result` at the end of this
# exercise; they fail while `result` is still the `...` placeholder.
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> result = list(result)  # expand map object
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is dict for x in result), \
'All rows in `result` should be dict'

>>> from pprint import pprint
>>> pprint(result)
[{'petal_length': '5.1',
  'petal_width': '1.9',
  'sepal_length': '5.8',
  'sepal_width': '2.7',
  'species': 'virginica'},
 {'petal_length': '1.4',
  'petal_width': '0.2',
  'sepal_length': '5.1',
  'sepal_width': '3.5',
  'species': 'setosa'},
 {'petal_length': '4.1',
  'petal_width': '1.3',
  'sepal_length': '5.7',
  'sepal_width': '2.8',
  'species': 'versicolor'}]
"""

# Exercise input: comma-separated text without a header line.
DATA = """5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor"""

# Column names, listed in the same order as the fields of each `DATA` line.
HEADER = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'species',
]

# Exercise placeholder: replace `...` with one dict per line of `DATA`, keyed
# by the names in `HEADER`, with every value left as `str`.
# type: list[dict[str,str]]
result = ...

# FIXME: Zmienić dane na Userów
# FIXME: Uspójnić zadania z pikle, json, toml
# FIXME: zdefiniuj funkcję load, która czyta z pliku ...

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: Serialization Load GenerateHeader
# - Difficulty: hard
# - Lines: 7
# - Minutes: 8

# %% English
# 1. Generate `header: list[str]` from the first line of `DATA`
# 2. Convert `DATA` to `result: list[dict]`
# 3. Use `header` as keys
# 4. Convert numeric values to `float`
# 5. Run doctests - all must succeed

# %% Polish
# 1. Wygeneruj `header: list[str]` z pierwszej linii `DATA`
# 2. Przekonwertuj `DATA` do `result: list[dict]`
# 3. Użyj nagłówka jako kluczy
# 4. Przekonwertuj wartości numeryczne do `float`
# 5. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `str.split()`
# - `list() + list()`
# - `list.append()`
# - `tuple()`

# %% Tests
# Doctests that check the value assigned to `result` at the end of this
# exercise; they fail while `result` is still the `...` placeholder.
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> result = list(result)
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert all(type(x) is dict for x in result), \
'All rows in `result` should be dict'

>>> from pprint import pprint
>>> pprint(result)
[{'petal_length': 5.1,
  'petal_width': 1.9,
  'sepal_length': 5.8,
  'sepal_width': 2.7,
  'species': 'virginica'},
 {'petal_length': 1.4,
  'petal_width': 0.2,
  'sepal_length': 5.1,
  'sepal_width': 3.5,
  'species': 'setosa'},
 {'petal_length': 4.1,
  'petal_width': 1.3,
  'sepal_length': 5.7,
  'sepal_width': 2.8,
  'species': 'versicolor'}]
"""

# Exercise input: comma-separated text whose first line holds the column names.
DATA = """sepal_length,sepal_width,petal_length,petal_width,species
5.8,2.7,5.1,1.9,virginica
5.1,3.5,1.4,0.2,setosa
5.7,2.8,4.1,1.3,versicolor"""

# Exercise placeholder: replace `...` with one dict per data line of `DATA`,
# keyed by the column names taken from its first line; numeric values are
# converted to `float`, the species name stays `str`.
# type: list[dict]
result = ...