14.3. Serialization Dump

  • dump(object) -> file

14.3.1. Problem

>>> DATA = ['Mark', 'Watney', 41]
>>> data = str(DATA)
>>>
>>> with open('/tmp/myfile.txt', mode='wt') as file:
...     file.write(data)
22
>>> open('/tmp/myfile.txt').read()
"['Mark', 'Watney', 41]"

14.3.2. Solution

>>> DATA = ['Mark', 'Watney', 41]
>>> data = ','.join(map(str,DATA)) + '\n'
>>>
>>> with open('/tmp/myfile.txt', mode='wt') as file:
...     file.write(data)
15
>>> open('/tmp/myfile.txt').read()
'Mark,Watney,41\n'

14.3.3. Case Study

def dump(data, file):
    """Write the rows of `data` to the path `file` as comma-separated text.

    Every value is converted with `str()`, the values of one row are joined
    with `,`, and the rows are joined with a newline. The written text always
    ends with a newline. Values are not quoted. Returns `None`.
    """
    lines = []
    for row in data:
        fields = [str(value) for value in row]
        lines.append(','.join(fields))
    content = '\n'.join(lines) + '\n'
    # The whole text is built before the file is opened for writing
    with open(file, mode='wt') as handle:
        handle.write(content)


# Sample rows: the first tuple is the header, every following tuple is one record
DATA = [
    ('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
    (5.8, 2.7, 5.1, 1.9, 'virginica'),
    (5.1, 3.5, 1.4, 0.2, 'setosa'),
    (5.7, 2.8, 4.1, 1.3, 'versicolor'),
    (6.3, 2.9, 5.6, 1.8, 'virginica'),
    (6.4, 3.2, 4.5, 1.5, 'versicolor'),
    (4.7, 3.2, 1.3, 0.2, 'setosa'),
    (7.0, 3.2, 4.7, 1.4, 'versicolor'),
    (7.6, 3.0, 6.6, 2.1, 'virginica'),
    (4.6, 3.1, 1.5, 0.2, 'setosa'),
]

# Serialize all rows to a CSV file
dump(DATA, file='/tmp/myfile.csv')


# Read the file back; `with` closes the handle as soon as reading is done
with open('/tmp/myfile.csv') as file:
    result = file.read()
print(result)
# sepal_length,sepal_width,petal_length,petal_width,species
# 5.8,2.7,5.1,1.9,virginica
# 5.1,3.5,1.4,0.2,setosa
# 5.7,2.8,4.1,1.3,versicolor
# 6.3,2.9,5.6,1.8,virginica
# 6.4,3.2,4.5,1.5,versicolor
# 4.7,3.2,1.3,0.2,setosa
# 7.0,3.2,4.7,1.4,versicolor
# 7.6,3.0,6.6,2.1,virginica
# 4.6,3.1,1.5,0.2,setosa

14.3.4. Assignments

# FIXME: Make these assignments consistent with the pickle, json and toml ones

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: Serialization Dump ListDict
# - Difficulty: easy
# - Lines: 3
# - Minutes: 3

# %% English
# 1. Define function `dump()`:
#    - Argument: `data: list[dict]`, `file: str`
#    - Returns: `None`
#    - Function writes `data` to `file` in CSV format
#    - Add quotes to values
# 2. Non-functional requirements:
#    - Do not use `import` or any module
#    - Quotechar: `"`
#    - Quoting: always
#    - Delimiter: `,`
#    - Lineseparator: `\n`
#    - File must end with an empty line
# 3. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj funkcję `dump()`:
#    - Argument: `data: list[dict]`, `file: str`
#    - Zwraca: `None`
#    - Funkcja zapisuje `data` do `file` w formacie CSV
#    - Dodaj cudzysłowy do wartości
# 2. Wymagania niefunkcjonalne:
#    - Nie używaj `import` ani żadnych modułów
#    - Quotechar: `"`
#    - Quoting: zawsze
#    - Delimiter: `,`
#    - Lineseparator: `\n`
#    - Plik musi kończyć się pustą linią
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `tuple()`
# - `dict.keys()`
# - `dict.values()`
# - `list.append()`
# - `list.extend()`
# - `str.join()`

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> dump(DATA, file=FILE)
>>> result = open(FILE).read()

>>> from os import remove
>>> remove(FILE)

>>> assert type(result) is str, \
'Variable `result` has invalid type, should be str'
>>> assert result != '', \
'File content is empty'

>>> print(result)
"firstname","lastname","age"
"Mark","Watney","41"
"Melissa","Lewis","40"
"Rick","Martinez","39"
"Alex","Vogel","40"
"Chris","Beck","36"
"Beth","Johanssen","29"
<BLANKLINE>
"""

# Scratch file used by the doctests: written by `dump()`, read back, then removed
FILE = '_temporary.csv'

# Input records: one dict per person, all with the same keys in the same order
DATA = [
    {'firstname': 'Mark', 'lastname': 'Watney', 'age': 41},
    {'firstname': 'Melissa', 'lastname': 'Lewis', 'age': 40},
    {'firstname': 'Rick', 'lastname': 'Martinez', 'age': 39},
    {'firstname': 'Alex', 'lastname': 'Vogel', 'age': 40},
    {'firstname': 'Chris', 'lastname': 'Beck', 'age': 36},
    {'firstname': 'Beth', 'lastname': 'Johanssen', 'age': 29},
]


def ascsv(sequence):
    """Return the items of `sequence` as one always-quoted CSV line.

    Each item is formatted as text, wrapped in double quotes and joined
    with commas. No line separator is appended. An empty `sequence`
    gives an empty string.
    """
    # A double quote inside a value is doubled (`""`); otherwise it would
    # end the quoted field early and corrupt the line.
    escaped = (f'{x}'.replace('"', '""') for x in sequence)
    return ','.join(f'"{field}"' for field in escaped)

def dumps(data):
    """Return `data` (a list of dicts) as CSV text.

    The first line holds the keys of the first dict, and each following
    line holds the values of one dict. Lines are joined with a newline and
    no newline is added after the last line.
    """
    lines = [ascsv(data[0].keys())]
    lines.extend(ascsv(record.values()) for record in data)
    return '\n'.join(lines)


# Define function `dump()`:
# - Argument: `data: list[dict]`, `file: str`
# - Returns: `None`
# - Function writes `data` to `file` in CSV format
# - Add quotes to values
# type: Callable[[list[dict], str], None]
# NOTE: exercise placeholder - the body is intentionally left as `...`
def dump(data, file):
    ...


# FIXME: Make these assignments consistent with the pickle, json and toml ones

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: Serialization Dump ListObject
# - Difficulty: easy
# - Lines: 3
# - Minutes: 3

# %% English
# 1. Define function `dump()`:
#    - Argument: `data: list[object]`, `file: str`
#    - Returns: `None`
#    - Function writes `data` to `file` in CSV format
#    - Add quotes to values
# 2. Non-functional requirements:
#    - Do not use `import` or any module
#    - Quotechar: `"`
#    - Quoting: always
#    - Delimiter: `,`
#    - Lineseparator: `\n`
#    - File must end with an empty line
# 3. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj funkcję `dump()`:
#    - Argument: `data: list[object]`, `file: str`
#    - Zwraca: `None`
#    - Funkcja zapisuje `data` do `file` w formacie CSV
#    - Dodaj cudzysłowy do wartości
# 2. Wymagania niefunkcjonalne:
#    - Nie używaj `import` ani żadnych modułów
#    - Quotechar: `"`
#    - Quoting: zawsze
#    - Delimiter: `,`
#    - Lineseparator: `\n`
#    - Plik musi kończyć się pustą linią
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `vars()`
# - `tuple()`
# - `dict.keys()`
# - `dict.values()`
# - `list.append()`
# - `list.extend()`
# - `str.join()`

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> dump(DATA, file=FILE)
>>> result = open(FILE).read()

>>> from os import remove
>>> remove(FILE)

>>> assert type(result) is str, \
'Variable `result` has invalid type, should be str'
>>> assert result != '', \
'File content is empty'

>>> print(result)
"firstname","lastname","age"
"Mark","Watney","41"
"Melissa","Lewis","40"
"Rick","Martinez","39"
"Alex","Vogel","40"
"Chris","Beck","36"
"Beth","Johanssen","29"
<BLANKLINE>
"""

# Scratch file used by the doctests: written by `dump()`, read back, then removed
FILE = '_temporary.csv'

class User:
    """Plain record of a person: first name, last name and age."""

    def __init__(self, firstname, lastname, age):
        # Attributes are set in this order, which is also the order of `vars()`
        self.firstname, self.lastname, self.age = firstname, lastname, age

    def __repr__(self):
        """Return `ClassName(firstname=..., lastname=..., age=...)` using `repr()` of each value."""
        names = ('firstname', 'lastname', 'age')
        pairs = ', '.join(f'{name}={getattr(self, name)!r}' for name in names)
        return f'{type(self).__name__}({pairs})'


# Input records: `User` instances with `firstname`, `lastname` and `age` attributes
DATA = [
    User('Mark', 'Watney', age=41),
    User('Melissa', 'Lewis', age=40),
    User('Rick', 'Martinez', age=39),
    User('Alex', 'Vogel', age=40),
    User('Chris', 'Beck', age=36),
    User('Beth', 'Johanssen', age=29),
]


def ascsv(sequence):
    """Format the items of `sequence` as a single CSV line with every field quoted.

    Items are formatted as text, enclosed in double quotes and separated by
    commas. The result has no trailing line separator. An empty `sequence`
    yields an empty string.
    """
    # Double any embedded quote character so it cannot terminate the
    # quoted field prematurely.
    escaped = (f'{x}'.replace('"', '""') for x in sequence)
    return ','.join(f'"{field}"' for field in escaped)

def dumps(data):
    """Return `data` (a list of objects) as CSV text.

    The header line is built from the attribute names of the first object
    (via `vars()`), and each following line from the attribute values of one
    object. Lines are joined with a newline and no newline is added after
    the last line.
    """
    fieldnames = vars(data[0]).keys()
    header = ascsv(fieldnames)
    records = [ascsv(vars(obj).values()) for obj in data]
    return '\n'.join([header, *records])


# Define function `dump()`:
# - Argument: `data: list[object]`, `file: str`
# - Returns: `None`
# - Function writes `data` to `file` in CSV format
# - Add quotes to values
# type: Callable[[list[object], str], None]
# NOTE: exercise placeholder - the body is intentionally left as `...`
def dump(data, file):
    ...


# FIXME: Replace the dataset with User data
# FIXME: Make these assignments consistent with the pickle, json and toml ones

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: Serialization Dump FixedSchema
# - Difficulty: hard
# - Lines: 7
# - Minutes: 13

# %% English
# 1. Define function `dump()`:
#    - Argument: `data: list[dict]`, `file: str`
#    - Returns: `None`
#    - Function writes `data` to `file` in CSV format
#    - Add quotes to values
# 2. Sort header
# 3. Non-functional requirements:
#    - Do not use `import` or any module
#    - Quotechar: `"`
#    - Quoting: always
#    - Delimiter: `,`
#    - Lineseparator: `\n`
#    - Sort `fieldnames`
# 4. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj funkcję `dump()`:
#    - Argument: `data: list[dict]`, `file: str`
#    - Zwraca: `None`
#    - Funkcja zapisuje `data` do `file` w formacie CSV
#    - Dodaj cudzysłowy do wartości
# 2. Posortuj header
# 3. Wymagania niefunkcjonalne:
#    - Nie używaj `import` ani żadnych modułów
#    - Quotechar: `"`
#    - Quoting: zawsze
#    - Delimiter: `,`
#    - Lineseparator: `\n`
#    - Posortuj `fieldnames`
# 4. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `sorted()`
# - `str.join()`
# - `dict.get(..., default)`

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> dump(DATA, file=FILE)
>>> result = open(FILE).read()

>>> from os import remove
>>> remove(FILE)

>>> assert type(result) is str, \
'Variable `result` has invalid type, should be str'
>>> assert result != '', \
'File content is empty'

>>> print(result)
"petal_length","petal_width","sepal_length","sepal_width","species"
"1.4","0.2","5.1","3.5","setosa"
"5.1","1.9","5.8","2.7","virginica"
"1.4","0.2","5.1","3.5","setosa"
"4.1","1.3","5.7","2.8","versicolor"
"5.6","1.8","6.3","2.9","virginica"
"4.5","1.5","6.4","3.2","versicolor"
<BLANKLINE>
"""

# Scratch file used by the doctests: written by `dump()`, read back, then removed
FILE = '_temporary.csv'

# Input records: every dict has the same five keys (fixed schema);
# the third record repeats the first one
DATA = [
    {'sepal_length': 5.1, 'sepal_width': 3.5, 'petal_length': 1.4, 'petal_width': 0.2, 'species': 'setosa'},
    {'sepal_length': 5.8, 'sepal_width': 2.7, 'petal_length': 5.1, 'petal_width': 1.9, 'species': 'virginica'},
    {'sepal_length': 5.1, 'sepal_width': 3.5, 'petal_length': 1.4, 'petal_width': 0.2, 'species': 'setosa'},
    {'sepal_length': 5.7, 'sepal_width': 2.8, 'petal_length': 4.1, 'petal_width': 1.3, 'species': 'versicolor'},
    {'sepal_length': 6.3, 'sepal_width': 2.9, 'petal_length': 5.6, 'petal_width': 1.8, 'species': 'virginica'},
    {'sepal_length': 6.4, 'sepal_width': 3.2, 'petal_length': 4.5, 'petal_width': 1.5, 'species': 'versicolor'},
]

# Define function `dump()`:
# - Argument: `data: list[dict]`, `file: str`
# - Returns: `None`
# - Function writes `data` to `file` in CSV format
# - Add quotes to values
# Sort header (important!)
# type: Callable[[list[dict], str], None]
# NOTE: exercise placeholder - the body is intentionally left as `...`
def dump(data, file):
    ...


# FIXME: Replace the dataset with User data
# FIXME: Make these assignments consistent with the pickle, json and toml ones

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: Serialization Dump Schemaless
# - Difficulty: hard
# - Lines: 7
# - Minutes: 13

# %% English
# 1. Define function `dump()`:
#    - Argument: `data: list[dict]`, `file: str`
#    - Returns: `None`
#    - Function writes `data` to `file` in CSV format
#    - Add quotes to values
# 2. Sort header
# 3. Non-functional requirements:
#    - Do not use `import` or any module
#    - Quotechar: `"`
#    - Quoting: always
#    - Delimiter: `,`
#    - Lineseparator: `\n`
#    - Sort `fieldnames`
# 4. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj funkcję `dump()`:
#    - Argument: `data: list[dict]`, `file: str`
#    - Zwraca: `None`
#    - Funkcja zapisuje `data` do `file` w formacie CSV
#    - Dodaj cudzysłowy do wartości
# 2. Posortuj header
# 3. Wymagania niefunkcjonalne:
#    - Nie używaj `import` ani żadnych modułów
#    - Quotechar: `"`
#    - Quoting: zawsze
#    - Delimiter: `,`
#    - Lineseparator: `\n`
#    - Posortuj `fieldnames`
# 4. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `sorted()`
# - `set()`
# - `set.update()`
# - `str.join()`
# - `dict.get(..., default)`

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> dump(DATA, file=FILE)
>>> result = open(FILE).read()

>>> from os import remove
>>> remove(FILE)

>>> assert type(result) is str, \
'Variable `result` has invalid type, should be str'
>>> assert result != '', \
'File content is empty'

>>> print(result)
"petal_length","petal_width","sepal_length","sepal_width","species"
"","","5.1","3.5","setosa"
"4.1","1.3","","","versicolor"
"","1.8","6.3","","virginica"
"","0.2","5.0","","setosa"
"4.1","","","2.8","versicolor"
"","1.8","","2.9","virginica"
<BLANKLINE>
"""

# Scratch file used by the doctests: written by `dump()`, read back, then removed
FILE = '_temporary.csv'

# Input records: each dict holds only a subset of the five possible keys;
# `species` is the only key present in every record
DATA = [
    {'sepal_length': 5.1, 'sepal_width': 3.5, 'species': 'setosa'},
    {'petal_length': 4.1, 'petal_width': 1.3, 'species': 'versicolor'},
    {'sepal_length': 6.3, 'petal_width': 1.8, 'species': 'virginica'},
    {'sepal_length': 5.0, 'petal_width': 0.2, 'species': 'setosa'},
    {'sepal_width': 2.8, 'petal_length': 4.1, 'species': 'versicolor'},
    {'sepal_width': 2.9, 'petal_width': 1.8, 'species': 'virginica'},
]

# Define function `dump()`:
# - Argument: `data: list[dict]`, `file: str`
# - Returns: `None`
# - Function writes `data` to `file` in CSV format
# - Add quotes to values
# Sort header
# type: Callable[[list[dict], str], None]
# NOTE: exercise placeholder - the body is intentionally left as `...`
def dump(data, file):
    ...