17.6. CSV Non-Standard

17.6.1. Ini

  • setup.cfg

  • delimiter='='

key=MP
name=MyProject
language=py
encoding=UTF-8
verbose=true
>>> delimiter = '='
>>> result = [row.split(delimiter) for row in DATA.splitlines()]  

17.6.2. Config

  • /etc/postgresql/*/main/postgresql.conf

  • delimiter=' = '

listen_addresses = 'localhost'
port = 5432
max_connections = 100
ssl = on
password_encryption = on
db_user_namespace = off
>>> delimiter = ' = '
>>> result = [row.split(delimiter) for row in DATA.splitlines()]  

17.6.3. Toml

  • pyproject.toml

  • delimiter='='

namespace_packages = false
explicit_package_bases = false
ignore_missing_imports = false
follow_imports = "normal"
follow_imports_for_stubs = false
no_site_packages = false
no_silence_site_packages = false
# Platform configuration
python_version = "3.13"
platform = "linux-64"

17.6.4. Passwd

  • /etc/passwd

  • delimiter=':'

root:x:0:0:root:/root:/bin/bash
bin:x:1:1:bin:/bin:/sbin/nologin
daemon:x:2:2:daemon:/sbin:/sbin/nologin
adm:x:3:4:adm:/var/adm:/sbin/nologin
shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown
halt:x:7:0:halt:/sbin:/sbin/halt
nobody:x:99:99:Nobody:/:/sbin/nologin
sshd:x:74:74:Privilege-separated SSH:/var/empty/sshd:/sbin/nologin
mwatney:x:1000:1000:Mark Watney:/home/mwatney:/bin/bash
mlewis:x:1001:1001:Melissa Lewis:/home/mlewis:/bin/bash
rmartinez:x:1002:1002:Rick Martinez:/home/rmartinez:/bin/bash
avogel:x:1003:1003:Alex Vogel:/home/avogel:/bin/bash
bjohanssen:x:1004:1004:Beth Johanssen:/home/bjohanssen:/bin/bash
cbeck:x:1005:1005:Chris Beck:/home/cbeck:/bin/bash
>>> delimiter = ':'
>>> result = [row.split(delimiter) for row in DATA.splitlines()]  

17.6.5. SSHd Config

  • /etc/ssh/sshd_config

  • delimiter=' '

ChrootDirectory none
ClientAliveCountMax 3
ClientAliveInterval 0
Compression delayed
MaxStartups 10:30:100
PidFile /var/run/sshd.pid
X11Forwarding no
X11UseLocalhost yes
>>> delimiter = ' '
>>> result = [row.split(delimiter) for row in DATA.splitlines()]  

17.6.6. Hosts

  • delimiter='\s+'

##
# `/etc/hosts` structure:
#   - ip: internet protocol address (IPv4 or IPv6)
#   - hosts: host names
##

127.0.0.1       localhost
127.0.0.1       astromatt
10.13.37.1      nasa.gov esa.int
255.255.255.255 broadcasthost
::1             localhost

17.6.7. Crontab

  • /etc/crontab

  • delimiter='\s+'

# [Minute] [Hour] [Day_of_the_Month] [Month_of_the_Year] [Day_of_the_Week] [command]
*/5 * * * *          /usr/bin/python3 /home/python/run-5min.py 1>/dev/null
* * * * *            /usr/bin/python3 /home/python/run-1min.py 1>/dev/null
00 * * * *           /home/python/run.py 1>/dev/null
* * * jan,may,aug *  /home/python/run.py
0 17 * * sun,fri     /home/python/run.py
0 */4 * * *          /home/python/run.py
0 4,17 * * sun,mon   /home/python/run.py

17.6.8. Key-Value

  • /etc/locate.rc

  • delimiter='='

TMPDIR="/tmp"
FCODES="/var/db/locate.database"
SEARCHPATHS="/"
PRUNEPATHS="/tmp /var/tmp"
# temp directory
TMPDIR="/tmp"

# the actual database
#FCODES="/var/db/locate.database"

# directories to be put in the database
SEARCHPATHS="/"

# directories unwanted in output
#PRUNEPATHS="/tmp /var/tmp"

17.6.9. Docker

  • .env from Docker

  • delimiter='='

DATABASE_ENGINE=postgresql
DATABASE_SERVER=localhost
DATABASE_PORT=5432
DATABASE_NAME=mydatabase
DATABASE_USERNAME=myusername
DATABASE_PASSWORD=mypassword

17.6.10. Sensors

  • delimiter=';'

Name,         Long,       Lat,        ModuleType
"ESA EAC",    50.8524881, 7.1315254,  Indoor

Date,         Time,       Temperature, Humidity, CO2, Noise, Pressure
"2000-01-01", "00:00:00", 22.6,        46,       981, 32,    1019.1
"2000-01-01", "00:05:00", 22.6,        46,       981, 31,    1019.1
"2000-01-01", "00:10:00", 22.6,        46,       968, 32,    1019.1
Name;Long;Lat;ModuleName;ModuleType
"European Astronaut Centre";50.8524881,7.1315254;;Indoor
Timestamp;"Timezone : Europe/Berlin";Temperature;Humidity;CO2;Noise;Pressure
1622498702;"2021/06/01 00:05:02";22.6;46;981;32;1019.1
1622499004;"2021/06/01 00:10:04";22.6;46;981;31;1019.1
1622499306;"2021/06/01 00:15:06";22.6;46;968;32;1019.1
1622499608;"2021/06/01 00:20:08";22.5;46;940;31;1019.1
1622499912;"2021/06/01 00:25:12";22.5;46;907;32;1019
1622500214;"2021/06/01 00:30:14";22.5;46;877;31;1019
1622500517;"2021/06/01 00:35:17";22.4;46;873;32;1019
>>> DATA= """Name;Long;Lat;ModuleName;ModuleType
... "European Astronaut Centre";50.8524881,7.1315254;;Indoor
... Timestamp;"Timezone : Europe/Berlin";Temperature;Humidity;CO2;Noise;Pressure
... 1622498702;"2021/06/01 00:05:02";22.6;46;981;32;1019.1
... 1622499004;"2021/06/01 00:10:04";22.6;46;981;31;1019.1
... 1622499306;"2021/06/01 00:15:06";22.6;46;968;32;1019.1
... 1622499608;"2021/06/01 00:20:08";22.5;46;940;31;1019.1
... 1622499912;"2021/06/01 00:25:12";22.5;46;907;32;1019
... 1622500214;"2021/06/01 00:30:14";22.5;46;877;31;1019
... 1622500517;"2021/06/01 00:35:17";22.4;46;873;32;1019"""
>>>
>>>
>>> metadata_header, metadata_values, data_header, *data_values = DATA.splitlines()
>>>
>>> metadata_header = metadata_header.split(';')
>>> metadata_values = metadata_values.split(';')
>>> data_header = data_header.split(';')
>>> data_values = [line.split(';') for line in data_values]
>>>
>>>
>>> metadata_header
['Name', 'Long', 'Lat', 'ModuleName', 'ModuleType']
>>>
>>> metadata_values
['"European Astronaut Centre"', '50.8524881,7.1315254', '', 'Indoor']
>>>
>>> data_header
['Timestamp', '"Timezone : Europe/Berlin"', 'Temperature', 'Humidity', 'CO2', 'Noise', 'Pressure']
>>>
>>> data_values  
[['1622498702', '"2021/06/01 00:05:02"', '22.6', '46', '981', '32', '1019.1'],
 ['1622499004', '"2021/06/01 00:10:04"', '22.6', '46', '981', '31', '1019.1'],
 ['1622499306', '"2021/06/01 00:15:06"', '22.6', '46', '968', '32', '1019.1'],
 ['1622499608', '"2021/06/01 00:20:08"', '22.5', '46', '940', '31', '1019.1'],
 ['1622499912', '"2021/06/01 00:25:12"', '22.5', '46', '907', '32', '1019'],
 ['1622500214', '"2021/06/01 00:30:14"', '22.5', '46', '877', '31', '1019'],
 ['1622500517', '"2021/06/01 00:35:17"', '22.4', '46', '873', '32', '1019']]

17.6.11. Assignments

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: CSV Relations Nested
# - Difficulty: medium
# - Lines: 14
# - Minutes: 13

# %% English
# 1. Convert `DATA` to a format with one column per attribute, for example:
#    - `group1_gid`, `group2_gid`,
#    - `group1_name`, `group2_name`
# 2. Note, that enumeration starts with one
# 3. Sort `fieldnames`
# 4. Save data to `FILE`
# 5. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj `DATA` do formatu z jedną kolumną dla każdego atrybutu, np.:
#    - `group1_gid`, `group2_gid`,
#    - `group1_name`, `group2_name`
# 2. Zwróć uwagę, że enumeracja zaczyna się od jeden
# 3. Posortuj `fieldnames`
# 4. Zapisz dane do `FILE`
# 5. Uruchom doctesty - wszystkie muszą się powieść

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> result = open(FILE).read()
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is str, \
'Variable `result` has invalid type, should be str'

>>> from os import remove
>>> remove(FILE)

>>> print(result)
"firstname","group1_gid","group1_name","group2_gid","group2_name","lastname"
"Mark","1","staff","","","Watney"
"Melissa","1","staff","2","admins","Lewis"
"Rick","","","","","Martinez"
<BLANKLINE>
"""

import csv

# Scratch CSV file; the doctests above read it back and then remove it
FILE = r'_temporary.csv'

# Input records: every user carries a variable-length list of group dicts
DATA = [
    {
        "firstname": "Mark",
        "lastname": "Watney",
        "groups": [
            {"gid": 1, "name": "staff"},
        ],
    },
    {
        "firstname": "Melissa",
        "lastname": "Lewis",
        "groups": [
            {"gid": 1, "name": "staff"},
            {"gid": 2, "name": "admins"},
        ],
    },
    {
        "firstname": "Rick",
        "lastname": "Martinez",
        "groups": [],
    },
]

# Flatten data: each group field becomes its own column, prefixed with
# `group` and the group's number counted from one (e.g. `group1_gid`)
# type: list[dict]
result = ...


# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: CSV Relations Join
# - Difficulty: hard
# - Lines: 11
# - Minutes: 13

# %% English
# 1. Using `csv.DictWriter()` save `DATA` to `FILE`
# 2. Non-functional requirements:
#    - All fields must be enclosed by double quote `"` character
#    - Use `,` to separate group fields
#    - Use `;` to separate groups
#    - Use Unix `\n` newline
#    - Sort `fieldnames` using `sorted()`
# 3. Run doctests - all must succeed

# %% Polish
# 1. Za pomocą `csv.DictWriter()` zapisz `DATA` do `FILE`
# 2. Wymagania niefunkcjonalne:
#    - Wszystkie pola muszą być otoczone znakiem cudzysłowu `"`
#    - Użyj `,` do oddzielania pól grupy
#    - Użyj `;` do oddzielenia grup
#    - Użyj zakończenia linii Unix `\n`
#    - Posortuj `fieldnames` używając `sorted()`
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `vars(obj)`
# - Nested `for`
# - `str.join(';', sequence)`
# - `str.join(',', sequence)`

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> result = open(FILE).read()
>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is str, \
'Variable `result` has invalid type, should be str'

>>> from os import remove
>>> remove(FILE)

>>> print(result)
"firstname","groups","lastname"
"Mark","1,users","Watney"
"Melissa","1,users;2,admins","Lewis"
"Rick","","Martinez"
<BLANKLINE>
"""

import csv


class Group:
    """A group record holding a numeric id and a name.

    Attributes are stored on the instance in the order `gid`, `name`,
    which is also the order in which `vars(obj)` lists them.
    """

    gid: int
    name: str

    def __init__(self, gid, name):
        # Assigned left to right, so `gid` is stored before `name`
        self.gid, self.name = gid, name


class User:
    """A user with a first name, a last name and a list of groups.

    `groups` is optional; when it is omitted, `None`, or otherwise falsy,
    the instance gets a new empty list. A truthy value is copied into a
    new list, so the instance does not share the caller's container.
    """

    firstname: str
    lastname: str
    groups: list[Group]

    def __init__(self, firstname, lastname, groups=None):
        self.firstname, self.lastname = firstname, lastname
        # Any falsy `groups` collapses to an empty tuple before copying
        self.groups = list(groups or ())


# Sample users; each one holds zero or more `Group` objects
DATA = [
    User(
        firstname='Mark',
        lastname='Watney',
        groups=[
            Group(1, 'users'),
        ],
    ),
    User(
        firstname='Melissa',
        lastname='Lewis',
        groups=[
            Group(1, 'users'),
            Group(2, 'admins'),
        ],
    ),
    User(
        firstname='Rick',
        lastname='Martinez',
        groups=[],
    ),
]

# Scratch CSV file; the doctests above read it back and then remove it
FILE = r'_temporary.csv'


# Save `DATA` to `FILE` using `csv.DictWriter()`
# type: list[dict]
result = ...