11.6. For Recap

11.6.1. Assignments

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: For Recap Sentences
# - Difficulty: medium
# - Lines: 15
# - Minutes: 13

# %% English
# 1. Given is the text of the "Moon Speech"
#    by John F. Kennedy [1]
# 2. Sentences are separated by period (`.`)
# 3. Strip whitespace from the beginning and the end of each sentence
# 4. Words are separated by spaces
# 5. Count and store in `result` the total number in the whole text of:
#    - sentences
#    - words
#    - letters
#    - characters (including spaces inside sentences, but not commas `,`)
#    - commas (`,`)
#    - adverbs (words ending with "ly")
# 6. Run doctests - all must succeed

# %% Polish
# 1. Dany jest tekst przemówienia "Moon Speech" wygłoszonej
#    przez John F. Kennedy'ego [1]
# 2. Zdania oddzielone są kropkami (`.`)
# 3. Każde zdanie oczyść z białych znaków na początku i końcu
# 4. Słowa oddzielone są spacjami
# 5. Wypisz także ile jest łącznie w całym tekście:
#    - zdań
#    - słów
#    - liter
#    - znaków (łącznie ze spacjami wewnątrz zdań, ale bez przecinków `,`)
#    - przecinków (`,`)
#    - przysłówków (słów zakończonych na "ly")
# 6. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `str.split()`
# - `str.strip()`
# - `str.replace()`
# - `str.count()`
# - `str.endswith()`
# - `list()`
# - `len()`

# %% References
# [1] Kennedy, J.F. Moon Speech - Rice Stadium.
#     Year: 1962.
#     Retrieved: 2021-03-06.
#     URL: http://er.jsc.nasa.gov/seh/ricetalk.htm

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> from pprint import pprint

>>> assert result is not Ellipsis, \
'Assign your result to variable `result`'
>>> assert type(result) is dict, \
'Variable `result` has invalid type, should be dict'

>>> pprint(result)
{'adverbs': 0,
 'characters': 347,
 'commas': 1,
 'letters': 283,
 'sentences': 7,
 'words': 71}
"""

TEXT = """
    We choose to go to the Moon.
    We choose to go to the Moon in this decade and do the other things.
    Not because they are easy, but because they are hard.
    Because that goal will serve to organize and measure the best of our energies and skills.
    Because that challenge is one that we are willing to accept.
    One we are unwilling to postpone.
    And one we intend to win
"""

# Number of occurrences of each grammar object
# type: dict[str,int]
result = {
    'adverbs': 0,
    'characters': 0,
    'commas': 0,
    'letters': 0,
    'sentences': 0,
    'words': 0,
}

# Sentences are separated by periods. The last sentence has no trailing
# period, but `str.split()` still yields it as the final piece.
for sentence in TEXT.split('.'):
    sentence = sentence.strip()
    if not sentence:
        # Skip empty pieces (e.g. only whitespace after a final period),
        # so they are not counted as sentences
        continue

    # Commas are counted on their own and must not contribute
    # to characters, letters or word endings (e.g. `easy,`)
    without_commas = sentence.replace(',', '')
    words = without_commas.split()

    result['sentences'] += 1
    result['words'] += len(words)
    result['commas'] += sentence.count(',')
    # Characters include the spaces inside the stripped sentence
    result['characters'] += len(without_commas)
    result['letters'] += sum(char.isalpha() for char in without_commas)
    result['adverbs'] += sum(word.endswith('ly') for word in words)

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: For Recap Split
# - Difficulty: medium
# - Lines: 5
# - Minutes: 5

# %% English
# 1. Define:
#    - `features: list[tuple]` - list of values
#    - `labels: list[str]` - species
# 2. Separate header from data
# 3. For each row append to `features`, `labels`
# 4. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj:
#    - `features: list[tuple]` - lista wartości
#    - `labels: list[str]` - gatunki
# 2. Odseparuj nagłówek od danych
# 3. Dla każdego wiersza dodawaj do `features`, `labels`
# 4. Uruchom doctesty - wszystkie muszą się powieść

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> from pprint import pprint

>>> assert type(features) is list
>>> assert type(labels) is list
>>> assert all(type(x) is tuple for x in features)
>>> assert all(type(x) is str for x in labels)

>>> pprint(features)
[(5.8, 2.7, 5.1, 1.9),
 (5.1, 3.5, 1.4, 0.2),
 (5.7, 2.8, 4.1, 1.3),
 (6.3, 2.9, 5.6, 1.8),
 (6.4, 3.2, 4.5, 1.5),
 (4.7, 3.2, 1.3, 0.2)]

>>> pprint(labels)
['virginica', 'setosa', 'versicolor', 'virginica', 'versicolor', 'setosa']
"""

DATA = [
    ('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
    (5.8, 2.7, 5.1, 1.9, 'virginica'),
    (5.1, 3.5, 1.4, 0.2, 'setosa'),
    (5.7, 2.8, 4.1, 1.3, 'versicolor'),
    (6.3, 2.9, 5.6, 1.8, 'virginica'),
    (6.4, 3.2, 4.5, 1.5, 'versicolor'),
    (4.7, 3.2, 1.3, 0.2, 'setosa'),
]

# First row holds the column names, the remaining rows hold measurements
header = DATA[0]
rows = DATA[1:]

# Values from column 0-4 (right exclusive) from DATA without header
# type: list[tuple]
features = []

# species name from column 4 from DATA without header
# type: list[str]
labels = []

for row in rows:
    # Slicing a tuple returns a tuple, so no conversion is needed
    features.append(row[0:4])
    labels.append(row[4])

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: For Recap ToListTuple
# - Difficulty: medium
# - Lines: 8
# - Minutes: 8

# %% English
# 1. Load `DATA` from JSON format
# 2. Convert data to `result: list[tuple]`
# 3. Add header as a first line
# 4. Run doctests - all must succeed

# %% Polish
# 1. Wczytaj `DATA` z formatu JSON
# 2. Przekonwertuj dane do `result: list[tuple]`
# 3. Dodaj nagłówek jako pierwszą linię
# 4. Uruchom doctesty - wszystkie muszą się powieść

# %% Why
# - Convert data from `list[dict]` to `list[tuple]`
# - `list[dict]` is used to represent JSON data
# - `list[tuple]` is used to represent CSV data
# - `list[tuple]` is used to represent database rows
# - JSON is the most popular format in web development

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> from pprint import pprint

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> result = list(result)
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'
>>> assert len(result) > 0, \
'Variable `result` should not be empty'
>>> assert all(type(row) is tuple for row in result), \
'Variable `result` should be a list[tuple]'

>>> pprint(result)
[('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
 (5.8, 2.7, 5.1, 1.9, 'virginica'),
 (5.1, 3.5, 1.4, 0.2, 'setosa'),
 (5.7, 2.8, 4.1, 1.3, 'versicolor'),
 (6.3, 2.9, 5.6, 1.8, 'virginica'),
 (6.4, 3.2, 4.5, 1.5, 'versicolor'),
 (4.7, 3.2, 1.3, 0.2, 'setosa')]
"""

DATA = [
    {'sepal_length': 5.8, 'sepal_width': 2.7, 'petal_length': 5.1, 'petal_width': 1.9, 'species': 'virginica'},
    {'sepal_length': 5.1, 'sepal_width': 3.5, 'petal_length': 1.4, 'petal_width': 0.2, 'species': 'setosa'},
    {'sepal_length': 5.7, 'sepal_width': 2.8, 'petal_length': 4.1, 'petal_width': 1.3, 'species': 'versicolor'},
    {'sepal_length': 6.3, 'sepal_width': 2.9, 'petal_length': 5.6, 'petal_width': 1.8, 'species': 'virginica'},
    {'sepal_length': 6.4, 'sepal_width': 3.2, 'petal_length': 4.5, 'petal_width': 1.5, 'species': 'versicolor'},
    {'sepal_length': 4.7, 'sepal_width': 3.2, 'petal_length': 1.3, 'petal_width': 0.2, 'species': 'setosa'},
]

# Column names are taken from the keys of the first record
# type: tuple[str,...]
header = tuple(DATA[0].keys())

# Define `result` with `DATA` converted from `list[dict]` to `list[tuple]`
# Add header as a first line
# type: header = tuple[str,...]
# type: row = tuple[float,float,float,float,str]
# type: list[header|row]
result = [header]

for record in DATA:
    # Read values by header name (not by dict order), so every row
    # has its columns aligned with the header
    result.append(tuple(record[name] for name in header))

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: For Recap ToListDict
# - Difficulty: medium
# - Lines: 11
# - Minutes: 8

# %% English
# 1. Convert `list[tuple]` to `list[dict]`
# 2. Define `result: list[dict]`:
#    - key - name from the header
#    - value - measurement or species
# 3. Do not use `zip()` builtin function
# 4. Run doctests - all must succeed

# %% Polish
# 1. Przekonwertuj `list[tuple]` do `list[dict]`
# 2. Zdefiniuj `result: list[dict]`:
#    - klucz - nazwa z nagłówka
#    - wartość - wyniki pomiarów lub gatunek
# 3. Nie używaj wbudowanej funkcji `zip()`
# 4. Uruchom doctesty - wszystkie muszą się powieść

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> assert result is not Ellipsis, \
'Assign your result to variable `result`'
>>> assert type(result) is list, \
'Variable `result` has invalid type, should be list'

>>> assert all(type(x) is dict for x in result)

>>> from pprint import pprint
>>> pprint(result, width=120, sort_dicts=False)
[{'sepal_length': 5.8, 'sepal_width': 2.7, 'petal_length': 5.1, 'petal_width': 1.9, 'species': 'virginica'},
 {'sepal_length': 5.1, 'sepal_width': 3.5, 'petal_length': 1.4, 'petal_width': 0.2, 'species': 'setosa'},
 {'sepal_length': 5.7, 'sepal_width': 2.8, 'petal_length': 4.1, 'petal_width': 1.3, 'species': 'versicolor'},
 {'sepal_length': 6.3, 'sepal_width': 2.9, 'petal_length': 5.6, 'petal_width': 1.8, 'species': 'virginica'},
 {'sepal_length': 6.4, 'sepal_width': 3.2, 'petal_length': 4.5, 'petal_width': 1.5, 'species': 'versicolor'},
 {'sepal_length': 4.7, 'sepal_width': 3.2, 'petal_length': 1.3, 'petal_width': 0.2, 'species': 'setosa'}]
"""

DATA = [
    ('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
    (5.8, 2.7, 5.1, 1.9, 'virginica'),
    (5.1, 3.5, 1.4, 0.2, 'setosa'),
    (5.7, 2.8, 4.1, 1.3, 'versicolor'),
    (6.3, 2.9, 5.6, 1.8, 'virginica'),
    (6.4, 3.2, 4.5, 1.5, 'versicolor'),
    (4.7, 3.2, 1.3, 0.2, 'setosa'),
]

# First row holds the column names, the remaining rows hold measurements
header = DATA[0]
rows = DATA[1:]

# Define variable `result` with converted DATA
# type: list[dict]
result = []

for row in rows:
    record = {}
    # `zip()` is not allowed in this assignment, so pair each column
    # name with its value by position
    for index, name in enumerate(header):
        record[name] = row[index]
    result.append(record)

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: For Recap To Dict
# - Difficulty: medium
# - Lines: 4
# - Minutes: 5

# %% English
# 1. Define `result: dict[str, int]` with converted `DATA`
# 2. Check `Example` section to see output format
# 3. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj `result: dict[str, int]` z przekonwertowanym `DATA`
# 2. Sprawdź sekcję `Example` aby zobaczyć format wynikowy
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Example
#     {'Doctorate': 6,
#      'Prof-school': 6,
#      'Masters': 5,
#      'Bachelor': 5,
#      'Engineer': 5,
#      'HS-grad': 4,
#      'Junior High': 3,
#      'Primary School': 2,
#      'Kindergarten': 1}

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> from pprint import pprint

>>> assert result is not Ellipsis, \
'Assign your result to variable `result`'
>>> assert type(result) is dict, \
'Variable `result` has invalid type, should be dict'

>>> pprint(result, sort_dicts=False)
{'Doctorate': 6,
 'Prof-school': 6,
 'Masters': 5,
 'Bachelor': 5,
 'Engineer': 5,
 'HS-grad': 4,
 'Junior High': 3,
 'Primary School': 2,
 'Kindergarten': 1}
"""

DATA = {
    6: ['Doctorate', 'Prof-school'],
    5: ['Masters', 'Bachelor', 'Engineer'],
    4: ['HS-grad'],
    3: ['Junior High'],
    2: ['Primary School'],
    1: ['Kindergarten'],
}

# Define `result: dict[str, int]` with converted `DATA`
# type: dict[str,int]
result = {}

# Invert the mapping: every title listed under a level becomes a key
# pointing to that level. Insertion order follows `DATA`, which is what
# the doctest (printed with `sort_dicts=False`) expects.
for level, titles in DATA.items():
    for title in titles:
        result[title] = level

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: For Recap Label Encoder
# - Difficulty: hard
# - Lines: 14
# - Minutes: 13

# %% English
# 1. Use `DATA: list[tuple]`
# 2. Define `features: list` - list of values (data from columns 0-4, right exclusive)
# 3. Define `labels: list` - species names encoded as integers (column 4)
# 4. To encode and decode species generate from `DATA` two dictionaries:
#    - `decoder: dict` - eg. {0: 'virginica', 1: 'setosa', 2: 'versicolor'}
#    - `encoder: dict` - eg. {'virginica': 0, 'setosa': 1, 'versicolor': 2}
# 5. Run doctests - all must succeed

# %% Polish
# 1. Użyj `DATA: list[tuple]`
# 2. Zdefiniuj `features: list` - lista wartości (dane z kolumn 0-4)
# 3. Zdefiniuj `labels: list` - nazwy gatunków zakodowane jako liczby (kolumna 4)
# 4. Aby móc zakodować i odkodować gatunki wygeneruj z `DATA` dwa słowniki:
#    - `decoder: dict` - np. {0: 'virginica', 1: 'setosa', 2: 'versicolor'}
#    - `encoder: dict` - np. {'virginica': 0, 'setosa': 1, 'versicolor': 2}
# 5. Uruchom doctesty - wszystkie muszą się powieść

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> from pprint import pprint

>>> assert type(features) is list
>>> assert type(labels) is list
>>> assert all(type(x) is tuple for x in features)
>>> assert all(type(x) is int for x in labels)

>>> pprint(features)
[(5.8, 2.7, 5.1, 1.9),
 (5.1, 3.5, 1.4, 0.2),
 (5.7, 2.8, 4.1, 1.3),
 (6.3, 2.9, 5.6, 1.8),
 (6.4, 3.2, 4.5, 1.5),
 (4.7, 3.2, 1.3, 0.2)]

>>> pprint(labels)
[0, 1, 2, 0, 2, 1]

>>> encoder
{'virginica': 0, 'setosa': 1, 'versicolor': 2}

>>> decoder
{0: 'virginica', 1: 'setosa', 2: 'versicolor'}
"""

DATA = [
    ('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
    (5.8, 2.7, 5.1, 1.9, 'virginica'),
    (5.1, 3.5, 1.4, 0.2, 'setosa'),
    (5.7, 2.8, 4.1, 1.3, 'versicolor'),
    (6.3, 2.9, 5.6, 1.8, 'virginica'),
    (6.4, 3.2, 4.5, 1.5, 'versicolor'),
    (4.7, 3.2, 1.3, 0.2, 'setosa'),
]

# First row holds the column names, the remaining rows hold measurements
header = DATA[0]
rows = DATA[1:]

# Define `features: list` - list of values (data from columns 0-4,
# right exclusive)
# type: list[tuple]
features = []

# Define `labels: list` - species names encoded as integers (column 4)
# type: list[int]
labels = []

# Generate `encoder: dict` - eg. {'virginica': 0, 'setosa': 1, 'versicolor': 2}
# type: dict[str,int]
encoder = {}

# Generate `decoder: dict` - eg. {0: 'virginica', 1: 'setosa', 2: 'versicolor'}
# type: dict[int,str]
decoder = {}

for row in rows:
    species = row[4]
    if species not in encoder:
        # Assign consecutive codes in order of first appearance, and keep
        # both dictionaries in sync so they stay exact inverses
        code = len(encoder)
        encoder[species] = code
        decoder[code] = species
    # Slicing a tuple returns a tuple, so no conversion is needed
    features.append(row[0:4])
    labels.append(encoder[species])