5.1. DataFrame Create

Important

pd.DataFrame(list[dict])
pd.DataFrame(dict[str,list])

5.1.1. SetUp

>>> import pandas as pd
>>> import numpy as np

5.1.2. Create from List of Dicts

>>> pd.DataFrame([
...     {'A': 1.0, 'B': 2.0},
...     {'A': 3.0, 'B': 4.0},
... ])
     A    B
0  1.0  2.0
1  3.0  4.0

>>> pd.DataFrame([
...     {'A': 1.0, 'B': 2.0},
...     {'B': 3.0, 'C': 4.0},
... ])
     A    B    C
0  1.0  2.0  NaN
1  NaN  3.0  4.0

>>> pd.DataFrame([
...     {'firstname': 'Mark', 'lastname': 'Watney'},
...     {'firstname': 'Melissa', 'lastname': 'Lewis'},
...     {'firstname': 'Rick', 'lastname': 'Martinez'},
...     {'firstname': 'Alex', 'lastname': 'Vogel'},
... ])
  firstname  lastname
0      Mark    Watney
1   Melissa     Lewis
2      Rick  Martinez
3      Alex     Vogel

5.1.3. Create from Dict

>>> pd.DataFrame({
...     'A': ['a', 'b', 'c'],
...     'B': [1.0, 2.0, 3.0],
...     'C': [1, 2, 3],
... })
   A    B  C
0  a  1.0  1
1  b  2.0  2
2  c  3.0  3

>>> pd.DataFrame({
...     'firstname': ['Mark', 'Melissa', 'Rick', 'Alex'],
...     'lastname': ['Watney', 'Lewis', 'Martinez', 'Vogel'],
... })
  firstname  lastname
0      Mark    Watney
1   Melissa     Lewis
2      Rick  Martinez
3      Alex     Vogel

5.1.4. Create from NDArray

>>> import pandas as pd
>>> import numpy as np
>>> np.random.seed(0)
>>>
>>>
>>> df = pd.DataFrame(np.random.randn(7, 4))
>>>
>>> df
          0         1         2         3
0  1.764052  0.400157  0.978738  2.240893
1  1.867558 -0.977278  0.950088 -0.151357
2 -0.103219  0.410599  0.144044  1.454274
3  0.761038  0.121675  0.443863  0.333674
4  1.494079 -0.205158  0.313068 -0.854096
5 -2.552990  0.653619  0.864436 -0.742165
6  2.269755 -1.454366  0.045759 -0.187184

5.1.5. Use Case - 1

>>> import pandas as pd
>>> import numpy as np
>>>
>>>
>>> pd.DataFrame({
...     'A': 1.,
...     'B': pd.Timestamp('1961-04-12'),
...     'C': pd.Series(1, index=list(range(4)), dtype='float32'),
...     'D': np.array([3] * 4, dtype='int32'),
...     'E': pd.Categorical(["test", "train", "test", "train"]),
...     'F': 'foo',
...     'G': [1,2,3,4],
... })
     A          B    C  D      E    F  G
0  1.0 1961-04-12  1.0  3   test  foo  1
1  1.0 1961-04-12  1.0  3  train  foo  2
2  1.0 1961-04-12  1.0  3   test  foo  3
3  1.0 1961-04-12  1.0  3  train  foo  4

5.1.6. Use Case - 2

>>> import pandas as pd
>>> import numpy as np
>>> np.random.seed(0)
>>>
>>>
>>> df = pd.DataFrame(
...     columns = ['Morning', 'Noon', 'Evening', 'Midnight'],
...     index = pd.date_range('1999-12-30', periods=7),
...     data = np.random.randn(7, 4))
...
>>> df
             Morning      Noon   Evening  Midnight
1999-12-30  1.764052  0.400157  0.978738  2.240893
1999-12-31  1.867558 -0.977278  0.950088 -0.151357
2000-01-01 -0.103219  0.410599  0.144044  1.454274
2000-01-02  0.761038  0.121675  0.443863  0.333674
2000-01-03  1.494079 -0.205158  0.313068 -0.854096
2000-01-04 -2.552990  0.653619  0.864436 -0.742165
2000-01-05  2.269755 -1.454366  0.045759 -0.187184

5.1.7. Assignments

# %% About
# - Name: DataFrame Create
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Define variable `result` with
#    dataframe of `DATA` with columns named `firstname`, `lastname`, `role`
# 2. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj zmienną `result` z
#    dataframe z `DATA` z kolumnami nazwanymi `firstname`, `lastname`, `role`
# 2. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `pd.DataFrame()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.max_columns', 10)
>>> pd.set_option('display.max_rows', 10)

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` must be a `pd.DataFrame` type'

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname   lastname       role
0      Mark     Watney   botanist
1   Melissa      Lewis  commander
2      Rick   Martinez      pilot
3      Alex      Vogel    chemist
4      Beth  Johanssen   engineer
5     Chris       Back      medic
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data
# Column-oriented input: every key is a column name and every tuple holds
# that column's values, one entry per row (six rows in total).
DATA = {
    'firstname': ('Mark', 'Melissa', 'Rick', 'Alex', 'Beth', 'Chris'),
    'lastname': ('Watney', 'Lewis', 'Martinez', 'Vogel', 'Johanssen', 'Back'),
    'role': ('botanist', 'commander', 'pilot', 'chemist', 'engineer', 'medic'),
}

# %% Result
# Placeholder: replace `...` with the solution. The doctests above fail
# while `result` is still Ellipsis and require it to be a `pd.DataFrame`.
result = ...

# %% About
# - Name: DataFrame Create
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Define variable `result` with
#    dataframe of `DATA` with columns named `firstname`, `lastname`, `role`
# 2. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj zmienną `result` z
#    dataframe z `DATA` z kolumnami nazwanymi `firstname`, `lastname`, `role`
# 2. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `pd.DataFrame()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.max_columns', 10)
>>> pd.set_option('display.max_rows', 10)

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` must be a `pd.DataFrame` type'

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname   lastname       role
0      Mark     Watney   botanist
1   Melissa      Lewis  commander
2      Rick   Martinez      pilot
3      Alex      Vogel    chemist
4      Beth  Johanssen   engineer
5     Chris       Back      medic
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data
# Row-oriented input: each tuple is one record, with its values given in
# the same order as the labels in COLUMNS below.
DATA = [
    ('Mark', 'Watney', 'botanist', ),
    ('Melissa', 'Lewis', 'commander', ),
    ('Rick', 'Martinez', 'pilot', ),
    ('Alex', 'Vogel', 'chemist', ),
    ('Beth', 'Johanssen', 'engineer', ),
    ('Chris', 'Back', 'medic', ),
]

# Column labels expected in the resulting DataFrame (see the doctest output).
COLUMNS = ['firstname', 'lastname', 'role']

# %% Result
# Placeholder: replace `...` with the solution. The doctests above fail
# while `result` is still Ellipsis and require it to be a `pd.DataFrame`.
result = ...

# %% About
# - Name: DataFrame Create
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Define variable `result` with
#    dataframe of `DATA` with columns named `firstname`, `lastname`, `role`
# 2. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj zmienną `result` z
#    dataframe z `DATA` z kolumnami nazwanymi `firstname`, `lastname`, `role`
# 2. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `pd.DataFrame()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.max_columns', 10)
>>> pd.set_option('display.max_rows', 10)

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` must be a `pd.DataFrame` type'

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname   lastname       role
0      Mark     Watney   botanist
1   Melissa      Lewis  commander
2      Rick   Martinez      pilot
3      Alex      Vogel    chemist
4      Beth  Johanssen   engineer
5     Chris       Back      medic
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data
# Row-oriented input: one dict per record, keyed by column name
# (`firstname`, `lastname`, `role`).
DATA = [
    {'firstname': 'Mark', 'lastname': 'Watney', 'role': 'botanist'},
    {'firstname': 'Melissa', 'lastname': 'Lewis', 'role': 'commander'},
    {'firstname': 'Rick', 'lastname': 'Martinez', 'role': 'pilot'},
    {'firstname': 'Alex', 'lastname': 'Vogel', 'role': 'chemist'},
    {'firstname': 'Beth', 'lastname': 'Johanssen', 'role': 'engineer'},
    {'firstname': 'Chris', 'lastname': 'Back', 'role': 'medic'},
]

# %% Result
# Placeholder: replace `...` with the solution. The doctests above fail
# while `result` is still Ellipsis and require it to be a `pd.DataFrame`.
result = ...

# %% About
# - Name: DataFrame Create
# - Difficulty: easy
# - Lines: 7
# - Minutes: 3

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Define variable `result` with
#    dataframe with columns named `firstname`, `lastname`, `role`
# 2. Use selection with `alt` key in your IDE
#    to convert data to `dict[str,tuple]` format
# 3. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj zmienną `result` z
#    dataframe z kolumnami nazwanymi `firstname`, `lastname`, `role`
# 2. Użyj zaznaczania z klawiszem `alt` w Twoim IDE
#    aby przekonwertować dane do formatu `dict[str,tuple]`
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `pd.DataFrame()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.max_columns', 10)
>>> pd.set_option('display.max_rows', 10)

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` must be a `pd.DataFrame` type'

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname   lastname       role
0      Mark     Watney   botanist
1   Melissa      Lewis  commander
2      Rick   Martinez      pilot
3      Alex      Vogel    chemist
4      Beth  Johanssen   engineer
5     Chris       Back      medic
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data

# | firstname | lastname  | role      |
# |-----------|-----------|-----------|
# | Mark      | Watney    | botanist  |
# | Melissa   | Lewis     | commander |
# | Rick      | Martinez  | pilot     |
# | Alex      | Vogel     | chemist   |
# | Beth      | Johanssen | engineer  |
# | Chris     | Back      | medic     |

# %% Result
# Placeholder: replace `...` with a `pd.DataFrame` holding the rows of the
# commented table above. The doctests fail while `result` is still Ellipsis.
result = ...

# %% About
# - Name: DataFrame Create
# - Difficulty: easy
# - Lines: 8
# - Minutes: 3

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Define variable `result` with
#    dataframe with columns named `firstname`, `lastname`, `role`
# 2. Use selection with `alt` key in your IDE
#    to convert data to `list[tuple]` format
# 3. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj zmienną `result` z
#    dataframe z kolumnami nazwanymi `firstname`, `lastname`, `role`
# 2. Użyj zaznaczania z klawiszem `alt` w Twoim IDE
#    aby przekonwertować dane do formatu `list[tuple]`
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `pd.DataFrame()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.max_columns', 10)
>>> pd.set_option('display.max_rows', 10)

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` must be a `pd.DataFrame` type'

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname   lastname       role
0      Mark     Watney   botanist
1   Melissa      Lewis  commander
2      Rick   Martinez      pilot
3      Alex      Vogel    chemist
4      Beth  Johanssen   engineer
5     Chris       Back      medic
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data

# | firstname | lastname  | role      |
# |-----------|-----------|-----------|
# | Mark      | Watney    | botanist  |
# | Melissa   | Lewis     | commander |
# | Rick      | Martinez  | pilot     |
# | Alex      | Vogel     | chemist   |
# | Beth      | Johanssen | engineer  |
# | Chris     | Back      | medic     |

# %% Result
# Placeholder: replace `...` with a `pd.DataFrame` holding the rows of the
# commented table above. The doctests fail while `result` is still Ellipsis.
result = ...

# %% About
# - Name: DataFrame Create
# - Difficulty: easy
# - Lines: 8
# - Minutes: 3

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Define variable `result` with
#    dataframe with columns named `firstname`, `lastname`, `role`
# 2. Use selection with `alt` key in your IDE
#    to convert data to `list[dict]` format
# 3. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj zmienną `result` z
#    dataframe z kolumnami nazwanymi `firstname`, `lastname`, `role`
# 2. Użyj zaznaczania z klawiszem `alt` w Twoim IDE
#    aby przekonwertować dane do formatu `list[dict]`
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - `pd.DataFrame()`

# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.max_columns', 10)
>>> pd.set_option('display.max_rows', 10)

>>> assert result is not Ellipsis, \
'Assign result to variable: `result`'
>>> assert type(result) is pd.DataFrame, \
'Variable `result` must be a `pd.DataFrame` type'

>>> result  # doctest: +NORMALIZE_WHITESPACE
  firstname   lastname       role
0      Mark     Watney   botanist
1   Melissa      Lewis  commander
2      Rick   Martinez      pilot
3      Alex      Vogel    chemist
4      Beth  Johanssen   engineer
5     Chris       Back      medic
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% Imports
import pandas as pd

# %% Types
result: pd.DataFrame

# %% Data

# | firstname | lastname  | role      |
# |-----------|-----------|-----------|
# | Mark      | Watney    | botanist  |
# | Melissa   | Lewis     | commander |
# | Rick      | Martinez  | pilot     |
# | Alex      | Vogel     | chemist   |
# | Beth      | Johanssen | engineer  |
# | Chris     | Back      | medic     |

# %% Result
# Placeholder: replace `...` with a `pd.DataFrame` holding the rows of the
# commented table above. The doctests fail while `result` is still Ellipsis.
result = ...