2.6. Array Import
2.6.1. SetUp
>>> import numpy as np
2.6.2. np.loadtxt()
>>> DATA = 'https://python3.info/_static/iris.csv'
>>> a = np.loadtxt(DATA)
Traceback (most recent call last):
ValueError: could not convert string 'sepal_length,sepal_width,petal_length,petal_width,species' to float64 at row 0, column 1.
>>> a = np.loadtxt(DATA, skiprows=1)
Traceback (most recent call last):
ValueError: could not convert string '5.4,3.9,1.3,0.4,setosa' to float64 at row 0, column 1.
>>> a = np.loadtxt(DATA, skiprows=1, delimiter=',')
Traceback (most recent call last):
ValueError: could not convert string 'setosa' to float64 at row 0, column 5.
>>> a = np.loadtxt(DATA, skiprows=1, delimiter=',', max_rows=5, usecols=(0,1,2,3))
>>> a
array([[5.4, 3.9, 1.3, 0.4],
[5.9, 3. , 5.1, 1.8],
[6. , 3.4, 4.5, 1.6],
[7.3, 2.9, 6.3, 1.8],
[5.6, 2.5, 3.9, 1.1]])
>>> header = np.loadtxt(DATA, max_rows=1, delimiter=',', dtype=str, usecols=(0,1,2,3))
>>> data = np.loadtxt(DATA, skiprows=1, max_rows=3, delimiter=',', usecols=(0,1,2,3))
>>>
>>> header
array(['sepal_length', 'sepal_width', 'petal_length', 'petal_width'], dtype='<U12')
>>>
>>> data
array([[5.4, 3.9, 1.3, 0.4],
[5.9, 3. , 5.1, 1.8],
[6. , 3.4, 4.5, 1.6]])
2.6.3. Other
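| Method | Data Type | Description |
|---|---|---|
| `np.loadtxt()` | Text | Load data from text file such as `.csv` |
| `np.load()` | Binary | Load data from `.npy` or `.npz` file |
| `np.fromfile()` | Binary | Load binary data from a raw binary file |
| `np.fromstring()` | Text | Load data from string |
| `np.fromregex()` | Text | Load data from file using regex to parse |
| `np.genfromtxt()` | Text | Load data with missing values handled as specified |
| `scipy.io.loadmat()` | Binary | Reads MATLAB data files |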
>>>
... data = np.loadtxt('/tmp/myfile.csv', delimiter=',', usecols=1, skiprows=1, dtype=np.float16)
...
... small = (data < 1)
... medium = (data < 1) & (data < 2.0)
... large = (data < 2)
...
... np.save('/tmp/small', data[small])
... np.save('/tmp/medium', data[medium])
... np.save('/tmp/large', data[large])
2.6.4. Use Case - 1
>>> header = np.loadtxt(DATA, max_rows=1, dtype='str', delimiter=',', usecols=(0,1,2,3))
>>> values = np.loadtxt(DATA, skiprows=1, dtype='float', delimiter=',', usecols=(0,1,2,3))
>>> species = np.loadtxt(DATA, skiprows=1, dtype='str', delimiter=',', usecols=4)
>>>
>>> sepal_length = (header == 'sepal_length')
>>> sepal_width = (header == 'sepal_width')
>>> petal_length = (header == 'petal_length')
>>> petal_width = (header == 'petal_width')
>>>
>>> setosa = (species == 'setosa')
>>> versicolor = (species == 'versicolor')
>>> virginica = (species == 'virginica')
Then you can query your data using previously defined identifiers (queries):
>>> values[setosa, sepal_length]
array([5.4, 5.4, 4.9, 5.1, 4.6, 5.2, 5.2, 5.1, 4.8, 4.9, 4.3, 5. , 5.4,
5.1, 4.8, 4.8, 4.4, 5.1, 4.6, 5.5, 5. , 5.7, 5.4, 4.8, 5. , 5.1,
4.9, 5. , 4.6, 4.9, 5.1, 4.7, 5.7, 4.4, 5.4, 4.5, 5. , 5.3, 5.1,
5. , 5.8, 5.2, 4.6, 4.8, 4.4, 5.4, 5. , 4.7, 5.1, 5.5, 5. ])
>>> values[setosa, sepal_length].mean()
np.float64(5.013725490196078)
>>> values[setosa, sepal_length].mean().round(2)
np.float64(5.01)
2.6.5. Assignments
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% About
# - Name: Numpy Loadtext
# - Difficulty: easy
# - Lines: 4
# - Minutes: 5
# %% English
# 1. Load text from `DATA`
# 2. Define variables:
# - `species: np.ndarray[str]` - first row, columns 2, 3, 4
# - `features: np.ndarray[float]` - all rows except the first one, columns 0, 1, 2, 3
# - `labels: np.ndarray[int]` - all rows except the first one, column 4
# 3. Run doctests - all must succeed
# %% Polish
# 1. Wczytaj tekst z `DATA`
# 2. Zdefiniuj zmienne:
# - `species: np.ndarray[str]` - pierwszy wiersz, kolumny 2, 3, 4
# - `features: np.ndarray[float]` - wszystkie wiersze poza pierwszym, kolumny 0, 1, 2, 3
# - `labels: np.ndarray[int]` - wszystkie wiersze poza pierwszym, kolumna 4
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Tests
"""
Doctests validating the assignment solution: `species`, `features`, `labels`.

This docstring is intentionally NOT a raw string: every backslash at the
end of an `assert` line joins it with the message on the following line
before doctest parses the text.

>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
    'Python 3.9+ required'
>>> assert species is not Ellipsis, \
    'Assign result to variable: `species`'
>>> assert labels is not Ellipsis, \
    'Assign result to variable: `labels`'
>>> assert features is not Ellipsis, \
    'Assign result to variable: `features`'
>>> assert type(species) is np.ndarray, \
    'Variable `species` has invalid type, expected: np.ndarray'
>>> assert type(features) is np.ndarray, \
    'Variable `features` has invalid type, expected: np.ndarray'
>>> assert type(labels) is np.ndarray, \
    'Variable `labels` has invalid type, expected: np.ndarray'
>>> assert species.dtype == np.dtype('<U10'), \
    'Variable `species` has invalid type, expected: str'
>>> assert features.dtype == np.dtype('float64'), \
    'Variable `features` has invalid type, expected: float'
>>> assert labels.dtype == np.dtype('int64'), \
    'Variable `labels` has invalid type, expected: int'
>>> assert len(species) == 3, \
    'Variable `species` length should be 3'
>>> assert len(features) == 151, \
    'Variable `features` length should be 151'
>>> assert len(labels) == 151, \
    'Variable `labels` length should be 151'
>>> species
array(['setosa', 'versicolor', 'virginica'], dtype='<U10')
>>> features[:3]
array([[5.4, 3.9, 1.3, 0.4],
       [5.9, 3. , 5.1, 1.8],
       [6. , 3.4, 4.5, 1.6]])
>>> features[-3:]
array([[4.9, 2.5, 4.5, 1.7],
       [6.3, 2.8, 5.1, 1.5],
       [6.8, 3.2, 5.9, 2.3]])
>>> labels
array([0, 2, 1, 2, 1, 0, 1, 1, 0, 2, 2, 0, 0, 2, 2, 1, 2, 2, 2, 1, 0, 1,
       1, 0, 0, 0, 2, 2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2, 1, 1, 1, 2, 2,
       0, 1, 1, 1, 1, 1, 2, 0, 2, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 2, 0, 0,
       0, 0, 0, 0, 1, 0, 2, 0, 0, 1, 1, 2, 2, 1, 0, 2, 1, 0, 1, 0, 2, 1,
       0, 2, 0, 2, 1, 0, 2, 1, 1, 0, 0, 1, 2, 2, 2, 1, 0, 1, 1, 1, 2, 2,
       0, 2, 2, 0, 2, 1, 2, 0, 0, 1, 0, 2, 0, 2, 1, 2, 2, 2, 1, 0, 2, 1,
       0, 0, 2, 0, 2, 1, 1, 1, 0, 1, 1, 2, 0, 1, 1, 0, 2, 2, 2])
"""
import numpy as np
# URL of the CSV input for this assignment
# NOTE(review): the file name suggests a "dirty" variant of the Iris dataset
# whose layout differs from a plain CSV - confirm against the actual file.
DATA = 'https://python3.info/_static/iris-dirty.csv'

# Placeholders to be replaced with the solution. The doctests in the module
# docstring fail while any of them is still `...` (Ellipsis).

# Expected: np.ndarray of str - first row, columns 2, 3, 4
species = ...
# Expected: np.ndarray of float - all rows except the first one, columns 0-3
features = ...
# Expected: np.ndarray of int - all rows except the first one, column 4
labels = ...