13.8. File Read¶
Works with both relative and absolute path
Fails when directory with file cannot be accessed
Fails when file cannot be accessed
Uses context manager
`mode` parameter to `open()` function is optional (defaults to `mode='rt'`)
13.8.1. SetUp¶
>>> from pathlib import Path
>>> Path('/tmp/myfile.txt').unlink(missing_ok=True)
>>> Path('/tmp/myfile.txt').touch()
>>>
>>>
>>> DATA = """sepal_length,sepal_width,petal_length,petal_width,species
... 5.8,2.7,5.1,1.9,virginica
... 5.1,3.5,1.4,0.2,setosa
... 5.7,2.8,4.1,1.3,versicolor
... 6.3,2.9,5.6,1.8,virginica
... 6.4,3.2,4.5,1.5,versicolor
... 4.7,3.2,1.3,0.2,setosa
... """
>>>
>>> with open('/tmp/myfile.txt', mode='w') as file:
... _ = file.write(DATA)
13.8.2. Read From File¶
Always remember to close file
>>> FILE = r'/tmp/myfile.txt'
>>>
>>> file = open(FILE)
>>> data = file.read()
>>> file.close()
13.8.3. Read Using Context Manager¶
Context managers use `with ... as ...:` syntax
It closes file automatically upon block exit (dedent)
Using context manager is best practice
More information in Protocol Context Manager
>>> FILE = r'/tmp/myfile.txt'
>>>
>>> with open(FILE) as file:
... data = file.read()
13.8.4. Read File at Once¶
Note, that whole file must fit into memory
>>> FILE = r'/tmp/myfile.txt'
>>>
>>> with open(FILE) as file:
... data = file.read()
13.8.5. Read File as List of Lines¶
Note, that whole file must fit into memory
>>> FILE = r'/tmp/myfile.txt'
>>>
>>> with open(FILE) as file:
... data = file.readlines()
Read selected lines from file (slice `[1:30]` skips the first line and takes lines 2-30):
>>> FILE = r'/tmp/myfile.txt'
>>>
>>> with open(FILE) as file:
... lines = file.readlines()[1:30]
Read selected lines from file (slice `[1:30]` skips the first line and takes lines 2-30) and strip each of them:
>>> FILE = r'/tmp/myfile.txt'
>>>
>>> with open(FILE) as file:
... for line in file.readlines()[1:30]:
... line = line.strip()
Read whole file and split by lines, separate header from content:
>>> FILE = r'/tmp/myfile.txt'
>>>
>>>
... with open(FILE) as file:
... lines = file.readlines()
... header = lines[0]
... content = lines[1:]
...
... for line in content:
... line = line.strip()
13.8.6. Reading File as Generator¶
Use generator to iterate over other lines
In those examples, `file` behaves like a generator: it is a lazy iterator which yields one line at a time
>>> FILE = r'/tmp/myfile.txt'
>>>
>>> with open(FILE) as file:
... for line in file:
... line = line.strip()
>>> FILE = r'/tmp/myfile.txt'
>>>
>>> with open(FILE) as file:
... header = file.readline()
...
... for line in file:
... line = line.strip()
13.8.7. Examples¶
>>> FILE = r'/tmp/myfile.txt'
... # sepal_length,sepal_width,petal_length,petal_width,species
... # 5.8,2.7,5.1,1.9,virginica
... # 5.1,3.5,1.4,0.2,setosa
... # 5.7,2.8,4.1,1.3,versicolor
... # 6.3,2.9,5.6,1.8,virginica
... # 6.4,3.2,4.5,1.5,versicolor
... # 4.7,3.2,1.3,0.2,setosa
>>>
>>>
>>> result = []
>>>
>>> with open(FILE) as file:
... header = file.readline().strip().split(',')
...
... for line in file:
... line = line.strip().split(',')
... values = [float(x) for x in line[0:4]]
... species = line[4]
... row = values + [species]
... pairs = zip(header, row)
... result.append(dict(pairs))
>>>
>>> result
[{'sepal_length': 5.8, 'sepal_width': 2.7, 'petal_length': 5.1, 'petal_width': 1.9, 'species': 'virginica'},
{'sepal_length': 5.1, 'sepal_width': 3.5, 'petal_length': 1.4, 'petal_width': 0.2, 'species': 'setosa'},
{'sepal_length': 5.7, 'sepal_width': 2.8, 'petal_length': 4.1, 'petal_width': 1.3, 'species': 'versicolor'},
{'sepal_length': 6.3, 'sepal_width': 2.9, 'petal_length': 5.6, 'petal_width': 1.8, 'species': 'virginica'},
{'sepal_length': 6.4, 'sepal_width': 3.2, 'petal_length': 4.5, 'petal_width': 1.5, 'species': 'versicolor'},
{'sepal_length': 4.7, 'sepal_width': 3.2, 'petal_length': 1.3, 'petal_width': 0.2, 'species': 'setosa'}]
13.8.8. StringIO¶
>>> from io import StringIO
>>>
>>>
>>> DATA = """sepal_length,sepal_width,petal_length,petal_width,species
... 5.8,2.7,5.1,1.9,virginica
... 5.1,3.5,1.4,0.2,setosa
... 5.7,2.8,4.1,1.3,versicolor
... 6.3,2.9,5.6,1.8,virginica
... 6.4,3.2,4.5,1.5,versicolor
... 4.7,3.2,1.3,0.2,setosa
... """
>>>
>>>
>>> with StringIO(DATA) as file:
... result = file.readline()
...
>>> result
'sepal_length,sepal_width,petal_length,petal_width,species\n'
>>> from io import StringIO
>>>
>>>
>>> DATA = """sepal_length,sepal_width,petal_length,petal_width,species
... 5.8,2.7,5.1,1.9,virginica
... 5.1,3.5,1.4,0.2,setosa
... 5.7,2.8,4.1,1.3,versicolor
... 6.3,2.9,5.6,1.8,virginica
... 6.4,3.2,4.5,1.5,versicolor
... 4.7,3.2,1.3,0.2,setosa
... """
>>>
>>>
>>> file = StringIO(DATA)
>>>
>>> file.read(50)
'sepal_length,sepal_width,petal_length,petal_width,'
>>> file.seek(0)
0
>>> file.readline()
'sepal_length,sepal_width,petal_length,petal_width,species\n'
>>> file.close()
13.8.9. Use Case - 0x01¶
>>> DATA = """A,B,C,red,green,blue
... 1,2,3,0
... 4,5,6,1
... 7,8,9,2"""
>>>
>>> data = DATA.splitlines()
>>> header = data[0]
>>> lines = data[1:]
>>> colors = header.strip().split(',')[3:]
>>> colors = dict(enumerate(colors))
>>> result = []
>>>
>>> for line in lines:
... line = line.strip().split(',')
... *numbers, color = map(int, line)
... line = numbers + [colors.get(color)]
... result.append(tuple(line))
13.8.10. Assignments¶
"""
* Assignment: File Read Str
* Type: class assignment
* Complexity: easy
* Lines of code: 2 lines
* Time: 3 min

English:
    1. Read `FILE` to `result: str`
    2. Run doctests - all must succeed

Polish:
    1. Wczytaj `FILE` do `result: str`
    2. Uruchom doctesty - wszystkie muszą się powieść

Hints:
    * `with`
    * `open()`

Tests:
    >>> import sys; sys.tracebacklimit = 0
    >>> from os import remove; remove(FILE)

    >>> assert result is not Ellipsis, \
    'Assign your result to variable `result`'
    >>> assert type(result) is str, \
    'Variable `result` has invalid type, should be str'

    >>> result
    'hello world'
"""

FILE = '_temporary.txt'
DATA = 'hello world'

# Create the fixture file which the solution has to read back;
# the doctests in the module docstring remove it after the run.
with open(FILE, mode='wt') as file:
    file.write(DATA)

# Read `FILE` to `result: str`
# NOTE: the tests no longer rebind `result` themselves, so they verify
# the value assigned below instead of always passing.
# type: str
result = ...
"""
* Assignment: File Read Multiline
* Type: class assignment
* Complexity: easy
* Lines of code: 3 lines
* Time: 3 min

English:
    1. Read `FILE` to `result: list[str]`
    2. Remove whitespaces
    3. Run doctests - all must succeed

Polish:
    1. Wczytaj `FILE` do `result: list[str]`
    2. Usuń białe znaki
    3. Uruchom doctesty - wszystkie muszą się powieść

Hints:
    * `with`
    * `open()`
    * `str.strip()`

Tests:
    >>> import sys; sys.tracebacklimit = 0
    >>> from os import remove; remove(FILE)

    >>> assert result is not Ellipsis, \
    'Assign your result to variable `result`'
    >>> assert type(result) is list, \
    'Variable `result` has invalid type, should be list'
    >>> assert all(type(x) is str for x in result), \
    'All rows in `result` should be str'

    >>> result
    ['Fist line', 'Second line', 'Third line']
"""

FILE = '_temporary.txt'
DATA = """Fist line
Second line
Third line
"""

# Create the fixture file which the solution has to read back;
# the doctests in the module docstring remove it after the run.
with open(FILE, mode='wt') as file:
    file.write(DATA)

# Read `FILE` to `result: list[str]`
# Remove whitespaces
# NOTE: the tests expect one stripped string per line of the file,
# matching the task description above.
# type: list[str]
result = ...
"""
* Assignment: File Read List[str]
* Type: class assignment
* Complexity: easy
* Lines of code: 2 lines
* Time: 3 min

English:
    1. Read `FILE` to `result: list[str]`
    2. Remove whitespaces
    3. Split line by comma
    4. Run doctests - all must succeed

Polish:
    1. Wczytaj `FILE` do `result: list[str]`
    2. Usuń białe znaki
    3. Podziel linię po przecinku
    4. Uruchom doctesty - wszystkie muszą się powieść

Hints:
    * `with`
    * `open()`
    * `str.strip()`
    * `str.split()`

Tests:
    >>> import sys; sys.tracebacklimit = 0
    >>> from os import remove; remove(FILE)

    >>> assert result is not Ellipsis, \
    'Assign your result to variable `result`'
    >>> assert type(result) is list, \
    'Variable `result` has invalid type, should be list'
    >>> assert all(type(x) is str for x in result), \
    'All rows in `result` should be str'

    >>> result
    ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
"""

FILE = '_temporary.txt'
DATA = 'sepal_length,sepal_width,petal_length,petal_width,species'

# Create the fixture file which the solution has to read back;
# the doctests in the module docstring remove it after the run.
with open(FILE, mode='wt') as file:
    file.write(DATA)

# Read `FILE` to `result: list[str]`
# Remove whitespaces
# Split line by comma
# type: list[str]
result = ...
"""
* Assignment: File Read Multiline
* Type: class assignment
* Complexity: easy
* Lines of code: 6 lines
* Time: 3 min

English:
    1. Read `FILE` to `result: tuple`
    2. Remove whitespaces
    3. Split line by comma
    4. Convert numeric values to float
    5. Run doctests - all must succeed

Polish:
    1. Wczytaj `FILE` do `result: tuple`
    2. Usuń białe znaki
    3. Podziel linię po przecinku
    4. Przekonwertuj wartości numeryczne do float
    5. Uruchom doctesty - wszystkie muszą się powieść

Hints:
    * `with`
    * `open()`
    * Comprehension
    * `str.strip()`
    * `str.split()`
    * `float()`
    * `tuple()`

Tests:
    >>> import sys; sys.tracebacklimit = 0
    >>> from os import remove; remove(FILE)

    >>> assert result is not Ellipsis, \
    'Assign your result to variable `result`'
    >>> assert type(result) is tuple, \
    'Variable `result` has invalid type, should be tuple'
    >>> assert all(type(x) in (float, str) for x in result), \
    'All rows in `result` should be float or str'

    >>> result
    (5.1, 3.5, 1.4, 0.2, 'setosa')
"""

FILE = '_temporary.txt'
DATA = (5.1, 3.5, 1.4, 0.2, 'setosa')

# Serialize the record as a single comma-separated line ending with a newline.
data = ','.join(str(x) for x in DATA) + '\n'

# Create the fixture file which the solution has to read back;
# the doctests in the module docstring remove it after the run.
with open(FILE, mode='wt') as file:
    file.write(data)

# Read `FILE` to `result: tuple`
# Remove whitespaces
# Split line by comma
# Convert numeric values to float
# type: tuple[float, float, float, float, str]
result = ...
"""
* Assignment: File Read CSV
* Type: class assignment
* Complexity: easy
* Lines of code: 15 lines
* Time: 8 min

English:
    1. Read `FILE` to `result: list[tuple]`
    2. Remove whitespaces
    3. Split line by comma
    4. Convert numeric values to float
    5. Run doctests - all must succeed

Polish:
    1. Wczytaj `FILE` do `result: list[tuple]`
    2. Usuń białe znaki
    3. Podziel linię po przecinku
    4. Przekonwertuj wartości numeryczne do float
    5. Uruchom doctesty - wszystkie muszą się powieść

Hints:
    * `with`
    * `open()`
    * `str.split()`
    * `str.strip()`
    * Comprehension
    * `float()`
    * `(1,2,3) + ('abc',)`
    * `list.append()`

Tests:
    >>> import sys; sys.tracebacklimit = 0
    >>> from pprint import pprint
    >>> from os import remove; remove(FILE)

    >>> assert result is not Ellipsis, \
    'Assign your result to variable `result`'
    >>> assert type(result) is list, \
    'Variable `result` has invalid type, should be list'
    >>> assert all(type(x) is tuple for x in result), \
    'All rows in `result` should be tuple'

    >>> pprint(result)
    [('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
     (5.4, 3.9, 1.3, 0.4, 'setosa'),
     (5.9, 3.0, 5.1, 1.8, 'virginica'),
     (6.0, 3.4, 4.5, 1.6, 'versicolor'),
     (7.3, 2.9, 6.3, 1.8, 'virginica'),
     (5.6, 2.5, 3.9, 1.1, 'versicolor'),
     (5.4, 3.9, 1.3, 0.4, 'setosa')]
"""

FILE = '_temporary.csv'
DATA = """sepal_length,sepal_width,petal_length,petal_width,species
5.4,3.9,1.3,0.4,setosa
5.9,3.0,5.1,1.8,virginica
6.0,3.4,4.5,1.6,versicolor
7.3,2.9,6.3,1.8,virginica
5.6,2.5,3.9,1.1,versicolor
5.4,3.9,1.3,0.4,setosa
"""

# Create the fixture file which the solution has to read back;
# the doctests in the module docstring remove it after the run.
with open(FILE, mode='w') as file:
    file.write(DATA)

# Read `FILE` to `result: list[tuple]`
# Remove whitespaces
# Split line by comma
# Convert numeric values to float
# NOTE: the first row of `result` is the header (tuple of str),
# the remaining rows hold four floats followed by the species name.
# type: list[tuple]
result = ...
"""
* Assignment: File Read CleanFile
* Type: homework
* Complexity: medium
* Lines of code: 10 lines
* Time: 8 min

English:
    1. Read `FILE` to `result: dict`:
        a. key: str - IP address
        b. value: list[str] - list of hosts
    2. Run doctests - all must succeed

Polish:
    1. Wczytaj `FILE` do `result: dict`:
        a. klucz: str - adres IP
        b. wartość: list[str] - lista hostów
    2. Uruchom doctesty - wszystkie muszą się powieść

Hints:
    * `str.split()`
    * `str.strip()`
    * `with`
    * `open()`

Tests:
    >>> import sys; sys.tracebacklimit = 0
    >>> from pprint import pprint
    >>> from os import remove; remove(FILE)

    >>> assert result is not Ellipsis, \
    'Assign your result to variable `result`'
    >>> assert type(result) is dict, \
    'Variable `result` has invalid type, should be dict'
    >>> assert all(type(x) is str for x in result.keys()), \
    'All keys in `result` should be str'
    >>> assert all(type(x) is list for x in result.values()), \
    'All values in `result` should be list'

    >>> pprint(result, sort_dicts=False)
    {'127.0.0.1': ['localhost'],
     '10.13.37.1': ['nasa.gov', 'esa.int'],
     '255.255.255.255': ['broadcasthost'],
     '::1': ['localhost']}
"""

FILE = '_temporary.txt'
DATA = """127.0.0.1 localhost
10.13.37.1 nasa.gov esa.int
255.255.255.255 broadcasthost
::1 localhost
"""

# Create the fixture file which the solution has to read back;
# the doctests in the module docstring remove it after the run.
with open(FILE, mode='w') as file:
    file.write(DATA)

# Read `FILE` to `result: dict`:
# - key: str - IP address
# - value: list[str] - list of hosts
# Example {'10.13.37.1': ['nasa.gov', 'esa.int'], ...}
# type: dict[str,list[str]]
result = ...