14.6. File Read
Works with both relative and absolute path
Fails when directory with file cannot be accessed
Fails when file cannot be accessed
Uses context manager
`mode` parameter to `open()` function is optional (defaults to `mode='rt'`)
14.6.1. SetUp
>>> lines = [
... 'This is a first line\n',
... 'This is a second line\n',
... ]
>>>
>>> with open('/tmp/myfile.txt', mode='w') as file:
... file.writelines(lines)
14.6.2. Open for Reading
# By default file is opened in read text mode (rt)
>>> file = open('/tmp/myfile.txt') # read in text mode
>>> file = open('/tmp/myfile.txt', mode='r') # read in text mode
>>> file = open('/tmp/myfile.txt', mode='rt') # read in text mode
>>> file = open('/tmp/myfile.txt', mode='rb') # read in binary mode
14.6.3. Read File at Once
Always remember to close file
Note, that whole file must fit into memory
Line ends with newline character `\n`, so you may want to use `str.strip()`
>>> file = open('/tmp/myfile.txt', mode='rt')
>>> data = file.read()
>>> file.close()
14.6.4. Read One Line from File
Always remember to close file
Line ends with newline character `\n`, so you may want to use `str.strip()`
>>> file = open('/tmp/myfile.txt', mode='rt')
>>> data = file.readline()
>>> file.close()
14.6.5. Read All Lines from File
Always remember to close file
Note, that whole file must fit into memory
Each line ends with newline character `\n`, so you may want to use `str.strip()` in comprehension
>>> file = open('/tmp/myfile.txt', mode='rt')
>>> data = file.readlines()
>>> file.close()
Read selected (1-10) lines from file:
>>> file = open('/tmp/myfile.txt', mode='rt')
>>> data = file.readlines()[0:10]
>>> file.close()
14.6.6. Reading File as Generator
Always remember to close file
Use generator (`file`) to iterate over the lines
Each line ends with newline character `\n`, so you may want to use `str.strip()`
Also remember, that `print()` adds its own newline character, because `print(..., end='\n')` by default
You may end up with double newlines if you don't use `str.strip()` or change `print(..., end)` behavior
>>> file = open('/tmp/myfile.txt', mode='rt')
>>>
>>> for line in file:
... line.strip()
'This is a first line'
'This is a second line'
>>>
>>> file.close()
14.6.7. Read Using Context Manager
Context managers use `with ... as ...:` syntax
It closes file automatically upon block exit (dedent)
Using context manager is best practice
Read whole file:
>>> with open('/tmp/myfile.txt', mode='rt') as file:
... data = file.read()
Read one line:
>>> with open('/tmp/myfile.txt', mode='rt') as file:
... data = file.readline()
Read all lines:
>>> with open('/tmp/myfile.txt', mode='rt') as file:
... data = file.readlines()
Read file as generator:
>>> with open('/tmp/myfile.txt', mode='rt') as file:
... for line in file:
... line.strip()
'This is a first line'
'This is a second line'
14.6.8. Seek
Move file pointer to specific position
Then next read will start from that position
>>> with open('/tmp/myfile.txt', mode='rt') as file:
... data1 = file.read(10)
... data2 = file.read(10)
... file.seek(100)
... data3 = file.read(10)
100
14.6.9. Reading From One File and Writing to Another
>>> with open('/tmp/myfile1.txt', mode='rt') as infile, \
... open('/tmp/myfile2.txt', mode='wt') as outfile:
... data = infile.read()
... # transform data
... result = outfile.write(data)
>>> with (open('/tmp/myfile1.txt', mode='rt') as infile,
... open('/tmp/myfile2.txt', mode='wt') as outfile):
... data = infile.read()
... # transform data
... result = outfile.write(data)
14.6.10. Case Study
>>> from pprint import pprint
>>>
>>>
>>> FILE = r'/tmp/myfile.txt'
>>>
>>> DATA = """sepal_length,sepal_width,petal_length,petal_width,species
... 5.8,2.7,5.1,1.9,virginica
... 5.1,3.5,1.4,0.2,setosa
... 5.7,2.8,4.1,1.3,versicolor
... 6.3,2.9,5.6,1.8,virginica
... 6.4,3.2,4.5,1.5,versicolor
... 4.7,3.2,1.3,0.2,setosa
... """
Write:
>>> with open(FILE, mode='wt') as file:
... file.write(DATA)
210
Read:
>>> with open(FILE) as file:
... data = file.readlines()
...
>>> lines = [x.strip().split(',') for x in data]
>>> header = tuple(lines[0])
>>>
>>> rows = []
>>> for line in lines[1:]:
... values = [float(x) for x in line[0:4]]
... species = line[4]
... row = tuple(values) + (species,)
... rows.append(row)
>>>
>>> result = []
>>> result.append(header)
>>> result.extend(rows)
Result:
>>> pprint(result)
[('sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'),
(5.8, 2.7, 5.1, 1.9, 'virginica'),
(5.1, 3.5, 1.4, 0.2, 'setosa'),
(5.7, 2.8, 4.1, 1.3, 'versicolor'),
(6.3, 2.9, 5.6, 1.8, 'virginica'),
(6.4, 3.2, 4.5, 1.5, 'versicolor'),
(4.7, 3.2, 1.3, 0.2, 'setosa')]
14.6.11. Use Case - 1
>>> DATA = """A,B,C,red,green,blue
... 1,2,3,0
... 4,5,6,1
... 7,8,9,2"""
>>>
>>> data = DATA.splitlines()
>>> header = data[0]
>>> lines = data[1:]
>>> colors = header.strip().split(',')[3:]
>>> colors = dict(enumerate(colors))
>>> result = []
>>>
>>> for line in lines:
... line = line.strip().split(',')
... *numbers, color = map(int, line)
... line = numbers + [colors.get(color)]
... result.append(tuple(line))
14.6.12. Assignments
# %% About
# - Name: File Read Read
# - Difficulty: easy
# - Lines: 2
# - Minutes: 2
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Read file `FILE` content as string (`str`)
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed
# %% Polish
# 1. Wczytaj zawartość pliku `FILE` jako ciąg znaków (`str`)
# 2. Zdefiniuj zmienną `result` z rozwiązaniem
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# 'Alice Apricot\n'
# %% Hints
# - `with`
# - `open()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'
>>> from os import remove
>>> remove(FILE)
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is str, \
'Variable `result` has an invalid type; expected: `str`.'
>>> print(result)
Alice Apricot
<BLANKLINE>
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
# %% Types
result: str
# %% Data
# Fixture: write the file the solution has to read; the doctests remove it.
FILE = '_temporary.txt'
DATA = 'Alice Apricot\n'
with open(FILE, mode='wt') as file:
    file.write(DATA)
# %% Result
# Placeholder for the solution; the doctests reject the Ellipsis value.
result = ...
# %% About
# - Name: File Read Readlines
# - Difficulty: easy
# - Lines: 2
# - Minutes: 2
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Read file `FILE` content as list of string (`list[str]`)
# 2. Define variable `result` with the solution
# 3. Run doctests - all must succeed
# %% Polish
# 1. Wczytaj zawartość pliku `FILE` jako listę ciągu znaków (`list[str]`)
# 2. Zdefiniuj zmienną `result` z rozwiązaniem
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# ['Alice Apricot\n', 'Bob Blackthorn\n', 'Carol Corn\n']
# %% Hints
# - `with`
# - `open()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'
>>> from os import remove
>>> remove(FILE)
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is list, \
'Variable `result` has an invalid type; expected: `list`.'
>>> print(result)
['Alice Apricot\\n', 'Bob Blackthorn\\n', 'Carol Corn\\n']
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
# %% Types
result: list[str]
# %% Data
# Fixture: write the three-line file the solution has to read;
# the doctests remove it.
FILE = '_temporary.txt'
DATA = """Alice Apricot
Bob Blackthorn
Carol Corn
"""
with open(FILE, mode='wt') as file:
    file.write(DATA)
# %% Result
# Placeholder for the solution; the doctests reject the Ellipsis value.
result = ...
# %% About
# - Name: File Read List[str]
# - Difficulty: easy
# - Lines: 3
# - Minutes: 3
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Read file `FILE` content as string (`str`)
# 2. Remove newline character (`\n`)
# 3. Split the line by comma (`,`)
# 4. Define variable `result` with the solution
# 5. Run doctests - all must succeed
# %% Polish
# 1. Wczytaj zawartość pliku `FILE` jako ciąg znaków (`str`)
# 2. Usuń znak końca linii (`\n`)
# 3. Podziel linię po przecinku (`,`)
# 4. Zdefiniuj zmienną `result` z rozwiązaniem
# 5. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# ['firstname', 'lastname', 'age']
# %% Hints
# - `with`
# - `open()`
# - `str.strip()`
# - `str.split()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'
>>> from os import remove
>>> remove(FILE)
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is list, \
'Variable `result` has an invalid type; expected: `list`.'
>>> assert all(type(x) is str for x in result), \
'Variable `result` has elements of an invalid type; all items should be: `str`.'
>>> result
['firstname', 'lastname', 'age']
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
# %% Types
result: list[str]
# %% Data
# Fixture: write the one-line CSV header the solution has to read;
# the doctests remove it.
FILE = '_temporary.txt'
DATA = 'firstname,lastname,age\n'
with open(FILE, mode='wt') as file:
    file.write(DATA)
# %% Result
# Placeholder for the solution; the doctests reject the Ellipsis value.
result = ...
# %% About
# - Name: File Read List[str,int]
# - Difficulty: easy
# - Lines: 5
# - Minutes: 3
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Read file `FILE` content as string (`str`)
# 2. Remove newline character (`\n`)
# 3. Split the line by comma (`,`)
# 4. Convert numeric values to integer (`int`)
# 5. Create a tuple (`tuple`) with values for firstname, lastname and age
# 6. Define variable `result` with the solution
# 7. Run doctests - all must succeed
# %% Polish
# 1. Wczytaj zawartość pliku `FILE` jako ciąg znaków (`str`)
# 2. Usuń znak końca linii (`\n`)
# 3. Podziel linię po przecinku (`,`)
# 4. Przekonwertuj wartości numeryczne do liczby całkowitej (`int`)
# 5. Stwórz krotkę (`tuple`) z wartościami dla imię, nazwisko i wiek
# 6. Zdefiniuj zmienną `result` z rozwiązaniem
# 7. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# ('Alice', 'Apricot', 30)
# %% Hints
# - `with`
# - `open()`
# - Comprehension
# - `str.strip()`
# - `str.split()`
# - `int()`
# - `tuple()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'
>>> from os import remove
>>> remove(FILE)
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is tuple, \
'Variable `result` has an invalid type; expected: `tuple`.'
>>> assert all(type(x) in (str, int) for x in result), \
'Variable `result` has elements of an invalid type; all items should be: `str` or `int`.'
>>> print(result)
('Alice', 'Apricot', 30)
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
# %% Types
result: tuple[str,str,int]
# %% Data
# Fixture: write the single CSV record the solution has to read;
# the doctests remove it.
FILE = '_temporary.txt'
DATA = 'Alice,Apricot,30\n'
with open(FILE, mode='wt') as file:
    file.write(DATA)
# %% Result
# Placeholder for the solution; the doctests reject the Ellipsis value.
result = ...
# %% About
# - Name: File Read CSV
# - Difficulty: easy
# - Lines: 11
# - Minutes: 8
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Read file `FILE` content as list of strings (`list[str]`)
# 2. Remove newline character (`\n`) from each line
# 3. Split each line by comma (`,`)
# 4. Convert numeric values to integer (`int`)
# 5. Each row should be a tuple (`tuple`) with values for firstname, lastname and age
# 6. Define variable `result` with the solution
# 7. Run doctests - all must succeed
# %% Polish
# 1. Wczytaj zawartość pliku `FILE` jako listę ciągów znaków (`list[str]`)
# 2. Z każdej linii usuń znak końca linii (`\n`)
# 3. Każdą linię podziel po przecinku (`,`)
# 4. Przekonwertuj wartości numeryczne do liczby całkowitej (`int`)
# 5. Każdy wiersz powinien być krotką (`tuple`) z wartościami dla imię, nazwisko i wiek
# 6. Zdefiniuj zmienną `result` z rozwiązaniem
# 7. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# [('firstname', 'lastname', 'age'),
#  ('Alice', 'Apricot', 30),
#  ('Bob', 'Blackthorn', 31),
#  ('Carol', 'Corn', 32),
#  ('Dave', 'Durian', 33),
#  ('Eve', 'Elderberry', 34),
#  ('Mallory', 'Melon', 15)]
# %% Hints
# - `with`
# - `open()`
# - `str.split()`
# - `str.strip()`
# - Comprehension
# - `int()`
# - `(1,2,3) + ('abc',)`
# - `list.append()`
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python has an invalid version; expected: `3.9` or newer.'
>>> from pprint import pprint
>>> from os import remove; remove(FILE)
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is list, \
'Variable `result` has an invalid type; expected: `list`.'
>>> assert all(type(x) is tuple for x in result), \
'Variable `result` has elements of an invalid type; all items should be: `tuple`.'
>>> pprint(result)
[('firstname', 'lastname', 'age'),
 ('Alice', 'Apricot', 30),
 ('Bob', 'Blackthorn', 31),
 ('Carol', 'Corn', 32),
 ('Dave', 'Durian', 33),
 ('Eve', 'Elderberry', 34),
 ('Mallory', 'Melon', 15)]
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
# %% Types
result: list[tuple]
# %% Data
# Fixture: write the CSV file (header + six records) the solution has to
# read; the doctests remove it.
FILE = '_temporary.csv'
DATA = """firstname,lastname,age
Alice,Apricot,30
Bob,Blackthorn,31
Carol,Corn,32
Dave,Durian,33
Eve,Elderberry,34
Mallory,Melon,15
"""
with open(FILE, mode='w') as file:
    file.write(DATA)
# %% Result
# Placeholder for the solution; the doctests reject the Ellipsis value.
result = ...