4.2. To CSV
File paths also work with URLs
4.2.1. SetUp
>>> import pandas as pd
>>>
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>>
>>>
>>> data = pd.DataFrame([
... {'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30, 'lastlogin': pd.Timestamp('2000-01-01'), 'is_active': True},
... {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31, 'lastlogin': pd.Timestamp('2000-01-02'), 'is_active': True},
... {'firstname': 'Carol', 'lastname': 'Corn', 'age': 32, 'lastlogin': pd.Timestamp('2000-01-03'), 'is_active': False},
... {'firstname': 'Dave', 'lastname': 'Durian', 'age': 33, 'lastlogin': pd.Timestamp('2000-01-04'), 'is_active': False},
... {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34, 'lastlogin': pd.Timestamp('2000-01-05'), 'is_active': True},
... {'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15, 'lastlogin': pd.NaT, 'is_active': None},
... ])
>>>
>>> data
firstname lastname age lastlogin is_active
0 Alice Apricot 30 2000-01-01 True
1 Bob Blackthorn 31 2000-01-02 True
2 Carol Corn 32 2000-01-03 False
3 Dave Durian 33 2000-01-04 False
4 Eve Elderberry 34 2000-01-05 True
5 Mallory Melon 15 NaT None
4.2.2. Example
>>> data.to_csv('/tmp/myfile.csv')
$ cat /tmp/myfile.csv
,firstname,lastname,age,lastlogin,is_active
0,Alice,Apricot,30,2000-01-01,True
1,Bob,Blackthorn,31,2000-01-02,True
2,Carol,Corn,32,2000-01-03,False
3,Dave,Durian,33,2000-01-04,False
4,Eve,Elderberry,34,2000-01-05,True
5,Mallory,Melon,15,,
4.2.3. Index
>>> data.to_csv('/tmp/myfile.csv', index=False)
$ cat /tmp/myfile.csv
firstname,lastname,age,lastlogin,is_active
Alice,Apricot,30,2000-01-01,True
Bob,Blackthorn,31,2000-01-02,True
Carol,Corn,32,2000-01-03,False
Dave,Durian,33,2000-01-04,False
Eve,Elderberry,34,2000-01-05,True
Mallory,Melon,15,,
4.2.4. Date Format
- `data.to_csv('/tmp/myfile.csv', date_format='%Y-%m-%d')` - ISO 8601 format
- `data.to_csv('/tmp/myfile.csv', date_format='%d.%m.%Y')` - Polish format
- `data.to_csv('/tmp/myfile.csv', date_format='%m/%d/%Y')` - US short format
- `data.to_csv('/tmp/myfile.csv', date_format='%B %d, %Y')` - US long format
- `%Y` - Year with century as a decimal number
- `%m` - Month as a zero-padded decimal number
- `%d` - Day of the month as a zero-padded decimal number
- `%H` - Hour (24-hour clock) as a zero-padded decimal number
- `%M` - Minute as a zero-padded decimal number
- `%S` - Second as a zero-padded decimal number
- `%f` - Microsecond as a decimal number, zero-padded on the left
- `%Z` - Time zone name (empty string if the object is naive)
- `%z` - UTC offset in the form +HHMM or -HHMM (empty string if the object is naive)
- `%A` - Weekday as locale's full name
- `%a` - Weekday as locale's abbreviated name
- `%B` - Month as locale's full name
- `%b` - Month as locale's abbreviated name
- `%I` - Hour (12-hour clock) as a zero-padded decimal number
- `%p` - Locale's equivalent of either AM or PM
- More info at https://python3.info/intermediate/datetime/directives.html
ISO-8601 Format:
>>> data.to_csv('/tmp/myfile.csv', date_format='%Y-%m-%d')
$ cat /tmp/myfile.csv
,firstname,lastname,age,lastlogin,is_active
0,Alice,Apricot,30,2000-01-01,True
1,Bob,Blackthorn,31,2000-01-02,True
2,Carol,Corn,32,2000-01-03,False
3,Dave,Durian,33,2000-01-04,False
4,Eve,Elderberry,34,2000-01-05,True
5,Mallory,Melon,15,,
Polish Format:
>>> data.to_csv('/tmp/myfile.csv', date_format='%d.%m.%Y')
$ cat /tmp/myfile.csv
,firstname,lastname,age,lastlogin,is_active
0,Alice,Apricot,30,01.01.2000,True
1,Bob,Blackthorn,31,02.01.2000,True
2,Carol,Corn,32,03.01.2000,False
3,Dave,Durian,33,04.01.2000,False
4,Eve,Elderberry,34,05.01.2000,True
5,Mallory,Melon,15,,
US Short Format:
>>> data.to_csv('/tmp/myfile.csv', date_format='%m/%d/%Y')
$ cat /tmp/myfile.csv
,firstname,lastname,age,lastlogin,is_active
0,Alice,Apricot,30,01/01/2000,True
1,Bob,Blackthorn,31,01/02/2000,True
2,Carol,Corn,32,01/03/2000,False
3,Dave,Durian,33,01/04/2000,False
4,Eve,Elderberry,34,01/05/2000,True
5,Mallory,Melon,15,,
US Long Format:
>>> data.to_csv('/tmp/myfile.csv', date_format='%B %d, %Y')
$ cat /tmp/myfile.csv
,firstname,lastname,age,lastlogin,is_active
0,Alice,Apricot,30,"January 01, 2000",True
1,Bob,Blackthorn,31,"January 02, 2000",True
2,Carol,Corn,32,"January 03, 2000",False
3,Dave,Durian,33,"January 04, 2000",False
4,Eve,Elderberry,34,"January 05, 2000",True
5,Mallory,Melon,15,,
4.2.5. Quoting
- `data.to_csv('/tmp/myfile.csv', quoting=csv.QUOTE_ALL, quotechar='"')`
- `csv.QUOTE_ALL` - Quote everything
- `csv.QUOTE_MINIMAL` - Quote only fields which contain special characters such as delimiter, quotechar or any of the characters in lineterminator
- `csv.QUOTE_NONNUMERIC` - Quote all non-numeric fields
- `csv.QUOTE_NONE` - Never quote fields. When the current delimiter occurs in output data it is preceded by the current escapechar character
>>> import csv
>>>
>>> data.to_csv('/tmp/myfile.csv', quoting=csv.QUOTE_ALL, quotechar='"')
$ cat /tmp/myfile.csv
"","firstname","lastname","age","lastlogin","is_active"
"0","Alice","Apricot","30","2000-01-01","True"
"1","Bob","Blackthorn","31","2000-01-02","True"
"2","Carol","Corn","32","2000-01-03","False"
"3","Dave","Durian","33","2000-01-04","False"
"4","Eve","Elderberry","34","2000-01-05","True"
"5","Mallory","Melon","15","",""
4.2.6. Assignments
# %% About
# - Name: Pandas To CSV
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% English
# 1. Export data from `data` to file `FILE`
# 2. Data has to be in CSV format
# 3. Run doctests - all must succeed
# %% Polish
# 1. Wyeksportuj dane z `data` do pliku `FILE`
# 2. Dane mają być w formacie CSV
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Expected
# >>> result
# ,firstname,lastname,age,email,lastlogin,is_active,groups
# 0,Alice,Apricot,30,alice@example.com,2000-01-01,True,users;staff
# 1,Bob,Blackthorn,31,bob@example.com,2000-01-02,True,users;staff
# 2,Carol,Corn,32,carol@example.com,2000-01-03,True,users
# 3,Dave,Durian,33,dave@example.org,2000-01-04,True,users
# 4,Eve,Elderberry,34,eve@example.org,2000-01-05,True,users;staff;admins
# 5,Mallory,Melon,15,mallory@example.net,,False,
# <BLANKLINE>
# %% Doctests
"""
>>> import sys; sys.tracebacklimit = 0
>>> from os import remove
>>> result = open(FILE).read()
>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'
>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'
>>> assert type(result) is str, \
'Variable `result` has an invalid type; expected: `str`.'
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>> print(result) # doctest: +NORMALIZE_WHITESPACE
,firstname,lastname,age,email,lastlogin,is_active,groups
0,Alice,Apricot,30,alice@example.com,2000-01-01,True,users;staff
1,Bob,Blackthorn,31,bob@example.com,2000-01-02,True,users;staff
2,Carol,Corn,32,carol@example.com,2000-01-03,True,users
3,Dave,Durian,33,dave@example.org,2000-01-04,True,users
4,Eve,Elderberry,34,eve@example.org,2000-01-05,True,users;staff;admins
5,Mallory,Melon,15,mallory@example.net,,False,
<BLANKLINE>
>>> remove(FILE)
"""
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`
# %% Imports
import pandas as pd
# %% Types
# %% Data
# Location of the source dataset; passed straight to `pd.read_csv` below.
DATA = 'https://python3.info/_static/example.csv'
# Path of the CSV file the solution must write; the doctests read it back
# with `open(FILE).read()` and delete it afterwards with `remove(FILE)`.
FILE = r'_temporary.csv'
# Source dataset loaded into a DataFrame; this is the object to export to `FILE`.
data = pd.read_csv(DATA)
# %% Result