4.2. To CSV

File paths also work with URLs

4.2.1. SetUp

>>> import pandas as pd
>>>
>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)
>>>
>>>
>>> data = pd.DataFrame([
...     {'firstname': 'Alice', 'lastname': 'Apricot', 'age': 30, 'lastlogin': pd.Timestamp('2000-01-01'), 'is_active': True},
...     {'firstname': 'Bob', 'lastname': 'Blackthorn', 'age': 31, 'lastlogin': pd.Timestamp('2000-01-02'), 'is_active': True},
...     {'firstname': 'Carol', 'lastname': 'Corn', 'age': 32, 'lastlogin': pd.Timestamp('2000-01-03'), 'is_active': False},
...     {'firstname': 'Dave', 'lastname': 'Durian', 'age': 33, 'lastlogin': pd.Timestamp('2000-01-04'), 'is_active': False},
...     {'firstname': 'Eve', 'lastname': 'Elderberry', 'age': 34, 'lastlogin': pd.Timestamp('2000-01-05'), 'is_active': True},
...     {'firstname': 'Mallory', 'lastname': 'Melon', 'age': 15, 'lastlogin': pd.NaT, 'is_active': None},
... ])
>>>
>>> data
  firstname    lastname  age  lastlogin is_active
0     Alice     Apricot   30 2000-01-01      True
1       Bob  Blackthorn   31 2000-01-02      True
2     Carol        Corn   32 2000-01-03     False
3      Dave      Durian   33 2000-01-04     False
4       Eve  Elderberry   34 2000-01-05      True
5   Mallory       Melon   15        NaT      None

4.2.2. Example

>>> data.to_csv('/tmp/myfile.csv')

$ cat /tmp/myfile.csv
,firstname,lastname,age,lastlogin,is_active
0,Alice,Apricot,30,2000-01-01,True
1,Bob,Blackthorn,31,2000-01-02,True
2,Carol,Corn,32,2000-01-03,False
3,Dave,Durian,33,2000-01-04,False
4,Eve,Elderberry,34,2000-01-05,True
5,Mallory,Melon,15,,

4.2.3. Index

>>> data.to_csv('/tmp/myfile.csv', index=False)

$ cat /tmp/myfile.csv
firstname,lastname,age,lastlogin,is_active
Alice,Apricot,30,2000-01-01,True
Bob,Blackthorn,31,2000-01-02,True
Carol,Corn,32,2000-01-03,False
Dave,Durian,33,2000-01-04,False
Eve,Elderberry,34,2000-01-05,True
Mallory,Melon,15,,

4.2.4. Date Format

data.to_csv('/tmp/myfile.csv', date_format='%Y-%m-%d') - ISO 8601 format
data.to_csv('/tmp/myfile.csv', date_format='%d.%m.%Y') - Polish format
data.to_csv('/tmp/myfile.csv', date_format='%m/%d/%Y') - US short format
data.to_csv('/tmp/myfile.csv', date_format='%B %d, %Y') - US long format
%Y - Year with century as a decimal number
%m - Month as a zero-padded decimal number
%d - Day of the month as a zero-padded decimal number
%H - Hour (24-hour clock) as a zero-padded decimal number
%M - Minute as a zero-padded decimal number
%S - Second as a zero-padded decimal number
%f - Microsecond as a decimal number, zero-padded on the left
%Z - Time zone name (empty string if the object is naive)
%z - UTC offset in the form +HHMM or -HHMM (empty string if the object is naive)
%A - Weekday as locale's full name
%a - Weekday as locale's abbreviated name
%B - Month as locale's full name
%b - Month as locale's abbreviated name
%I - Hour (12-hour clock) as a zero-padded decimal number
%p - Locale's equivalent of either AM or PM
More info at https://python3.info/intermediate/datetime/directives.html

ISO-8601 Format:

>>> data.to_csv('/tmp/myfile.csv', date_format='%Y-%m-%d')

$ cat /tmp/myfile.csv
,firstname,lastname,age,lastlogin,is_active
0,Alice,Apricot,30,2000-01-01,True
1,Bob,Blackthorn,31,2000-01-02,True
2,Carol,Corn,32,2000-01-03,False
3,Dave,Durian,33,2000-01-04,False
4,Eve,Elderberry,34,2000-01-05,True
5,Mallory,Melon,15,,

Polish Format:

>>> data.to_csv('/tmp/myfile.csv', date_format='%d.%m.%Y')

$ cat /tmp/myfile.csv
,firstname,lastname,age,lastlogin,is_active
0,Alice,Apricot,30,01.01.2000,True
1,Bob,Blackthorn,31,02.01.2000,True
2,Carol,Corn,32,03.01.2000,False
3,Dave,Durian,33,04.01.2000,False
4,Eve,Elderberry,34,05.01.2000,True
5,Mallory,Melon,15,,

US Short Format:

>>> data.to_csv('/tmp/myfile.csv', date_format='%m/%d/%Y')

$ cat /tmp/myfile.csv
,firstname,lastname,age,lastlogin,is_active
0,Alice,Apricot,30,01/01/2000,True
1,Bob,Blackthorn,31,01/02/2000,True
2,Carol,Corn,32,01/03/2000,False
3,Dave,Durian,33,01/04/2000,False
4,Eve,Elderberry,34,01/05/2000,True
5,Mallory,Melon,15,,

US Long Format:

>>> data.to_csv('/tmp/myfile.csv', date_format='%B %d, %Y')

$ cat /tmp/myfile.csv
,firstname,lastname,age,lastlogin,is_active
0,Alice,Apricot,30,"January 01, 2000",True
1,Bob,Blackthorn,31,"January 02, 2000",True
2,Carol,Corn,32,"January 03, 2000",False
3,Dave,Durian,33,"January 04, 2000",False
4,Eve,Elderberry,34,"January 05, 2000",True
5,Mallory,Melon,15,,

4.2.5. Quoting

data.to_csv('/tmp/myfile.csv', quoting=csv.QUOTE_ALL, quotechar='"')
csv.QUOTE_ALL - Quote everything
csv.QUOTE_MINIMAL - Quote only fields which contain special characters such as delimiter, quotechar or any of the characters in lineterminator.
csv.QUOTE_NONNUMERIC - Quote all non-numeric fields.
csv.QUOTE_NONE - Never quote fields. When the current delimiter occurs in output data it is preceded by the current escapechar character

>>> import csv
>>>
>>> data.to_csv('/tmp/myfile.csv', quoting=csv.QUOTE_ALL, quotechar='"')

$ cat /tmp/myfile.csv
"","firstname","lastname","age","lastlogin","is_active"
"0","Alice","Apricot","30","2000-01-01","True"
"1","Bob","Blackthorn","31","2000-01-02","True"
"2","Carol","Corn","32","2000-01-03","False"
"3","Dave","Durian","33","2000-01-04","False"
"4","Eve","Elderberry","34","2000-01-05","True"
"5","Mallory","Melon","15","",""

4.2.6. Assignments

# %% About
# - Name: Pandas To CSV
# - Difficulty: easy
# - Lines: 1
# - Minutes: 2

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% English
# 1. Export data from `data` to file `FILE`
# 2. Data has to be in CSV format
# 3. Run doctests - all must succeed

# %% Polish
# 1. Wyeksportuj dane z `data` do pliku `FILE`
# 2. Dane mają być w formacie CSV
# 3. Uruchom doctesty - wszystkie muszą się powieść

# %% Expected
# >>> result
# ,firstname,lastname,age,email,lastlogin,is_active,groups
# 0,Alice,Apricot,30,alice@example.com,2000-01-01,True,users;staff
# 1,Bob,Blackthorn,31,bob@example.com,2000-01-02,True,users;staff
# 2,Carol,Corn,32,carol@example.com,2000-01-03,True,users
# 3,Dave,Durian,33,dave@example.org,2000-01-04,True,users
# 4,Eve,Elderberry,34,eve@example.org,2000-01-05,True,users;staff;admins
# 5,Mallory,Melon,15,mallory@example.net,,False,
# <BLANKLINE>

# %% Doctests
# The doctests below read the content of `FILE` back into `result`, check that
# it is a `str`, compare it with the expected CSV text and finally remove
# `FILE`, so your solution must have written the file before they run.
"""
>>> import sys; sys.tracebacklimit = 0
>>> from os import remove

>>> result = open(FILE).read()

>>> assert 'result' in globals(), \
'Variable `result` is not defined; assign result of your program to it.'

>>> assert result is not Ellipsis, \
'Variable `result` has an invalid value; assign result of your program to it.'

>>> assert type(result) is str, \
'Variable `result` has an invalid type; expected: `str`.'

>>> pd.set_option('display.max_columns', 50)
>>> pd.set_option('display.max_rows', 200)
>>> pd.set_option('display.width', 500)
>>> pd.set_option('display.memory_usage', 'deep')
>>> pd.set_option('display.precision', 4)

>>> print(result)  # doctest: +NORMALIZE_WHITESPACE
,firstname,lastname,age,email,lastlogin,is_active,groups
0,Alice,Apricot,30,alice@example.com,2000-01-01,True,users;staff
1,Bob,Blackthorn,31,bob@example.com,2000-01-02,True,users;staff
2,Carol,Corn,32,carol@example.com,2000-01-03,True,users
3,Dave,Durian,33,dave@example.org,2000-01-04,True,users
4,Eve,Elderberry,34,eve@example.org,2000-01-05,True,users;staff;admins
5,Mallory,Melon,15,mallory@example.net,,False,
<BLANKLINE>

>>> remove(FILE)
"""

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -f -v myfile.py`

# %% Imports
import pandas as pd

# %% Types

# %% Data
# Address of the CSV file that is loaded as the input of this exercise.
DATA = 'https://python3.info/_static/example.csv'
# Path of the file your solution has to write; the doctests read it back
# and remove it afterwards.
FILE = r'_temporary.csv'

# Load the CSV found at `DATA` (this is the object you have to export).
data = pd.read_csv(DATA)

# %% Result