7.5. Builder
EN: Builder
PL: Budowniczy
Type: object
Why: To separate the construction of an object from its representation
Why: The same construction algorithm can be applied to different representations
Usecase: Export data to different formats
The Builder design pattern is a creational design pattern that separates the construction of a complex object from its representation. It is useful when the construction process must allow different representations for the object that's constructed.
Here's a simple example of the Builder pattern in Python:
>>> class Builder:
... def build_part_a(self):
... pass
...
... def build_part_b(self):
... pass
...
>>> class ConcreteBuilder(Builder):
... def build_part_a(self):
... return "Part A"
...
... def build_part_b(self):
... return "Part B"
...
>>> class Director:
... def __init__(self, builder):
... self.builder = builder
...
... def construct(self):
... part_a = self.builder.build_part_a()
... part_b = self.builder.build_part_b()
... return f"{part_a} and {part_b}"
...
>>> builder = ConcreteBuilder()
>>> director = Director(builder)
>>> product = director.construct()
>>> print(product)
Part A and Part B
In this example, Builder is an interface that specifies methods for creating the parts of a complex object. ConcreteBuilder is a class that implements these operations to create concrete parts. Director is a class that constructs an object using the Builder interface.
7.5.1. Pattern
7.5.2. Problem
Violates the Open/Closed Principle
Tight coupling between the Presentation class and the export formats
A PDF has pages and a movie has frames; this knowledge belongs somewhere else
Duplicated code
Magic number
class CSV:
    """CSV file settings, all supplied through the constructor.

    Every option is a required positional parameter, so a call site reads
    as an unlabeled list of values.
    """

    def __init__(self, filename, delimiter, encoding, quotechar, lineterminator, verbose):
        # Paired assignments; attributes are still created in parameter order.
        self.filename, self.delimiter = filename, delimiter
        self.encoding, self.quotechar = encoding, quotechar
        self.lineterminator, self.verbose = lineterminator, verbose


if __name__ == '__main__':
    # Demonstrates the problem: nothing tells the reader what each value means.
    file = CSV('/tmp/myfile.csv', ',', 'utf-8', '"', '\n', True)
7.5.3. Solution
Use the builder pattern to separate the exporting logic from the presentation format
The same exporting logic can then be reused for different formats
class CSV:
    """CSV file settings assembled step by step through chained calls.

    The constructor sets only ``filename``. Every other attribute exists on
    the instance only after the matching ``with_*`` method has been called.
    """

    def __init__(self, filename):
        self.filename = filename

    def _assign(self, attribute, value):
        # Shared by all ``with_*`` methods: store the value and hand back the
        # same instance so that calls can be chained.
        setattr(self, attribute, value)
        return self

    def with_delimiter(self, delimiter):
        """Store ``delimiter`` and return this instance."""
        return self._assign('delimiter', delimiter)

    def with_encoding(self, encoding):
        """Store ``encoding`` and return this instance."""
        return self._assign('encoding', encoding)

    def with_quotechar(self, quotechar):
        """Store ``quotechar`` and return this instance."""
        return self._assign('quotechar', quotechar)

    def with_lineterminator(self, lineterminator):
        """Store ``lineterminator`` and return this instance."""
        return self._assign('lineterminator', lineterminator)

    def with_verbose(self, verbose):
        """Store ``verbose`` and return this instance."""
        return self._assign('verbose', verbose)


if __name__ == '__main__':
    # Each option is labeled by the method that sets it.
    file = (
        CSV('/tmp/myfile.csv')
        .with_delimiter(',')
        .with_encoding('utf-8')
        .with_quotechar('"')
        .with_lineterminator('\n')
        .with_verbose(True)
    )
class CSV:
    """CSV file settings; same constructor as before, called with keywords."""

    def __init__(self, filename, delimiter, encoding, quotechar,
                 lineterminator, verbose):
        # One bulk update of the instance dict; keyword order is preserved,
        # so attributes appear in the same order as the parameters.
        vars(self).update(
            filename=filename,
            delimiter=delimiter,
            encoding=encoding,
            quotechar=quotechar,
            lineterminator=lineterminator,
            verbose=verbose,
        )


if __name__ == '__main__':
    # Keyword arguments label every value at the call site.
    file = CSV(
        filename='/tmp/myfile.csv',
        delimiter=',',
        encoding='utf-8',
        quotechar='"',
        lineterminator='\n',
        verbose=True,
    )
7.5.4. Use Case - 1
# Characters that are deleted outright from the input.
_UNWANTED_CHARS = str.maketrans('', '', '!@#$%^&*()+=_\\\'"')

# Leading street-type words and abbreviations, tried in this order.
_STREET_PREFIXES = (
    'ulica', 'osiedle', 'plac', 'aleja',
    'ul.', 'os.', 'pl.', 'al.',
    'ul ', 'os ', 'pl ', 'al ',
)

# Ordinal words and digits rewritten as Roman numerals, applied in this order.
_NUMERALS = (
    ('trzeciego', 'III'),
    ('drugiego', 'II'),
    ('pierwszego', 'I'),
    ('3', 'III'),
    ('2', 'II'),
    ('1', 'I'),
)


def clean(text):
    """Normalize a street name to a common, title-cased form.

    Steps, in order: lower-case the text, delete unwanted special
    characters, collapse every run of whitespace to a single space, strip
    a leading street-type prefix, rewrite ordinals and digits as Roman
    numerals, and title-case the result.

    Args:
        text: Raw street name.

    Returns:
        The normalized street name.
    """
    # One pass deletes all unwanted characters instead of 16 chained replaces.
    text = text.lower().translate(_UNWANTED_CHARS)

    # split()/join() collapses any run of spaces, tabs and newlines to one
    # space and trims both ends. Replacing a single space with a single
    # space, as the code did before, left multiple spaces in place.
    text = ' '.join(text.split())

    # Prefixes are removed one after another, exactly as in a chained call.
    for prefix in _STREET_PREFIXES:
        text = text.removeprefix(prefix)
    text = text.strip()

    for old, numeral in _NUMERALS:
        text = text.replace(old, numeral)

    # title() lower-cases the tail of 'III' and 'II'; restore them afterwards.
    text = text.title().replace('Iii', 'III').replace('Ii', 'II')
    return text.strip()
7.5.5. Use Case - 2
import pandas as pd
# Alternative source on Polish Wikipedia, currently disabled:
# PKB = 'https://pl.wikipedia.org/wiki/Lista_pa%C5%84stw_%C5%9Bwiata_wed%C5%82ug_PKB_nominalnego'
PKB = 'https://python3.info/_static/percapita-pkb.html'
# Factor used in the final multiplication together with 1_000_000.
# NOTE(review): presumably an exchange rate relative to USD - confirm.
USD = 1


# %% Problem
# Every step rebinds `pkb`, so the name is repeated on each line.
pkb = pd.read_html(PKB)[1]  # second table found on the page
pkb = pkb.rename(columns={'Państwo':'kraj', '2021 r.':'pkb'})
pkb = pkb.loc[:, ['kraj', 'pkb']]  # keep only the two renamed columns
# In column 'pkb': delete '\xa0' (non-breaking space) and turn 'b.d.' into
# pd.NA. With regex=True the keys are patterns, so each '.' in 'b.d.'
# matches any character.
# NOTE(review): 'b.d.' presumably marks missing data in the table - confirm.
pkb = pkb.replace({'pkb': {'\xa0': '', 'b.d.': pd.NA}}, regex=True)
pkb = pkb.dropna(how='any', axis='rows')  # drop rows with any missing value
pkb = pkb.astype({'kraj': 'str', 'pkb': 'int64'})
pkb = pkb.convert_dtypes()
pkb = pkb.set_index('kraj', drop=True)
# NOTE(review): presumably the table lists values in millions - confirm.
pkb = pkb.mul(1_000_000*USD)


# %% Solution
# The same steps as above, written as one method chain bound to `pkb` once.
pkb = (
    pd
    .read_html(PKB)[1]
    .rename(columns={'Państwo':'kraj', '2021 r.':'pkb'})
    .loc[:, ['kraj', 'pkb']]
    .replace({'pkb': {'\xa0': '', 'b.d.': pd.NA}}, regex=True)
    .dropna(how='any', axis='rows')
    .astype({'kraj': 'str', 'pkb': 'int64'})
    .convert_dtypes()
    .set_index('kraj', drop=True)
    .mul(1_000_000*USD)
)
7.5.6. Use Case - 3
from enum import Enum
class Slide:
    """A single presentation slide that carries one piece of text."""

    text: str

    def __init__(self, text: str) -> None:
        """Store ``text`` as the content of the slide."""
        self.text = text

    def get_text(self) -> str:
        """Return the text stored on this slide."""
        content = self.text
        return content
# %% Formats
class PresentationFormat(Enum):
    """Identifiers of the formats a presentation can be exported to."""

    PDF = 1
    IMAGE = 2
    POWERPOINT = 3
    MOVIE = 4
class PDFDocument:
    """Stand-in for a PDF document; it only reports what it was asked to do."""

    def add_page(self, text: str) -> None:
        """Print a notice that a page was added; ``text`` is not used."""
        print('Adding a page to PDF')
class Movie:
    """Stand-in for a movie; it only reports what it was asked to do."""

    def add_frame(self, text: str, duration: int) -> None:
        """Print a notice that a frame was added; both arguments are unused."""
        print('Adding a frame to a movie')
# %% Main
class Presentation:
    """An ordered list of slides that knows how to export itself."""

    slides: list[Slide]

    def __init__(self) -> None:
        self.slides = []

    def add_slide(self, slide: Slide) -> None:
        """Append ``slide`` after the slides added so far."""
        self.slides.append(slide)

    def export(self, format: PresentationFormat) -> None:
        """Export a 'Copyright' entry followed by every slide.

        Only ``PresentationFormat.PDF`` and ``PresentationFormat.MOVIE`` are
        handled; any other format does nothing.
        """
        if format == PresentationFormat.PDF:
            self._export_pdf()
        elif format == PresentationFormat.MOVIE:
            self._export_movie()

    def _export_pdf(self) -> None:
        # One page for the copyright notice, then one page per slide.
        document = PDFDocument()
        document.add_page('Copyright')
        for item in self.slides:
            document.add_page(item.get_text())

    def _export_movie(self) -> None:
        # One frame for the copyright notice, then one frame per slide.
        film = Movie()
        film.add_frame('Copyright', duration=3)
        for item in self.slides:
            film.add_frame(item.get_text(), duration=3)
7.5.7. Use Case - 4
from enum import Enum
class Slide:
    """A single presentation slide that carries one piece of text."""

    text: str

    def __init__(self, text: str) -> None:
        """Store ``text`` as the content of the slide."""
        self.text = text

    def get_text(self) -> str:
        """Return the text stored on this slide."""
        content = self.text
        return content
class PresentationBuilder:
    """Base class for builders that receive slides one at a time."""

    def add_slide(self, slide: Slide) -> None:
        """Accept one slide; this base implementation always raises.

        Raises:
            NotImplementedError: Always, until a subclass overrides it.
        """
        raise NotImplementedError
# %% Formats
class PresentationFormat(Enum):
    """Identifiers of presentation output formats."""

    PDF = 1
    IMAGE = 2
    POWERPOINT = 3
    MOVIE = 4
class PDFDocument:
    """Stand-in for a PDF document; it only reports what it was asked to do."""

    def add_page(self, text: str) -> None:
        """Print a notice that a page was added; ``text`` is not used."""
        print('Adding a page to PDF')
class Movie:
    """Stand-in for a movie; it only reports what it was asked to do."""

    def add_frame(self, text: str, duration: int) -> None:
        """Print a notice that a frame was added; both arguments are unused."""
        print('Adding a frame to a movie')
class PDFDocumentBuilder(PresentationBuilder):
    """Builder that turns every received slide into a page of a PDF."""

    document: PDFDocument

    def __init__(self) -> None:
        # Start from a fresh document; pages are added by add_slide().
        self.document = PDFDocument()

    def add_slide(self, slide: Slide) -> None:
        """Add the text of ``slide`` to the document as one page."""
        page_text = slide.get_text()
        self.document.add_page(page_text)

    def get_pdf_document(self) -> PDFDocument:
        """Return the document that the pages were added to."""
        return self.document
class MovieBuilder(PresentationBuilder):
    """Builder that turns every received slide into a frame of a movie.

    Args:
        frame_duration: Value passed as ``duration`` for every frame.
            Defaults to 3, the value that used to be hard-coded.
    """

    movie: Movie
    frame_duration: int

    def __init__(self, frame_duration: int = 3) -> None:
        self.movie = Movie()
        # Kept on the instance so the duration is named and configurable
        # instead of being a magic number inside add_slide().
        self.frame_duration = frame_duration

    def add_slide(self, slide: Slide) -> None:
        """Add the text of ``slide`` to the movie as one frame."""
        self.movie.add_frame(slide.get_text(), duration=self.frame_duration)

    def get_movie(self) -> Movie:
        """Return the movie that the frames were added to."""
        return self.movie
# %% Main
class Presentation:
    """An ordered list of slides that is exported through a builder."""

    slides: list[Slide]

    def __init__(self) -> None:
        self.slides = []

    def add_slide(self, slide: Slide) -> None:
        """Append ``slide`` after the slides added so far."""
        self.slides.append(slide)

    def export(self, builder: PresentationBuilder) -> None:
        """Feed a 'Copyright' slide and then every slide to ``builder``.

        The presentation does not know what the builder produces; it only
        calls ``builder.add_slide``.
        """
        copyright_slide = Slide('Copyright')
        builder.add_slide(copyright_slide)
        for item in self.slides:
            builder.add_slide(item)
if __name__ == '__main__':
    presentation = Presentation()
    presentation.add_slide(Slide('Slide 1'))
    presentation.add_slide(Slide('Slide 2'))

    # Export to PDF. The result is a PDF document, so it is named `pdf`;
    # it was previously stored in a variable called `movie`.
    builder = PDFDocumentBuilder()
    presentation.export(builder)
    pdf = builder.get_pdf_document()

    # Export the same presentation to a movie with a different builder.
    builder = MovieBuilder()
    presentation.export(builder)
    movie = builder.get_movie()
7.5.8. Use Case - 5
When a language does not support keyword arguments for functions and methods
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.read_csv.html
>>> def read_csv(filepath_or_buffer, sep=', ', delimiter=None, header='infer',
... names=None, index_col=None, usecols=None, squeeze=False,
... prefix=None, mangle_dupe_cols=True, dtype=None, engine=None,
... converters=None, true_values=None, false_values=None,
... skipinitialspace=False, skiprows=None, nrows=None,
... na_values=None, keep_default_na=True, na_filter=True,
... verbose=False, skip_blank_lines=True, parse_dates=False,
... infer_datetime_format=False, keep_date_col=False,
... date_parser=None, dayfirst=False, iterator=False,
... chunksize=None, compression='infer', thousands=None,
... decimal=b'.', lineterminator=None, quotechar='"',
... quoting=0, escapechar=None, comment=None, encoding=None,
... dialect=None, tupleize_cols=None, error_bad_lines=True,
... warn_bad_lines=True, skipfooter=0, doublequote=True,
... delim_whitespace=False, low_memory=True, memory_map=False,
... float_precision=None): ...
>>> data = read_csv('/tmp/myfile.csv', ', ', None, 'infer', None, None, None,
... False, None, True, None, None, None, None, None, False,
... None, None, None, True, True, False, True, False, False,
... False, None, False, False, None, 'infer', None, b'.',
... None, '"', 0, None, None, None, None, None, True, True,
... 0, True, False, True, False, None)
>>> data = read_csv('/tmp/myfile.csv',
... chunksize=10_000,
... delimiter=',',
... encoding='utf-8')
7.5.9. Use Case - 6
>>> class Person:
... def __init__(self, firstname, lastname, email, age, height, weight):
... self.firstname = firstname
... self.lastname = lastname
... self.email = email
... self.age = age
... self.height = height
... self.weight = weight
>>> mark = Person( 'Mark', 'Watney', 'mwatney@nasa.gov', 42, 178.0, 75.5)
>>> mark = Person(
... firstname='Mark',
... lastname='Watney',
... email='mwatney@nasa.gov',
... age=42,
... height=178.0,
... weight=75.5,
... )
7.5.10. Use Case - 7
>>> class Person:
... def __init__(self, firstname, lastname, is_astronaut, is_retired,
... is_alive, friends, assignments, missions, assigned):
... ...
>>> mark = Person('Mark', 'Watney', True, False, True, None, 1, 17, False)
>>> mark = Person(
... firstname = 'Mark',
... lastname = 'Watney',
... is_astronaut = True,
... is_retired = False,
... is_alive = True,
... friends = None,
... assignments = 1,
... missions = 17,
... assigned = False,
... )
>>> class Person:
... def __init__(self):
... ...
...
... def withFirstname(self, firstname):
... self.firstname = firstname
... return self
...
... def withLastname(self, lastname):
... self.lastname = lastname
... return self
...
... def withIsAstronaut(self, is_astronaut):
... self.is_astronaut = is_astronaut
... return self
...
... def withIsRetired(self, is_retired):
... self.is_retired = is_retired
... return self
...
... def withIsAlive(self, is_alive):
... self.is_alive = is_alive
... return self
...
... def withFriends(self, friends):
... self.friends = friends
... return self
...
... def withAssignments(self, assignments):
... self.assignments = assignments
... return self
...
... def withMissions(self, missions):
... self.missions = missions
... return self
...
... def withAssigned(self, assigned):
... self.assigned = assigned
... return self
>>>
>>>
>>> mark = (
... Person()
... .withFirstname('Mark')
... .withLastname('Watney')
... .withIsAstronaut(True)
... .withIsRetired(False)
... .withIsAlive(True)
... .withFriends(None)
... .withAssignments(1)
... .withMissions(17)
... .withAssigned(False)
... )
7.5.11. Assignments
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% About
# - Name: DesignPatterns Creational BuilderEmail
# - Difficulty: easy
# - Lines: 15
# - Minutes: 5
# %% English
# 1. Create class `Email`
# 2. Use builder pattern to set:
# - `recipient: str`
# - `sender: str`
# - `subject: str`
# - `body: str`
# 3. Run doctests - all must succeed
# %% Polish
# 1. Stwórz klasę `Email`
# 2. Użyj wzorca builder, aby ustawić:
# - `recipient: str`
# - `sender: str`
# - `subject: str`
# - `body: str`
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> from pprint import pprint
>>> result = (
... Email()
... .with_recipient('mwatney@nasa.gov')
... .with_sender('mlewis@nasa.gov')
... .with_subject('Hello from Mars')
... .with_body('Greetings from Red Planet')
... )
>>> pprint(vars(result), width=72, sort_dicts=False)
{'recipient': 'mwatney@nasa.gov',
'sender': 'mlewis@nasa.gov',
'subject': 'Hello from Mars',
'body': 'Greetings from Red Planet'}
"""
# Create class `Email`
# Use builder pattern to set:
# - `recipient: str`
# - `sender: str`
# - `subject: str`
# - `body: str`
class Email:
    """Email message assembled through chained ``with_*`` calls.

    No attribute is set by the constructor; each one appears on the
    instance when its ``with_*`` method is called. Every ``with_*`` method
    returns the same instance so calls can be chained.
    """

    recipient: str
    sender: str
    subject: str
    body: str
    attachment: bytes

    def with_recipient(self, recipient: str) -> 'Email':
        """Store ``recipient`` and return this instance."""
        self.recipient = recipient
        return self

    def with_sender(self, sender: str) -> 'Email':
        """Store ``sender`` and return this instance."""
        self.sender = sender
        return self

    def with_subject(self, subject: str) -> 'Email':
        """Store ``subject`` and return this instance."""
        self.subject = subject
        return self

    def with_body(self, body: str) -> 'Email':
        """Store ``body`` and return this instance."""
        self.body = body
        return self
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% About
# - Name: DesignPatterns Creational BuilderEmail
# - Difficulty: easy
# - Lines: 2
# - Minutes: 2
# %% English
# 1. Create class `Email`
# 2. Use builder pattern to set:
# - `subject: str` encode to bytes
# - `body: str` encode to bytes
# 3. Run doctests - all must succeed
# %% Polish
# 1. Stwórz klasę `Email`
# 2. Użyj wzorca builder, aby ustawić:
# - `subject: str` koduje do bajtów
# - `body: str` koduje na bajty
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Hints
# - `str.encode('utf-8')`
# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> result = Email()
>>> assert result.with_subject('cześć').subject == b'cze\\xc5\\x9b\\xc4\\x87', \
'Encode subject with utf-8'
>>> result = Email()
>>> assert result.with_body('cześć').body == b'cze\\xc5\\x9b\\xc4\\x87', \
'Encode body with utf-8'
>>> result = (
... Email()
... .with_recipient('mwatney@nasa.gov')
... .with_sender('mlewis@nasa.gov')
... .with_subject('Hello from Mars')
... .with_body('Greetings from Red Planet')
... )
>>> from pprint import pprint
>>> pprint(vars(result), width=72, sort_dicts=False)
{'recipient': 'mwatney@nasa.gov',
'sender': 'mlewis@nasa.gov',
'subject': b'Hello from Mars',
'body': b'Greetings from Red Planet'}
"""
import re
# Create class `Email`
# Use builder pattern to set:
# - `subject: str` encode to bytes
# - `body: str` encode to bytes
class Email:
    """Email message assembled through chained ``with_*`` calls.

    ``subject`` and ``body`` are accepted as ``str`` and stored as UTF-8
    encoded ``bytes``. Every ``with_*`` method returns the same instance so
    calls can be chained.
    """

    recipient: str
    sender: str
    subject: bytes
    body: bytes

    def with_recipient(self, recipient: str) -> 'Email':
        """Store ``recipient`` and return this instance."""
        self.recipient = recipient
        return self

    def with_sender(self, sender: str) -> 'Email':
        """Store ``sender`` and return this instance."""
        self.sender = sender
        return self

    def with_subject(self, subject: str) -> 'Email':
        """Store ``subject`` encoded as UTF-8 and return this instance."""
        self.subject = subject.encode('utf-8')
        return self

    def with_body(self, body: str) -> 'Email':
        """Store ``body`` encoded as UTF-8 and return this instance."""
        self.body = body.encode('utf-8')
        return self
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% About
# - Name: DesignPatterns Creational BuilderEmail
# - Difficulty: easy
# - Lines: 4
# - Minutes: 3
# %% English
# 1. Create class `Email`
# 2. Use builder pattern to set:
# - `recipient: str` verify email address using regex
# - `sender: str` verify email address using regex
# 3. For email validation use regex pattern: `r'^[a-z]+@nasa.gov$'`
# 4. Run doctests - all must succeed
# %% Polish
# 1. Stwórz klasę `Email`
# 2. Użyj wzorca builder, aby ustawić:
# - `recipient: str` zweryfikuj adres e-mail za pomocą wyrażenia regularnego
# - `sender: str` zweryfikuj adres e-mail za pomocą wyrażenia regularnego
# 3. Do walidacji email użyj wzorca regex: `r'^[a-z]+@nasa.gov$'`
# 4. Uruchom doctesty - wszystkie muszą się powieść
# %% Hints
# - `re.match()`
# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> result = Email()
>>> result.with_recipient('mwatney@esa.int')
Traceback (most recent call last):
ValueError: Invalid recipient
>>> result = Email()
>>> result.with_sender('mwatney@esa.int')
Traceback (most recent call last):
ValueError: Invalid sender
>>> result = (
... Email()
... .with_recipient('mwatney@nasa.gov')
... .with_sender('mlewis@nasa.gov')
... .with_subject('Hello from Mars')
... .with_body('Greetings from Red Planet')
... )
>>> from pprint import pprint
>>> pprint(vars(result), width=72, sort_dicts=False)
{'recipient': 'mwatney@nasa.gov',
'sender': 'mlewis@nasa.gov',
'subject': b'Hello from Mars',
'body': b'Greetings from Red Planet'}
"""
import re
# Create class `Email`
# Use builder pattern to set:
# - `recipient: str` verify email address using regex
# - `sender: str` verify email address using regex
# For email validation use regex pattern: `r'^[a-z]+@nasa.gov$'`
class Email:
    """Email message assembled through chained ``with_*`` calls.

    ``recipient`` and ``sender`` must be lowercase-letter addresses in the
    ``nasa.gov`` domain. ``subject`` and ``body`` are stored as UTF-8
    encoded ``bytes``. Every ``with_*`` method returns the same instance so
    calls can be chained.
    """

    recipient: str
    sender: str
    subject: bytes
    body: bytes

    # Same shape as r'^[a-z]+@nasa.gov$', with the dot escaped so that it
    # matches only a literal '.'. It is used with fullmatch(), which, unlike
    # a trailing '$', does not accept a newline at the end.
    _ADDRESS = re.compile(r'[a-z]+@nasa\.gov')

    def with_recipient(self, recipient: str) -> 'Email':
        """Validate and store ``recipient``; return this instance.

        Raises:
            ValueError: If ``recipient`` is not a valid address.
        """
        if self._ADDRESS.fullmatch(recipient) is None:
            raise ValueError('Invalid recipient')
        self.recipient = recipient
        return self

    def with_sender(self, sender: str) -> 'Email':
        """Validate and store ``sender``; return this instance.

        Raises:
            ValueError: If ``sender`` is not a valid address.
        """
        if self._ADDRESS.fullmatch(sender) is None:
            raise ValueError('Invalid sender')
        self.sender = sender
        return self

    def with_subject(self, subject: str) -> 'Email':
        """Store ``subject`` encoded as UTF-8 and return this instance."""
        self.subject = subject.encode('utf-8')
        return self

    def with_body(self, body: str) -> 'Email':
        """Store ``body`` encoded as UTF-8 and return this instance."""
        self.body = body.encode('utf-8')
        return self
# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author
# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`
# %% About
# - Name: DesignPatterns Creational BuilderEmail
# - Difficulty: easy
# - Lines: 2
# - Minutes: 3
# %% English
# 1. Create class `Email`
# 2. Use builder pattern to set:
# - `attachment: bytes` base64 encoded
# 3. Run doctests - all must succeed
# %% Polish
# 1. Stwórz klasę `Email`
# 2. Użyj wzorca builder, aby ustawić:
# - `attachment: bytes` zakodowane w standardzie base64
# 3. Uruchom doctesty - wszystkie muszą się powieść
# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'
>>> from pprint import pprint
>>> result = (
... Email()
... .with_recipient('mwatney@nasa.gov')
... .with_sender('mlewis@nasa.gov')
... .with_subject('Hello from Mars')
... .with_body('Greetings from Red Planet')
... .with_attachment(b'myfile.txt')
... )
>>> pprint(vars(result), width=72, sort_dicts=False)
{'recipient': 'mwatney@nasa.gov',
'sender': 'mlewis@nasa.gov',
'subject': b'Hello from Mars',
'body': b'Greetings from Red Planet',
'attachment': b'bXlmaWxlLnR4dA=='}
"""
import re
from base64 import b64encode
# Create class `Email`
# Use builder pattern to set:
# - `attachment: bytes` base64 encoded
class Email:
    """Email message assembled through chained ``with_*`` calls.

    ``recipient`` and ``sender`` must be lowercase-letter addresses in the
    ``nasa.gov`` domain. ``subject`` and ``body`` are stored as UTF-8
    encoded ``bytes`` and ``attachment`` as base64 encoded ``bytes``.
    Every ``with_*`` method returns the same instance so calls can be
    chained.
    """

    recipient: str
    sender: str
    subject: bytes
    body: bytes
    attachment: bytes

    # The dot is escaped so that it matches only a literal '.'. The pattern
    # is used with fullmatch(), which, unlike a trailing '$', does not accept
    # a newline at the end.
    _ADDRESS = re.compile(r'[a-z]+@nasa\.gov')

    def with_recipient(self, recipient: str) -> 'Email':
        """Validate and store ``recipient``; return this instance.

        Raises:
            ValueError: If ``recipient`` is not a valid address.
        """
        if self._ADDRESS.fullmatch(recipient) is None:
            raise ValueError('Invalid recipient')
        self.recipient = recipient
        return self

    def with_sender(self, sender: str) -> 'Email':
        """Validate and store ``sender``; return this instance.

        Raises:
            ValueError: If ``sender`` is not a valid address.
        """
        if self._ADDRESS.fullmatch(sender) is None:
            raise ValueError('Invalid sender')
        self.sender = sender
        return self

    def with_subject(self, subject: str) -> 'Email':
        """Store ``subject`` encoded as UTF-8 and return this instance."""
        self.subject = subject.encode('utf-8')
        return self

    def with_body(self, body: str) -> 'Email':
        """Store ``body`` encoded as UTF-8 and return this instance."""
        self.body = body.encode('utf-8')
        return self

    def with_attachment(self, attachment: bytes) -> 'Email':
        """Store ``attachment`` base64 encoded and return this instance."""
        self.attachment = b64encode(attachment)
        return self