6.17. Regex RE Match

  • re.match()

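  • Checks for a match only at the beginning of the string; end the pattern with `$` or use `re.fullmatch()` for an exact match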

  • Checking if user input is correct (email, url, NIP, VAT ID, PESEL)

6.17.1. SetUp

>>> import re

6.17.2. Example

Usage of re.match():

>>> def valid_email(email):
...     if re.match(r'^[a-z]+@nasa\.gov$', email):
...         return True
...     else:
...         return False
>>>
>>>
>>> valid_email('mwatney@nasa.gov')
True
>>>
>>> valid_email('mwatney@notexisting.com')
False

6.17.3. Good Practices

  • Doctests

>>> import re
>>>
>>>
>>> username = r'[a-z][a-z0-9._-]*'
>>> domain   = r'[a-z0-9.-]+'
>>> tld      = r'[a-z]{2,10}'
>>> email    = rf'{username}@{domain}\.{tld}'
>>>
>>> def is_valid(data):
...     if re.match(email, data):
...         return True
...     else:
...         return False
>>> import re
>>>
>>>
>>> username = r'(?P<username>[a-z][a-z0-9._-]*)'
>>> domain   = r'(?P<domain>[a-z0-9.-]+)'
>>> tld      = r'(?P<tld>[a-z]{2,10})'
>>> email    = rf'^{username}@{domain}\.{tld}$'
>>>
>>> def is_valid(data):
...     if re.match(email, data):
...         return True
...     else:
...         return False

6.17.4. Doctests

>>> import re
>>>
>>>
>>> username = r'(?P<username>[a-z][a-z0-9._-]*)'
>>> domain   = r'(?P<domain>[a-z0-9.-]+)'
>>> tld      = r'(?P<tld>[a-z]{2,10})'
>>> email    = rf'^{username}@{domain}\.{tld}$'
>>> pattern = re.compile(email, flags=re.IGNORECASE)
>>>
>>>
>>> def is_valid(data):
...     """
...     >>> is_valid('3ares@nasa.gov')
...     False
...     >>> is_valid('ares3@nasa.gov')
...     True
...     >>> is_valid('a3@nasa.gov')
...     True
...     >>> is_valid('3@nasa.gov')
...     False
...     >>> is_valid('m@nasa.gov')
...     True
...     >>> is_valid('m.watney@nasa.gov')
...     True
...     >>> is_valid('m_watney@nasa.gov')
...     True
...     >>> is_valid('m-watney@nasa.gov')
...     True
...     >>> is_valid('mark.watney@nasa.gov')
...     True
...     >>> is_valid('markwatney@nasa.gov')
...     True
...     >>> is_valid('pan.twardowski@polsa.gov.pl')
...     True
...     >>> is_valid('pan.twardowski@polsa24.gov.pl')
...     True
...     """
...     if pattern.match(data):
...         return True
...     else:
...         return False

6.17.5. Use Case - 1

>>> def matches(pattern, text):
...     if re.match(pattern, text):
...         return True
...     else:
...         return False
>>>
>>> EMAIL = r'^[a-z]+@nasa\.gov$'
>>>
>>> matches(EMAIL, 'mwatney@nasa.gov')
True
>>>
>>> matches(EMAIL, 'mwatney123@nasa.gov')
False

6.17.6. Assignments

# %% License
# - Copyright 2025, Matt Harasymczuk <matt@python3.info>
# - This code can be used only for learning by humans
# - This code cannot be used for teaching others
# - This code cannot be used for teaching LLMs and AI algorithms
# - This code cannot be used in commercial or proprietary products
# - This code cannot be distributed in any form
# - This code cannot be changed in any form outside of training course
# - This code cannot have its license changed
# - If you use this code in your product, you must open-source it under GPLv2
# - Exception can be granted only by the author

# %% Run
# - PyCharm: right-click in the editor and `Run Doctest in ...`
# - PyCharm: keyboard shortcut `Control + Shift + F10`
# - Terminal: `python -m doctest -v myfile.py`

# %% About
# - Name: RE Match Phones
# - Difficulty: easy
# - Lines: 5
# - Minutes: 8

# %% English
# 1. Define `cell: str` pattern matching `+## ### ### ###`
# 2. Define `work: str` pattern matching `+## ## ### ####`
# 3. Define `result: str` matching a `cell` or `work` format
# 4. Where `#` is a digit
# 5. Run doctests - all must succeed

# %% Polish
# 1. Zdefiniuj `cell: str` pasujący do `+## ### ### ###`
# 2. Zdefiniuj `work: str` pasujący do `+## ## ### ####`
# 3. Zdefiniuj `result: str` pasujący do formatu `cell` lub `work`
# 4. Gdzie `#` jest cyfrą
# 5. Uruchom doctesty - wszystkie muszą się powieść

# %% Hints
# - Use f-string formatting to combine both formats
# - Use alternative `|` inside of round brackets `(...|...)`
# - Use the beginning `^` and end `$` anchors of a line

# %% Tests
"""
>>> import sys; sys.tracebacklimit = 0
>>> assert sys.version_info >= (3, 9), \
'Python 3.9+ required'

>>> def is_valid_phone(number):
...     if re.match(result, number):
...         return True
...     else:
...         return False

>>> is_valid_phone('+48 (12) 355 5678')
False
>>> is_valid_phone('+48 123 555 678')
True
>>> is_valid_phone('123 555 678')
False
>>> is_valid_phone('+48 12 355 5678')
True
>>> is_valid_phone('+48 123-555-678')
False
>>> is_valid_phone('+48 123 555 6789')
False
>>> is_valid_phone('+1 (123) 555-6789')
False
>>> is_valid_phone('+1 (123).555.6789')
False
>>> is_valid_phone('+1 800-python')
False
>>> is_valid_phone('+48123555678')
False
>>> is_valid_phone('+48 123 555 678 wew. 1337')
False
>>> is_valid_phone('+48 123555678,1')
False
>>> is_valid_phone('+48 123555678,1,2,3')
False
"""

import re


# Regex pattern matching the `+## ### ### ###` format,
# where every `#` stands for a single digit.
# Replace the `...` placeholder with your solution.
# type: str
cell = ...

# Regex pattern matching the `+## ## ### ####` format,
# where every `#` stands for a single digit.
# Replace the `...` placeholder with your solution.
# type: str
work = ...

# Regex pattern accepting either the `cell` or the `work` format.
# The module doctests pass this value to `re.match()` inside
# `is_valid_phone()`, so it must reject every other phone layout.
# Replace the `...` placeholder with your solution.
# type: str
result = ...