4.13. Regex RE Match

  • re.match()

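  • Checks for a match only at the beginning of the string; anchor the pattern with `$` (or use re.fullmatch()) to require an exact match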

  • Checking if user input is correct (email, url, NIP, VAT ID, PESEL)

4.13.1. SetUp

>>> import re

4.13.2. Example

Usage of re.match():

>>> def valid_email(email):
...     if re.match('^[a-z]+@nasa.gov$', email):
...         return True
...     else:
...         return False
>>>
>>>
>>> valid_email('mwatney@nasa.gov')
True
>>>
>>> valid_email('mwatney@notexisting.com')
False

4.13.3. Good Practices

  • Doctests

>>> import re
>>>
>>>
>>> username = r'[a-z][a-z0-9._-]*'
>>> domain   = r'([a-z0-9-.]+)+'
>>> tld      = r'[a-z]{2,10}'
>>> email    = f'{username}@{domain}.{tld}'
>>>
>>> def is_valid(data):
...     if re.match(pattern, data):
...         return True
...     else:
...         return False
>>> import re
>>>
>>>
>>> username = r'^(?P<username>[a-z][a-z0-9._-]*)'
>>> domain   = r'(?P<domain>([a-z0-9-.]+)+)'
>>> tld      = r'(?P<tld>[a-z]{2,10})'
>>> email    = f'^{username}@{domain}.{tld}$'
>>>
>>> def is_valid(data):
...     if re.match(pattern, data):
...         return True
...     else:
...         return False

4.13.4. Doctests

>>> import re
>>>
>>>
>>> username = r'^(?P<username>[a-z][a-z0-9._-]*)'
>>> domain   = r'(?P<domain>([a-z0-9-.]+)+)'
>>> tld      = r'(?P<tld>[a-z]{2,10})'
>>> email    = f'^{username}@{domain}.{tld}$'
>>> pattern = re.compile(email, flags=re.IGNORECASE)
>>>
>>>
>>> def is_valid(data):
...     """
...     >>> is_valid('3ares@nasa.gov')
...     False
...     >>> is_valid('ares3@nasa.gov')
...     True
...     >>> is_valid('a3@nasa.gov')
...     True
...     >>> is_valid('3@nasa.gov')
...     False
...     >>> is_valid('m@nasa.gov')
...     True
...     >>> is_valid('m.watney@nasa.gov')
...     True
...     >>> is_valid('m_watney@nasa.gov')
...     True
...     >>> is_valid('m-watney@nasa.gov')
...     True
...     >>> is_valid('mark.watney@nasa.gov')
...     True
...     >>> is_valid('markwatney@nasa.gov')
...     True
...     >>> is_valid('pan.twardowski@polsa.gov.pl')
...     True
...     >>> is_valid('pan.twardowski@polsa24.gov.pl')
...     True
...     """
...     if pattern.match(data):
...         return True
...     else:
...         return False

4.13.5. Use Case - 0x01

>>> def matches(pattern, text):
...     if re.match(pattern, text):
...         return True
...     else:
...         return False
>>>
>>> EMAIL = '^[a-z]+@nasa.gov$'
>>>
>>> matches(EMAIL, 'mwatney@nasa.gov')
True
>>>
>>> matches(EMAIL, 'mwatney123@nasa.gov')
False

4.13.6. Assignments

Code 4.26. Solution
"""
* Assignment: RE Match Phones
* Complexity: easy
* Lines of code: 5 lines
* Time: 8 min

English:
    1. Use regular expressions to validate phone numbers
    2. Valid phone number format: `+## ### ### ###` or `+## ## ### ####`
    3. Run doctests - all must succeed

Polish:
    1. Użyj wyrażeń regularnych do walidacji numeru telefonu
    2. Poprawny format numeru: `+## ### ### ###` lub `+## ## ### ####`
    3. Uruchom doctesty - wszystkie muszą się powieść

Hints:
    * Use f-string formatting to combine both formats
    * Use alternative `|` inside of round brackets `(...|...)`
    * Use beginning `^` and end `$` of a line

Tests:
    >>> import sys; sys.tracebacklimit = 0

    >>> def is_valid_phone(number):
    ...     if re.match(result, number):
    ...         return True
    ...     else:
    ...         return False

    >>> is_valid_phone('+48 (12) 355 5678')
    False
    >>> is_valid_phone('+48 123 555 678')
    True
    >>> is_valid_phone('123 555 678')
    False
    >>> is_valid_phone('+48 12 355 5678')
    True
    >>> is_valid_phone('+48 123-555-678')
    False
    >>> is_valid_phone('+48 123 555 6789')
    False
    >>> is_valid_phone('+1 (123) 555-6789')
    False
    >>> is_valid_phone('+1 (123).555.6789')
    False
    >>> is_valid_phone('+1 800-python')
    False
    >>> is_valid_phone('+48123555678')
    False
    >>> is_valid_phone('+48 123 555 678 wew. 1337')
    False
    >>> is_valid_phone('+48 123555678,1')
    False
    >>> is_valid_phone('+48 123555678,1,2,3')
    False
"""

import re


# pattern matching `+## ### ### ###`
# `+` is escaped because a bare `+` is a quantifier; `[0-9]` accepts ASCII
# digits only (`\d` would also accept other Unicode digits).
# type: str
cell = r'\+[0-9]{2} [0-9]{3} [0-9]{3} [0-9]{3}'

# pattern matching `+## ## ### ####`
# type: str
work = r'\+[0-9]{2} [0-9]{2} [0-9]{3} [0-9]{4}'

# combination of `+## ### ### ###` and `+## ## ### ####`
# The alternatives are grouped so that `^` and `$` apply to both of them;
# the end anchor rejects trailing content such as extensions.
# type: str
result = f'^({cell}|{work})$'