Skip to content

Regular Expressions

Python’s re module provides full regex support.

import re
Function — Description
`re.match(pattern, string)` — Match at the start of string
`re.search(pattern, string)` — Search anywhere in string
`re.findall(pattern, string)` — Return all matches as a list
`re.finditer(pattern, string)` — Return iterator of match objects
`re.sub(pattern, repl, string)` — Replace matches
`re.split(pattern, string)` — Split string on matches
`re.compile(pattern)` — Compile pattern for reuse
text = "Hello, my email is user@example.com and phone is 555-1234."

# Search for email: re.search scans the whole string and returns the first
# match object, or None when nothing matches, so guard before using it.
match = re.search(r'[\w.+-]+@[\w-]+\.\w+', text)
if match:
    print(match.group())  # user@example.com

# Find all numbers: re.findall returns every non-overlapping match as a list
# of strings.
nums = re.findall(r'\d+', text)
print(nums)  # ['555', '1234']
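Pattern — Meaning
`.` — Any character except newline
`\d` — Digit [0-9] (any Unicode decimal digit unless re.ASCII is set)
`\D` — Non-digit
`\w` — Word char [a-zA-Z0-9_] (any Unicode word character unless re.ASCII is set)
`\W` — Non-word char
`\s` — Whitespace
`\S` — Non-whitespace
`^` — Start of string (start of each line with re.MULTILINE)
`$` — End of string (end of each line with re.MULTILINE)
`*` — 0 or more
`+` — 1 or more
`?` — 0 or 1 (optional)
`{n}` — Exactly n times
`{n,m}` — Between n and m times
`[abc]` — Character class
`[^abc]` — Negated class
`(abc)` — Capturing group
`(?:abc)` — Non-capturing group
`a|b` — a or b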
text = "2024-01-15"

# Each parenthesised sub-pattern is a capturing group, numbered from 1 in the
# order its opening parenthesis appears; group 0 is the entire match.
match = re.search(r'(\d{4})-(\d{2})-(\d{2})', text)
if match:
    print(match.group(0))  # 2024-01-15 (full match)
    print(match.group(1))  # 2024
    print(match.group(2))  # 01
    print(match.group(3))  # 15
# (?P<name>...) gives a capturing group a name, so it can be read back by
# name via group('name') or all at once via groupdict().
match = re.search(r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})', "2024-01-15")
if match:
    print(match.group('year'))  # 2024
    print(match.groupdict())  # {'year': '2024', 'month': '01', 'day': '15'}
# Collapse every run of whitespace into a single underscore.
text = "Hello World"
result = re.sub(pattern=r'\s+', repl='_', string=text)
print(result)  # Hello_World
# Using a function as replacement: re.sub calls it once per match with the
# match object and substitutes the string it returns.
def upper_match(m):
    """Return the text matched by *m*, converted to upper case."""
    return m.group().upper()


result = re.sub(r'\b\w+\b', upper_match, "hello world")
print(result)  # HELLO WORLD
# IGNORECASE — letters match regardless of case
re.findall(r'hello', "Hello HELLO hello", flags=re.IGNORECASE)
# ['Hello', 'HELLO', 'hello']

# MULTILINE — ^ and $ match at the start/end of each line
re.findall(r'^\d+', "123\n456\n789", flags=re.MULTILINE)
# ['123', '456', '789']

# DOTALL — . matches newline too, so the match can span lines
re.search(r'start.*end', "start\nend", flags=re.DOTALL)
# matches 'start\nend'
# Compile once, use many times: the compiled pattern object exposes the same
# operations (match, search, fullmatch, ...) as methods.
email_pattern = re.compile(r'[\w.+-]+@[\w-]+\.\w+')
emails = ["user@example.com", "bad-email", "admin@site.org"]
# Use fullmatch() for validation: match() only anchors at the start, so a
# string such as "user@example.com trailing junk" would wrongly count as valid.
valid = [e for e in emails if email_pattern.fullmatch(e)]
# Email
# Simplified: local part of word chars, '.', '+', '-'; then '@'; then a domain
# of the form label.label. Unanchored, so with a multi-label domain such as
# example.co.uk only "example.co" is consumed.
r'[\w.+-]+@[\w-]+\.\w+'
# URL
# "http://" or "https://" followed by one or more characters from the listed
# set (word chars and common URL punctuation). Unanchored.
r'https?://[\w/:%#\$&\?\(\)~\.=\+\-]+'
# Phone (US)
# 3-3-4 digits; the area code may be wrapped in parentheses and each group may
# be separated by '-', '.', or a whitespace character (or by nothing).
r'\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}'
# IP address
# Four dot-separated groups of 1-3 digits between word boundaries. Octet
# values are not range-checked, so "999.999.999.999" also matches.
r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
# Hex color
# '#' followed by exactly six hex digits and a word boundary; the three-digit
# shorthand (e.g. "#abc") is not matched.
r'#[0-9a-fA-F]{6}\b'
# Slug
# Anchored: runs of lowercase letters/digits joined by single hyphens, with no
# leading, trailing, or doubled hyphen.
r'^[a-z0-9]+(?:-[a-z0-9]+)*$'