python3的re正则的简单使用

import re
# match ------------------------------------------------------
# re.match only tries the pattern at the very beginning of the string.
print(
    re.match(r'www', 'www.runoob.com').span(),  # (0, 3): "www" is at index 0
    re.match('com', 'www.runoob.com'),  # None: "com" is not at the start
    sep="\n",
)

line = "Cats are smarter than dogs"
# re.I ignores case; re.M changes how ^ and $ behave (this pattern uses
# neither anchor, so only re.I can influence the result).
match_obj = re.match(r'(.*) are (.*?) .*', line, flags=re.I | re.M)
if match_obj is None:
    print("No match!!")
else:
    # One value per output line, in this order:
    #   ('Cats', 'smarter')           all captured groups
    #   Cats are smarter than dogs    the whole match
    #   Cats                          group 1
    #   smarter                       group 2
    print(
        match_obj.groups(),
        match_obj.group(0),
        match_obj.group(1),
        match_obj.group(2),
        sep="\n",
    )

# search -----------------------------------------------------
# re.search scans the string and reports the first location that matches.
www_span = re.search('www', 'www.runoob.com').span()
com_span = re.search('com', 'www.runoob.com').span()
print(www_span)  # (0, 3)
print(com_span)  # (11, 14)

line = "Cats are smarter than dogs"
search_obj = re.search(r'(.*) are (.*?) .*', line, flags=re.I | re.M)
if search_obj is None:
    print("Nothing found!!")
else:
    print(search_obj.groups())  # ('Cats', 'smarter')
    print(search_obj[0])  # Cats are smarter than dogs
    print(search_obj[1])  # Cats
    print(search_obj[2])  # smarter

# Difference between re.match and re.search:
# re.match only matches at the beginning of the string; if the beginning does
# not fit the pattern the match fails and None is returned. re.search looks
# through the whole string until it finds a match.

# sub (replace) ----------------------------------------------
phone = "2004-959-559 # 这是一个电话号码"

# Remove everything from '#' to the end of the string. The space that
# precedes '#' is not part of the match, so the output is "2004-959-559 ".
trailing_comment = re.compile(r'#.*$')
num = trailing_comment.sub("", phone)
print(num)

# Remove every non-digit character, leaving only the digits: 2004959559
non_digit = re.compile(r'\D')
num = non_digit.sub("", phone)
print(num)


def double(matched):
    """Return twice the number captured by a match, as a string.

    Intended as the replacement callback for ``re.sub``: it reads the named
    group ``value`` from *matched*, converts it to ``int``, doubles it and
    returns the decimal string that should replace the matched text.

    Args:
        matched: a match object whose pattern defines a named group
            ``value`` containing only decimal digits.

    Returns:
        The doubled number rendered with ``str``.

    Raises:
        IndexError: if the pattern has no group named ``value``.
        ValueError: if the captured text is not a valid integer.
    """
    value = int(matched.group('value'))
    return str(value * 2)


s = 'A23G4HFD567'
# The pattern is a raw string: in a plain literal '\d' is an invalid escape
# sequence, which newer Python versions report with a warning.
doubled = re.sub(r'(?P<value>\d+)', double, s)
print(doubled)  # A46G8HFD1134


# compile ----------------------------------------------------
# Build a reusable pattern object: two words of ASCII letters separated by a
# single space, matched case-insensitively.
pattern = re.compile(r'([a-z]+) ([a-z]+)', flags=re.IGNORECASE)
m = pattern.match('Hello World Wide Web')
first_word, second_word = m.group(1, 2)
print((first_word, second_word))  # ('Hello', 'World')


# findall ----------------------------------------------------
# findall returns every non-overlapping match as a list of strings.
pattern = re.compile(r'\d+')
result1 = re.findall(r'\d+', 'runoob 123 google 456')  # module-level function
result2 = pattern.findall('runoob 123 google 456')  # compiled-pattern method
# pos/endpos restrict the search to the slice [0, 10) of the string.
result3 = pattern.findall('run88oob123google456', pos=0, endpos=10)
print(result1, result2, result3, sep="\n")
# Output:
#   ['123', '456']
#   ['123', '456']
#   ['88', '12']

# With several capture groups each hit is reported as a tuple of the groups.
result = [
    found.groups()
    for found in re.finditer(r'(\w+)=(\d+)', 'set width=20 and height=10')
]
print(result)  # [('width', '20'), ('height', '10')]

# finditer ---------------------------------------------------
# finditer yields one match object per hit instead of building a list.
it = re.finditer(r"\d+", "12a32bc43jf3")
for match in it:
    # Index 0 of a match object is the whole matched text.
    # Printed in order: 12, 32, 43, 3
    print(match[0])

# split ------------------------------------------------------
# Split on runs of non-word characters. The patterns are raw strings because
# '\W' in a plain literal is an invalid escape sequence. The trailing '.' is
# also a separator, which is why the full split ends with an empty string.
split_all = re.split(r'\W+', 'runoob, runoob, runoob.')
print(split_all)  # ['runoob', 'runoob', 'runoob', '']
# maxsplit is passed by keyword; passing it positionally is deprecated in
# recent Python versions. Only the first separator is used.
split_once = re.split(r'\W+', 'runoob, runoob, runoob.', maxsplit=1)
print(split_once)  # ['runoob', 'runoob, runoob.']

 

posted @ 2022-12-04 12:30  carol2014  阅读(65)  评论(0)  编辑  收藏  举报