python3 正则表达式

正则表达式
    正则表达式用来规定一种格式(模式), 用于匹配字符串

    普通字符, 平时用到的一些文字信息

    元字符(重点)
    1  .  匹配除换行符以外的任意单个字符
    2  \d 数字
    3  \w 匹配数字, 字母, 下划线
    4  \n 换行
    5  \s 匹配所有的空白
    6  ^  字符串的开始
    7  $  字符串的结束
    8  [] 字符组
    9  [^....]  除了字符组中字符的所有字符
    10 () 分组 (重要)
    11 \D 非数字
    12 \W 非数字字母下划线
    13 \S 非空白
    14 a|b a或b
    15 \t 一个制表符

    量词
        *  表示匹配0次或者多次, 尽可能多的匹配
        +  表示匹配1次或多次
        ?  表示0次或者1次
        {n} 表示重复n次
        {n,} 重复n次或更多次
        {n,m} 重复n次到m次

import re


re_lst = re.findall(r"\d+", "我66的学python,挣钱88的")  # 查找所有能匹配的结果，并返回一个列表
print(re_lst)

['66', '88']

import re


re_iter = re.finditer(r"\d+", "我66的学python,挣钱88的")  # 查找所有能匹配的结果，返回一个迭代器
print(re_iter)
for item in re_iter:
    print(item.group())

<callable_iterator object at 0x00000000077DBBE0>
66
88

import re


ret = re.match(r"\d+", "我66的学python,挣钱88的")  # 从开始进行匹配，如果匹配不到返回None
print(ret)

None

import re


ret = re.search(r"\d+", "我66的学python,挣钱88的")  # 扫描整个字符串，只返回第一个匹配结果，匹配不到返回None
print(ret)
print(ret.group())

<_sre.SRE_Match object; span=(1, 3), match='66'>
66

import re

print(re.findall("company|companies", "all companies will done, my company already done."))
print(re.findall("compan(?:y|ies)", "all companies will done, my company already done."))
print(re.findall(r"compan(?P<name>.*?)\s", "all companies will done, my company already done."))

['companies', 'company']
['companies', 'company']
['ies', 'y']

import re


print(re.findall("ab*c", "abc ac abbc"))  # * 0次或多次
print(re.findall("ab{0,}c", "abc ac abbc"))

['abc', 'ac', 'abbc']
['abc', 'ac', 'abbc']

import re


print(re.findall("ab?c", "abc ac abbc"))  # ? 0次或1次
print(re.findall("ab{0,1}c", "abc ac abbc"))

['abc', 'ac']
['abc', 'ac']

import re

print(re.findall("ab+c", "abc ac abbc"))  # + 一次或多次
print(re.findall("ab{1,}c", "abc ac abbc"))

['abc', 'abbc']
['abc', 'abbc']

import re

print(re.findall(r"abc+", "abccccabcdabcab"))  # + 一次或多次
print(re.findall(r"ab[12]+", "ab122ab2212"))

['abcccc', 'abc', 'abc']
['ab122', 'ab2212']

import re

print(re.findall(r".", "fadf2\t\ndfdf"))  # . 除换行以外的任意字符
print(re.findall(r".", "fadf2\t\ndfdf", re.S))  # 加上re.S  .可以匹配到换行符

['f', 'a', 'd', 'f', '2', '\t', 'd', 'f', 'd', 'f']
['f', 'a', 'd', 'f', '2', '\t', '\n', 'd', 'f', 'd', 'f']

import re

print(re.search(r'al(e)x\smak(e)', 'alex make').group(1))  # 小括号分组
print(re.search(r'al(e)x\smak(e)', 'alex make').group(2))
print(re.findall(r'al(e)x\smak(e)', 'alex make'))

e
e
[('e', 'e')]

import re

print(re.search('company|companies', 'all companies will done, my company is already done.').group())
print(re.findall('company|companies', 'all companies will done, my company is already done.'))

companies
['companies', 'company']

import re

print(re.sub('a', 'A', 'alex make love'))  # 把小写字母a换成大写字母A
print(re.sub('^a', 'A', 'alex make love'))  # 把开头的小写字母a换成大写字母A

Alex mAke love
Alex make love

import re

print(re.split('[ab]', 'qawby'))  # 遇到字符a或字符b就切割

['q', 'w', 'y']

import re


obj = re.compile(r'(?P<id>\d+)(?P<name>e+)') # 预编译正则表达式，并用 (?P<名字>...) 给每个分组起名字
ret = obj.search('abc123eeee') # 搜索
print(ret.group()) # 结果: 123eeee
print(ret.group("id")) # 结果: 123 # 获取id组的内容
print(ret.group("name")) # 结果: eeee # 获取name组的内容

123eeee
123
eeee

posted on 2019-04-30 11:35 lilyxiaoyy 阅读(130) 评论(0) 编辑收藏举报