Python就业班——正则表达式与综合实战
#!/usr/bin/env python3
# coding=utf-8
# Version: python3.6.1
# Project: practice
# File: Python就业班——正则表达式与综合实战.py
# Date: 2020/8/1 19:17
# Author: LGSP_Harold
"""Study notes on the standard-library ``re`` module.

Each section below is a small, self-contained example that is kept
commented out; uncomment a section (or the calls under the
``__main__`` guard) to run it.
"""

# Greedy matching (the default): provided the whole expression still
# matches, consume as many characters as possible.
# Non-greedy matching: provided the whole expression still matches,
# consume as few characters as possible.

# The re module
# compile(pattern, flags=0)
#   Compile a regular-expression pattern, with optional flags, and return
#   a regular-expression (pattern) object.
# match(pattern, string, flags=0)
#   Try to match the pattern, with optional flags, at the beginning of the
#   string. Returns a match object on success and None on failure.
import re

# # Compile the regular expression
# # pattern = re.compile(r'hello')
# pattern = re.compile(r'hello', re.I)
# # print(dir(pattern))
#
# # Match with match()
# # rest = pattern.match('hello, world!')
# rest = pattern.match('Hello, world!')
# print(rest)
# print(dir(rest))
# print('string:', rest.string)

# # findall(pattern, string [, flags])
# # Find every non-overlapping occurrence of the pattern in the string and
# # return the matches as a list.
#
# # Find the digits in the string below
# # content = 'one1two22three3four4five5six698'
# content = 'one1two22Three3four4five5six698'
# # pattern = re.compile(r'\d+')
# # pattern = re.compile(r'[a-z]+')
# pattern = re.compile(r'[a-z]+', re.I)
# rest = pattern.findall(content)
# print(rest)

# # search(pattern, string [, flags])
# # Scan the string, with optional flags, for the first location where the
# # pattern matches. Returns a match object on success and None on failure.
# content = 'hello world!'
# p = re.compile(r'world')
# rest = p.search(content)
# print(rest)

# # Using group() and groups()
# # group(num) returns the whole match, or the specific subgroup numbered num.
# # groups() returns a tuple containing all matched subgroups (an empty
# # tuple when the pattern defines no groups).
#
# def test_group():
#     content = 'hello world'
#     p = re.compile(r'world')
#     rest = p.search(content)
#     print(rest)
#     if rest:
#         print(rest.group())
#         print(rest.groups())
#
#
# def test_id_card():
#     # p = re.compile(r'^(\d{6})(\d{4})(\d{2})(\d{2})(\d{3})([0-9]|X)$')
#     p = re.compile(r'^(\d{6})(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})(\d{3})([0-9]|X)$')
#     id1 = '11010119900307387X'
#     id2 = '110101199003070652'
#     rest1 = p.search(id2)
#     # print(rest1.group(1))
#     # print(rest1.group(2))
#     # print(rest1.group(3))
#     # print(rest1.group(4))
#     # print(rest1.groups())
#     print(rest1.groupdict())

# # split(pattern, string, maxsplit=0)
# # Split the string into a list wherever the pattern matches and return
# # that list; at most maxsplit splits are made (the default, 0, splits at
# # every match).
# content = 'one1two22three3four4five5six698'
# p = re.compile(r'\d+')
# rest = p.split(content)
# # rest = p.split(content, 2)
# print(rest)

# # sub(pattern, repl, string, count=0)
# # Replace each matched substring of string with repl and return the
# # resulting string; at most count replacements are made (the default, 0,
# # replaces every match).
# content = 'one1two22three3four4five5six698'
# p = re.compile(r'\d+')
# rest = p.sub('@', content)
# print(rest)
#
# # Swap the positions of the two words
# content2 = 'hello world'
# p2 = re.compile(r'(\w+) (\w+)')
# rest_pos = p2.sub(r'\2 \1', content2)
# print(rest_pos)
#
#
# # Replace and transform the content with a callback
# def f(m):
#     return m.group(2).upper() + ' ' + m.group(1)
#
#
# rest_change = p2.sub(f, content2)
# print(rest_change)
#
# # Do the same replacement with an anonymous function
# rest_lamb = p2.sub(lambda m: m.group(2).upper() + ' ' + m.group(1), content2)
# print(rest_lamb)

if __name__ == '__main__':
    # Uncomment a call below after uncommenting the matching definition above.
    # test_group()
    # test_id_card()
    pass
略懂,略懂....

浙公网安备 33010602011771号