正则re模块
正则re模块
1. re模块
Python 标准库提供了 re 模块,可以使用正则表达式对文本进行匹配和处理。
-
findall,获取匹配到的所有数据
import re

# Sample text with two 18-character sequences that the pattern below matches.
text = "dsf130429191912015219k13042919591219521Xkk"

# The pattern is a raw string: "\d" inside a normal string literal is an
# invalid escape sequence and triggers a SyntaxWarning on Python 3.12+.
# Because the pattern contains capture groups, findall returns one tuple of
# group values per match instead of the whole matched text.
data_list = re.findall(r"(\d{6})(\d{4})(\d{2})(\d{2})(\d{3})([0-9]|X)", text)
print(data_list)
# [('130429', '1919', '12', '01', '521', '9'), ('130429', '1959', '12', '19', '521', 'X')]
match,从起始位置开始匹配,匹配成功返回一个对象,未匹配成功返回None
import re

# Example 1: re.match only tries to match at the very start of the string.
# The text starts with other characters, so no match is found.
text = "大小逗2B最逗3B欢乐"
data = re.match(r"逗\dB", text)  # raw string avoids the invalid "\d" escape
print(data)  # None

# Example 2: the pattern occurs at position 0, so a match object is returned.
text = "逗2B最逗3B欢乐"
data = re.match(r"逗\dB", text)
if data:
    content = data.group()  # "逗2B"
    print(content)
search,扫描整个字符串并返回第一个匹配成功的对象,未匹配成功返回None
import re

text = "大小逗2B最逗3B欢乐"

# re.search scans the whole string and returns the first match (or None).
# The pattern is a raw string so "\d" is not treated as a string escape.
data = re.search(r"逗\dB", text)
if data:
    print(data.group())  # "逗2B"
sub,替换匹配成功的位置
import re

# Example 1: replace every occurrence of the pattern.
text = "逗2B最逗3B欢乐"
data = re.sub(r"\dB", "沙雕", text)  # raw string avoids the invalid "\d" escape
print(data)  # 逗沙雕最逗沙雕欢乐

# Example 2: replace only the first occurrence.
# count is passed by keyword: passing it positionally is deprecated since
# Python 3.13 and is easy to confuse with the flags argument.
text = "逗2B最逗3B欢乐"
data = re.sub(r"\dB", "沙雕", text, count=1)
print(data)  # 逗沙雕最逗3B欢乐
split,根据匹配成功的位置分割
import re

# Example 1: split the text at every position where the pattern matches.
text = "逗2B最逗3B欢乐"
data = re.split(r"\dB", text)  # raw string avoids the invalid "\d" escape
print(data)  # ['逗', '最逗', '欢乐']

# Example 2: split at most once.
# maxsplit is passed by keyword: passing it positionally is deprecated since
# Python 3.13 and is easy to confuse with the flags argument.
text = "逗2B最逗3B欢乐"
data = re.split(r"\dB", text, maxsplit=1)
print(data)  # ['逗', '最逗3B欢乐']
finditer,匹配结果为迭代器
import re

# Example 1: finditer yields match objects lazily instead of building a list.
text = "逗2B最逗3B欢乐"
data = re.finditer(r"\dB", text)  # raw string avoids the invalid "\d" escape
for item in data:
    print(item.group())

# Example 2: a named group makes the captured text available via groupdict().
text = "逗2B最逗3B欢乐"
data = re.finditer(r"(?P<xx>\dB)", text)  # named group
for item in data:
    print(item.groupdict())

# Example 3: extract the year/month/day named groups from each 18-character
# sequence. The last character class is [\dX]: inside [...] the "|" is a
# literal character, so the former [\d|X] also accepted "|" as the final
# character, which was not intended.
text = "dsf130429191912015219k13042919591219521Xkk"
id_pattern = re.compile(
    r"\d{6}(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})\d{3}[\dX]"
)
data_list = id_pattern.finditer(text)
for item in data_list:
    info_dict = item.groupdict()
    print(info_dict)

浙公网安备 33010602011771号