python中的re模块

re.findall() # 找到匹配字符串并返回一个结果列表
re.split() # 分割字符串 并返回一个结果列表
re.sub() # 用于替换字符串中的匹配项
re.search() # 扫描整个字符串并返回第一个成功的匹配
re.match() # 尝试从字符串的起始位置匹配一个模式，如果起始位置匹配不成功，match() 就返回 None。
re.finditer() # 返回一个结果的迭代器
re.compile() # 用于编译正则表达式，生成一个正则表达式（ Pattern ）对象，供该对象的 match()、search()、findall() 等方法使用。
. 匹配除换行符以外的任意字符（指定 re.DOTALL 时也匹配换行符）
? 匹配0个或者1个由左边字符定义的片段
* 匹配0个或者多个左边字符表达式
+ 匹配1个或者多个左边字符表达式
*? 重复任意次，但尽可能少重复
+? 重复1次或更多次，但尽可能少重复
?? 重复0次或1次，但尽可能少重复
{n,m}? 重复n到m次，但尽可能少重复
{n,}? 重复n次以上，但尽可能少重复
贪婪匹配：在满足匹配时，匹配尽可能长的字符串，默认情况下，采用贪婪匹配
非贪婪匹配：在满足匹配时，匹配尽可能短的字符串，使用?来表示非贪婪匹配

import re   #


# \w matches a single "word" character: a letter, a digit or an underscore.
# With str patterns in Python 3 the class is Unicode-aware, so CJK characters
# count as word characters too unless the re.ASCII flag is given.
# (\w\w would consume two word characters per match.)
# Patterns are written as raw strings so the backslash reaches the regex
# engine untouched and no invalid-escape warning is raised.
print(re.findall(r'\w', 'af2324__&^%$'))  # ['a', 'f', '2', '3', '2', '4', '_', '_']
# \A anchors at the very start of the string; the trailing \w then takes
# exactly one more word character.
print(re.findall(r'\Aaf\w', 'af2324a3__&^%$'))  # ['af2']
# A pattern without metacharacters simply matches itself.
print(re.findall('太白金星', 'ahlf太白金星af'))  # ['太白金星']

# \W is the complement of \w: any character that is not a letter, digit or
# underscore.
print(re.findall(r'\W', 'af2324__&^%$'))  # ['&', '^', '%', '$']

# \s matches one whitespace character; \s\s needs two of them in a row.
print(re.findall(r'\s\s', 'fds f\n f\t'))  # ['\n ']
# \S matches one non-whitespace character.
print(re.findall(r'\S', 'fds$% hdf\n f\t'))  # ['f', 'd', 's', '$', '%', 'h', 'd', 'f', 'f']

# \A and ^ both anchor the match at the start of the string, so only the
# leading 'af' / 'hel' is returned even though the text occurs again later.
# (^ additionally matches after each newline when re.MULTILINE is set.)
print(re.findall(r'\Aaf', 'af$2324a3__&^%$Aaf'))  # ['af']
print(re.findall(r'^af', 'af$2324a3__&^%$Aaf'))  # ['af']
print(re.findall(r'^hel', 'helfaslkfafehel'))  # ['hel']

# \Z and $ anchor the match at the end of the string, so only the trailing
# 'af' is returned. ($ also matches just before a final newline.)
print(re.findall(r'af\Z', 'af$2324a3__&^%$Aaf'))  # ['af']
print(re.findall(r'af$', 'af$2324a3__&^%$Aaf'))  # ['af']

# \d matches one decimal digit.
print(re.findall(r'\d', '2343jsdkfa_+%#f32'))  # ['2', '3', '4', '3', '3', '2']
# \D matches one character that is not a digit.
print(re.findall(r'\D', '2343jsdkfa_+%#f32'))

# Split on any one of several delimiter characters by listing them in a set.
print(re.split(r'[,$&#%]', 'af,ffg&sg#saf%da'))  # ['af', 'ffg', 'sg', 'saf', 'da']

# Matching newline and tab characters. Only the PATTERN is a raw string: the
# regex engine itself understands \n and \t. The subject strings stay normal
# literals so that they really contain newline/tab characters; the literal
# backslash before 'www' is written as \\ to avoid an invalid escape.
print(re.findall(r'\n', '\n\nfsaf\\www\tf g'))  # ['\n', '\n']
print(re.findall(r'\n\t\t', 'afj\n\t\t\tk\fsaffs\nfalf\t'))  # ['\n\t\t']


# . matches any single character except a newline, so 'a.b' always consumes
# exactly three characters: 'a', one arbitrary character, then 'b'.
print(re.findall(r'a.b', 'abBfdsb'))  # [] - no 'a' is followed two characters later by 'b'
print(re.findall(r'a.b', 'abbbbBfdsaabb atb'))  # ['abb', 'aab', 'atb']
# 'a', any one character, then 'bb': four characters in total.
print(re.findall(r'a.bb', 'abb abb aaab'))  # []
# With re.DOTALL the dot also matches the newline between 'a' and 'b'.
print(re.findall(r'a.b', 'a\nb', re.DOTALL))  # ['a\nb']
# Without the flag the newline is not matched by the dot.
print(re.findall(r'a.b', 'a\nbaf'))  # []

# ? matches zero or one occurrence of the expression to its left:
# an optional 'a' followed by a mandatory 'b'.
print(re.findall(r'a?b', 'abbbbBfdsaabb'))  # ['ab', 'b', 'b', 'b', 'ab', 'b']

# * matches zero or more occurrences of the expression to its left:
# any number of 'a' (possibly none) followed by a mandatory 'b'.
print(re.findall(r'a*b', 'abbbbBfdsaabb'))  # ['ab', 'b', 'b', 'b', 'aab', 'b']

# + matches one or more occurrences of the expression to its left:
# at least one 'a' followed by a mandatory 'b'.
print(re.findall(r'a+b', 'abbbbBfdsaabb'))  # ['ab', 'aab']

# {m,n} matches between m and n occurrences of the expression to its left.
print(re.findall(r'a{1,3}b', 'aaab ab aabbabb Bfds aabb'))  # ['aaab', 'ab', 'aab', 'ab', 'aab']

# Greedy: .* takes as many characters as possible, so a single match runs
# from the first 'a' to the last 'b' of the whole string.
print(re.findall(r'a.*b', 'aaab ab aabbabb Bfds aabb'))

# Lazy: the ? after * makes it stop at the first 'b' that completes a match;
# scanning then resumes after that match.
print(re.findall(r'a.*?b', 'aaab ab aabbabb Bfds aabb'))  # ['aaab', 'ab', 'aab', 'ab', 'aab']
# With a capturing group, findall returns only the captured text before 'sb'.
print(re.findall(r'(.*?)sb', 'ffsb sb jjjsb'))  # ['ff', ' ', ' jjj']

# [...] matches exactly one character out of the listed set:
# 'a', then one of a/b/c, then 'b'.
print(re.findall(r'a[abc]b', 'aab abb acb'))  # ['aab', 'abb', 'acb']
# A range inside the set: 'a', one digit 0-9, then 'b'.
print(re.findall(r'a[0-9]b', 'a1b a2b a3b  acb ayb'))  # ['a1b', 'a2b', 'a3b']
print(re.findall(r'a[A-Z]b', 'a1b a2b a3b  aAb aDb aYb'))  # ['aAb', 'aDb', 'aYb']
print(re.findall(r'a[a-zA-Z]b', 'aab aAb aWb aqb a1b'))  # upper- and lower-case letters
# Pitfall: the range A-z follows code-point order, so besides letters it also
# covers the punctuation between 'Z' and 'a' ([ \ ] ^ _ `). Prefer [a-zA-Z].
print(re.findall(r'a[A-z]b', 'aab aAb aWb aqb a1b'))
# Two sets in a row require exactly two digits between 'a' and 'b'.
print(re.findall(r'a[0-9][0-9]b', 'a1b a2b a29b a56b a456b'))  # ['a29b', 'a56b']
# To include a literal '-' in a set, put it first or last, or escape it as \-;
# anywhere else it is read as a range operator.
print(re.findall(r'a[-+*/]b', 'a+b a-b a*b a/b a6b'))  # ['a+b', 'a-b', 'a*b', 'a/b']

# (...) defines a capturing group. Without a group, findall returns the whole
# match; with one, it returns only the text captured by the group.
print(re.findall(r'.*?_sb', 'gsfifr_sb gs_sb sg_sb'))  # ['gsfifr_sb', ' gs_sb', ' sg_sb']
print(re.findall(r'(.*?)_sb', 'gsfifr_sb gs_sb sg_sb'))  # ['gsfifr', ' gs', ' sg']

# | is alternation: match the expression on its left or the one on its right.
print(re.findall(r'company|companies', 'Too many companies have gone bankrupt, and the next one is my company'))
# (?:...) groups the alternatives without capturing, so findall still returns
# the whole match rather than just 'y' / 'ies'.
print(re.findall(r'compan(?:y|ies)', 'Too many companies have gone bankrupt, and the next one is my company'))

# A backslash escapes a metacharacter: \. matches a literal dot.
print(re.findall(r'(\.\w*)', 'dsajk.1234.dasf'))  # ['.1234', '.dasf']

s1 = '深圳电话：0755-546123546 深圳地址：广东..'
# findall returns every "digits-digits" run as a list.
print(re.findall(r'\d+-\d+', s1))  # ['0755-546123546']
# search scans the string and returns a match object for the first hit, or
# None when nothing matches. match_object.group() yields the matched text.
print(re.search(r'\d+-\d+', s1).group())  # '0755-546123546'
# (?P<name>...) creates a named group that can be fetched via group('name').
print(re.search(r'(?P<qihao>\d+)-(?P<num>\d+)', s1).group('qihao'))  # '0755'
print(re.search(r'(?P<qihao>\d+)-(?P<num>\d+)', s1).group('num'))  # '546123546'

# match works like search but only tries at the beginning of the string and
# returns None otherwise; search with a leading ^ behaves the same here.
print(re.match(r'barry', 'barry fdlfjsadfkl ').group())  # 'barry'
print(re.match(r'barry', 'qbarry fdlfjsadfkl '))  # None

# Extract a dotted-quad IP address.
s2 = "ip='192.168.6.11',verion:1.0.0"
# Each of the four parts must contain 1-3 digits. (Using \d* instead would
# allow empty parts and accept a bare '...'.) The \b guards keep the match
# from starting or ending inside a longer digit run. This only checks the
# shape, not that each part is <= 255; '1.0.0' is skipped as it has 3 parts.
print(re.findall(r'\b\d{1,3}(?:\.\d{1,3}){3}\b', s2))  # ['192.168.6.11']
# Every run of digits. A lazy '.*?' at the very end of a pattern never
# consumes anything, so plain \d+ is all that is needed.
print(re.findall(r'\d+', s2))  # ['192', '168', '6', '11', '1', '0', '0']
# An empty pattern matches the empty string at position 0, so this prints a
# match object with span (0, 0) rather than None.
print(re.search('', s2))


# A leading ^ inside [...] negates the set: [^a-zA-Z] is any non-letter.
# The pattern is word, separator, word, separator, word in five groups.
print(re.search(r'([a-zA-Z]+)([^a-zA-Z]+)([a-zA-Z]+)([^a-zA-Z]+)([a-zA-Z]+)', r'alex is sb').group())  # 'alex is sb'
# In the replacement, \1..\5 refer back to the groups; swapping \1 and \5
# exchanges the first and last word.
print(re.sub(r'([a-zA-Z]+)([^a-zA-Z]+)([a-zA-Z]+)([^a-zA-Z]+)([a-zA-Z]+)', r'\5\2\3\4\1', r'alex is sb'))  # 'sb is alex'

# Compile a pattern once and reuse the resulting Pattern object.
obj = re.compile(r'\d{2}')
print(obj.findall('fdsafsda1243fdsdf324'))  # ['12', '43', '32']
print(obj.findall('123fksldjf3432fdsjlkf453'))  # ['12', '34', '32', '45']

# finditer returns a lazy iterator of match objects instead of a list.
ret = re.finditer(r'\d', 'ds3sy4784a')
print(ret)    # prints the iterator object itself, not the matched digits

# Each next() consumes one match; the comprehension drains what is left.
print(next(ret).group())  # '3'
print(next(ret).group())  # '4'
print([i.group() for i in ret])  # ['7', '8', '4']

posted @ 2019-01-12 11:38 平平无奇小辣鸡 阅读(339) 评论(0) 收藏 举报

刷新页面返回顶部

保持学习，天天进步

python中的re模块

公告