python基础五——正则表达式

正则表达式

在线正则表达式测试工具：http://tool.oschina.net/regex#

正则表达式的量词默认为贪婪模式：例如 (.*) 会尽可能多地匹配字符，一直匹配到最后一个满足后续条件的位置；在量词后面加上 ? 就是非贪婪模式，例如 (.*?) 会尽可能少地匹配字符，遇到第一个满足后续条件的位置就停止。

import requests,re

# Sample HTML with two pagination anchors; every href is followed by a title attribute.
res = (
    '<a id="text-indent" href="http://www.nnzhp.cn:80/blog/page/2" title="第 2 页">2</a>'
    '111111111'
    '<a id="text-indent" href="http://www.nnzhp.cn:80/blog/page/4" title="第 2 页">2</a>'
)

# The two patterns differ only in the quantifier inside the capture group.
greedy_href = re.compile(r'\<a id\=\"text-indent\" href\=\"(.*)\" title\=')
lazy_href = re.compile(r'\<a id\=\"text-indent\" href\=\"(.*?)\" title\=')

# Greedy: (.*) runs on to the LAST '" title=', so both anchors are swallowed
# into a single, over-long capture.
url1 = greedy_href.findall(res)
print(url1)

# Non-greedy: (.*?) stops at the FIRST '" title=', yielding one clean URL per anchor.
url2 = lazy_href.findall(res)
print(url2)

一、查询功能　

import re

s = 'besttest is good is is'

# re.match(pattern, string, flags=0): the optional third argument controls how
# matching is done. It only tries to match at the very beginning of the string
# and returns a match object on success, otherwise None.
# The r prefix makes the pattern a raw string, so backslashes are not treated
# as Python string escapes.
prefix_hit = re.match(r'be', s)
print(prefix_hit.group())

# re.search scans the whole string and returns the first hit, or None when
# nothing matches.
first_hit = re.search(r'is', s)
print(first_hit.group())

# re.findall returns every non-overlapping match as a list.
every_hit = re.findall(r'is', s)
print(every_hit)

# re.sub replaces every match with the given replacement text.
rewritten = re.sub(r'is', 'is very', s)
print(rewritten)

# re.split cuts the string at every match of the pattern.
pieces = re.split('is', s)
print(pieces)

二、数量词

  import re

 1 print(re.findall(r'be*e','b be beebest is very ok'))  # ^ 匹配*前面的一个字符出现0次或多次
 2 print(re.findall(r'st+','besttest is best s'))   # +  匹配前一个字符1次或多次，只是+前面的一个字符
 3 print(re.findall(r'st?','besttest is best s'))   # ? 匹配前一个字符0次或1次，只是？前面的一个字符
 4 print(re.findall(r't{2}er','besttest is best  letter'))   # {n} 匹配前一个字符n次
 5 print(re.findall(r't{1,3}','besttest is best  letterv ttt'))   # {n,m} 匹配前一个字符n到m次

三、一般字符

 import re

# General (non-quantifier) pattern syntax.

# .  -> by default matches any single character except "\n".
dot_hits = re.findall(r'b.', 'besttest is best  letter')
print(dot_hits)  # ['be', 'be']

# \  -> escape character. Characters such as * + ? have a special meaning, so
#       they must be escaped in order to be matched literally.
escaped_hits = re.findall(r'\?', 'bes???ttest is best  letter')
print(escaped_hits)  # ['?', '?', '?']

# |  -> alternation: matches the expression on its left or the one on its right.
either_hits = re.findall(r'best|is', 'besttest is best')
print(either_hits)  # ['best', 'is', 'best']

# []  -> character set: matches any ONE of the characters listed inside.
in_set_hits = re.findall(r'e[ras]', 'besttest is best  letter')
print(in_set_hits)  # ['es', 'es', 'es', 'er']

# [^...]  -> negated set: matches any one character that is NOT listed.
not_in_set_hits = re.findall(r'e[^ras]', 'besttest is best  letter')
print(not_in_set_hits)  # ['et']

四、边界匹配

 import re

# Boundary (anchor) examples.

# ^  -> matches at the start of the string; with re.M it also matches at the
#       start of every line.
start_hits = re.findall(r'^http://', 'http://www.baidu.com besttest is good\nbest')
print(start_hits)  # ['http://']

# re.M is multi-line mode: "^" now matches after each newline as well.
multiline_start_hits = re.findall(r'^b', 'besttest is good\nbest', re.M)
print(multiline_start_hits)  # ['b', 'b']

# $  -> matches at the end of the string; with re.M it also matches at the end
#       of every line. "|" binds more loosely than "$", so EVERY alternative
#       needs its own "$"; without it that branch would match anywhere.
suffix_hits = re.findall(r'jpge$|png$|npg$', 'touxiang.png')
print(suffix_hits)  # ['png']

# \A  -> matches only at the very start of the string. Unlike "^" it is not
#        affected by re.M, so the second line is not matched here.
abs_start_hits = re.findall(r'\Ahttp://', 'http://www.baidu.com\nhttp://www.souhu.com', re.M)
print(abs_start_hits)  # ['http://']

# \Z  -> matches only at the very end of the string. Unlike "$" it is not
#        affected by re.M, so ".png" at the end of the first line is skipped.
abs_end_hits = re.findall(r'\.jpge\Z|\.png\Z|\.npg\Z', 'touxiang.png\nyi.npg', re.M)
print(abs_end_hits)  # ['.npg']

五、预定义字符集合

 import re

# Predefined character classes.
# Hand-written equivalents: [0-9] for digits, [A-Za-z] for ASCII letters.
# Do not write [A-z]: that range also contains the punctuation characters
# that sit between "Z" and "a" in ASCII.

# \d  -> a digit.
digit_hits = re.findall(r'\d', 's135434657ehfu243fdsf')
print(digit_hits)  # ['1', '3', '5', '4', '3', '4', '6', '5', '7', '2', '4', '3']

# \d+ -> a run of one or more digits.
digit_run_hits = re.findall(r'\d+', 's135434657ehfu243fdsf')
print(digit_run_hits)  # ['135434657', '243']

# \D  -> any character that is not a digit.
non_digit_hits = re.findall(r'\D', 's135434657ehfu243fdsf')
print(non_digit_hits)  # ['s', 'e', 'h', 'f', 'u', 'f', 'd', 's', 'f']

# \w  -> a word character: letters, digits and underscore. For str patterns
#        this is Unicode-aware, so Chinese characters match too.
word_hits = re.findall(r'\w', 's13543465^*7eh$@fu243fds你好f')
print(word_hits)

# \W  -> any character that is not a word character.
non_word_hits = re.findall(r'\W', 's13543465^*7eh$@fu243fdsf')
print(non_word_hits)  # ['^', '*', '$', '@']

# \s  -> a whitespace character, e.g. space, \t, \n, \r.
# The pattern is a raw string so that "\s" is not an invalid string escape;
# the subject string stays non-raw on purpose, because it must contain real
# tab / newline / carriage-return characters.
space_hits = re.findall(r'\s', 'we j\tr \noe\rhgorhg')
print(space_hits)  # [' ', '\t', ' ', '\n', '\r']

# \S  -> any character that is not whitespace.
non_space_hits = re.findall(r'\S', 'we j\tr \noe\rhgorhg')
print(non_space_hits)  # ['w', 'e', 'j', 'r', 'o', 'e', 'h', 'g', 'o', 'r', 'h', 'g']

六、分组匹配

# Grouping: parentheses let a quantifier repeat a whole sub-pattern.
# Example: matching an IPv4-style dotted address.
ip_text = '192.168.160.3'

# Written out in full, with no groups at all.
print(re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', ip_text))

# Same idea with a group: "a dot followed by 1-3 digits", repeated three times.
grouped = r'\d{1,3}(\.\d{1,3}){3}'
print(re.search(grouped, ip_text).group())

# When the pattern contains a capturing group, findall returns only what the
# group captured (the text inside the parentheses), not the whole match.
print(re.findall(grouped, ip_text))

# (?:...) is a non-capturing group, so findall returns the whole match again.
print(re.findall(r'\d{1,3}(?:\.\d{1,3}){3}', ip_text))

posted on 2017-06-24 00:56 笔-记阅读(237) 评论(0) 收藏举报

刷新页面返回顶部

python基础五——正则表达式

公告