【1.77】管道符 以及 正则表达式基本方法

import re

# The pipe character "|" means alternation ("or") inside a pattern.
# At each position the alternatives are tried left to right and the first
# one that matches wins.
# Patterns are raw strings so backslash escapes reach the regex engine
# untouched, and the subject is called `text` to avoid shadowing builtin `str`.
text = "dsfdsfasd|sada"
y = re.findall(r"sd|d", text)
print(y)  # ['d', 'd', 'sd', 'd']

y = re.findall(r"sd|ds", text)
print(y)  # ['ds', 'ds', 'sd']

# Escape the pipe with a backslash to match a literal "|".
y = re.findall(r"d\|", text)
print(y)  # ['d|']

# With a capturing group, findall returns the group's text (its last
# repetition) rather than the whole match.
y = re.findall(r"(sf)+", text)
print(y)  # ['sf', 'sf']
管道符号 | 或者
import re

# Alternation with "|": the pattern matches either the left or the right
# alternative; the leftmost alternative that matches at a position is used.
# Raw-string patterns keep the backslash escapes valid, and `subject`
# replaces the original variable name that shadowed the builtin `str`.
subject = "dsfdsfasd|sada"

y = re.findall(r"sd|d", subject)
print(y)  # ['d', 'd', 'sd', 'd']

y = re.findall(r"sd|ds", subject)
print(y)  # ['ds', 'ds', 'sd']

# "\|" matches the pipe character itself instead of acting as alternation.
y = re.findall(r"d\|", subject)
print(y)  # ['d|']

# A capturing group makes findall report only what the group captured.
y = re.findall(r"(sf)+", subject)
print(y)  # ['sf', 'sf']






# re.search scans the string and stops at the first place the pattern
# matches; it returns None when there is no match anywhere.
# (?P<name>...) defines a named group that can be read with group("name").
text = "dsfdsfasd|sada"

y = re.search(r"(?P<name>\w+)", text)
print(y)  # <re.Match object; span=(0, 9), match='dsfdsfasd'>
print(y.group(), type(y.group()))  # dsfdsfasd <class 'str'>
print(y.group("name"))  # dsfdsfasd

# Two named groups: letters followed by digits. Only the first occurrence
# ("alex36") is returned even though the string contains three.
text = "alex36wusir34xialu33"
y = re.search(r"(?P<name>[a-z]+)(?P<age>\d+)", text)
print(y)  # <re.Match object; span=(0, 6), match='alex36'>
print(y.group("name"))  # alex
print(y.group("age"))  # 36
print(y.group())  # alex36
search 找到匹配规则的第一个字符串,就停下来 ,找不到返回 None
import re

# re.match only tries to match at the very beginning of the string; it
# returns None if the pattern does not match there.
text = "dsfdsfasd|sada"

y = re.match(r"(?P<name>\w+)", text)
print(y)  # <re.Match object; span=(0, 9), match='dsfdsfasd'>
print(y.group(), type(y.group()))  # dsfdsfasd <class 'str'>
print(y.group("name"))  # dsfdsfasd

# The string starts with letters followed by digits, so match succeeds.
text = "alex36wusir34xialu33"
y = re.match(r"(?P<name>[a-z]+)(?P<age>\d+)", text)
print(y)  # <re.Match object; span=(0, 6), match='alex36'>
print(y.group("name"))  # alex
print(y.group("age"))  # 36
print(y.group())  # alex36

# The string starts with "@#", so the pattern cannot match at position 0
# and match returns None (re.search would still find "alex36").
text = "@#alex36wusir34xialu33"
y = re.match(r"(?P<name>[a-z]+)(?P<age>\d+)", text)
print(y)  # None
match 只从字符串开头开始匹配,找到匹配规则的第一个字符串,找不到返回 None
import re



# re.split cuts the string wherever the pattern matches. The matched
# separator is not kept, and when nothing lies between two separators the
# result contains an empty string at that position.
text = "hello everyone|student"

# Character class: split on a space or on a literal pipe.
y = re.split(r"[ |]", text)
print(y)  # ['hello', 'everyone', 'student']

# Same separators written as an alternation; the pipe must be escaped.
y = re.split(r" |\|", text)
print(y)  # ['hello', 'everyone', 'student']

text = "hello everyone student"
y = re.split(" ", text)
print(y)  # ['hello', 'everyone', 'student']

# Split a multi-line string on newlines.
text = '''hello
everyone
student'''
y = re.split(r"\n", text)
print(y)  # ['hello', 'everyone', 'student']

# Split on "f" or "d": adjacent separators produce empty strings.
text = "asdasdfsbfhdsffdgsdhyrtbn"
y = re.split(r"[fd]", text)
print(y)  # ['as', 'as', '', 'sb', 'h', 's', '', '', 'gs', 'hyrtbn']
split 分开字符串 但是规则里面的字符不保留,规则左边没有就为空
import re

# re.sub is the regex counterpart of str.replace: every match of the
# pattern is replaced with the given replacement text.
text = "sdfase123teht2342hr12434asd987"

y = re.sub(r"\d", "A", text)
print(y)  # sdfaseAAAtehtAAAAhrAAAAAasdAAA

# Limit the number of replacements. `count` is passed by keyword because
# passing it positionally is deprecated in recent Python versions.
y = re.sub(r"\d", "A", text, count=3)
print(y)  # sdfaseAAAteht2342hr12434asd987

# re.subn returns a tuple: (new string, number of replacements made).
y = re.subn(r"\d", "A", text)
print(y)  # ('sdfaseAAAtehtAAAAhrAAAAAasdAAA', 15)

# "\d+" treats each run of digits as one match, so only 4 replacements.
y = re.subn(r"\d+", "A", text)
print(y)  # ('sdfaseAtehtAhrAasdA', 4)
sub 相当于替换replace
import re

# re.compile pre-compiles a pattern into a reusable pattern object whose
# methods (findall, search, ...) take only the subject string.
# The subject is a raw string because it contains a literal backslash
# ("\E"), which is not a valid escape sequence in a normal string literal.
text = r"12sdf34r}\ER$E43T@#"

com = re.compile(r"\d")
y = com.findall(text)
print(y)  # ['1', '2', '3', '4', '4', '3']

com = re.compile(r"\d+")
y = com.findall(text)
print(y)  # ['12', '34', '43']
compile 就是编译匹配规则
import re

# re.finditer returns an iterator of match objects. Take them one at a
# time with next() and read the matched text with .group().
text = "2143sdfsa3r435dsy6yu5"
y = re.finditer(r"\d", text)
print(y)  # <callable_iterator object at 0x005C0950>

print(next(y))  # <re.Match object; span=(0, 1), match='2'>

# Bind the match once: every next() call consumes one item, so calling it
# twice inside a single print would report the text of one match and the
# type of another while silently discarding a match.
second = next(y)
print(second.group(), type(second.group()))  # 1 <class 'str'>
finditer 就是找出匹配数据 返回迭代器 使用 next 取出来 然后用group取
import re

# When the pattern contains a capturing group, findall returns the text
# captured by the group instead of the whole match.
text = "www.baidu.com www.163.com"
y = re.findall(r"www\.(baidu|163)\.com", text)
print(y)  # ['baidu', '163']

# (?:...) makes the group non-capturing, so findall returns whole matches.
y = re.findall(r"www\.(?:baidu|163)\.com", text)
print(y)  # ['www.baidu.com', 'www.163.com']
findall 优先将 分组的数据显示出来 利用?: 去掉括号的优先级显示
import re

# findall gives priority to capturing groups: if the pattern has one,
# the result lists what the group captured, not the full match.
text = "www.baidu.com www.163.com"
y = re.findall(r"www\.(baidu|163)\.com", text)
print(y)  # ['baidu', '163']

# A repeated capturing group keeps only its last repetition, so each
# match contributes a single 'abc'.
text = "abcabcabcabc|abc"
y = re.findall(r"(abc)+", text)
print(y)  # ['abc', 'abc']

# (?:...) turns off capturing, so the whole repeated run is returned.
y = re.findall(r"(?:abc)+", text)
print(y)  # ['abcabcabcabc', 'abc']

text = "dsfdsfasd|sada"
y = re.findall(r"(sf)+", text)
print(y)  # ['sf', 'sf']
findall 优先将 分组的数据显示出来 re.findall("(abc)+","abcabcabcabc|abc")

 

posted @ 2016-04-23 22:39  科学小怪癖  阅读(531)  评论(0)    收藏  举报