re正则模块
1 '.' : 默认匹配除\n之外的任意字符 2 '^' : 匹配字符串开头 3 '$' : 匹配字符串结尾 4 '*' : 匹配 * 号前的字符 0 到多次 5 '+' : 匹配 + 号前的字符 1 到多次 6 '?' : 匹配 ? 号前一个字符 0 或 1 次 7 '{m}' : 匹配前一个字符 m 次 8 '{n,m}' : 匹配前一个字符 n 到 m 次 9 '\' : 转义字符,使其后的特殊字符按字面匹配(如 \. 匹配 .),或引入 \d、\w 等特殊序列 10 '(...)' : 分组匹配 11 12 '\A' : 匹配字符串开头 13 '\Z' : 匹配字符串结尾 14 '\d' : 匹配数字 0 - 9 15 '\D' : 匹配非数字 16 '\w' : 匹配 【A-Za-z0-9_】 17 '\W' : 匹配非【A-Za-z0-9_】 18 '\s' : 匹配空白字符,如空格、\t、\n、\r 19 20 re.match -- 从头开始匹配,匹配一次 21 re.search -- 匹配条件包含,匹配一次 22 re.findall -- 匹配所有结果并将结果以列表形式返回 23 re.split -- 以匹配到的字符用作列表分隔符 24 re.sub -- 对匹配到的字符进行替换 25 26 # 匹配字母或数字开头 1 次到多次 27 import re 28 >>> aa = re.match("\w+","inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255") 29 >>> print(aa.group()) 30 inet 31 # 匹配除 \n 的任意字符 32 >>> aa = re.match(".","inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255") 33 >>> print(aa.group()) 34 i 35 36 # 匹配除 \n 的任意字符 1 次到多次 37 >>> aa = re.match(".+","inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255") 38 >>> print(aa.group()) 39 inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255 40 41 # 匹配除 \n 的任意字符 0 次到多次 42 >>> aa = re.match(".*","inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255") 43 >>> print(aa.group()) 44 inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255 45 46 # 匹配字母或数字开头的 0 次到多次,文本内容以 % 开头时 \w 匹配 0 次,匹配仍然成功,结果为空字符串(不是 None),print 输出空行 47 >>> aa = re.match("\w*","%inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255") 48 >>> print(aa.group()) 49 >>> 50 51 # 匹配字母或数字开头的 1 次到多次,文本内容以 % 开头会匹配不到,re.match 返回 None,调用 group() 时程序会报错 52 >>> aa = re.match("\w+","%inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255") 53 >>> print(aa.group()) 54 Traceback (most recent call last): 55 File "<stdin>", line 1, in <module> 56 AttributeError: 'NoneType' object has no attribute 'group' 57 58 # 匹配字母或数字开头的 0 次或 1 次,文本内容以 % 开头时 \w 匹配 0 次,匹配仍然成功,结果为空字符串(不是 None) 59 >>> aa = re.match("\w?","%inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255") 60 >>> print(aa.group()) 61 62 # 匹配字母或数字开头的 0 次或 1 次,文本内容以字母开头 63 >>> aa = 
re.match("\w?","inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255") 64 >>> print(aa.group()) 65 i 66 67 # 匹配字母或数字开头 3 次 68 >>> aa = re.match("\w{3}","inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255") 69 >>> print(aa.group()) 70 ine 71 72 # 匹配字母或数字开头至少 1 次,最多 4 次;而 \w{5,8}(最少 5 次最多 8 次)因开头只有 4 个连续的字母而匹配失败,返回 None,调用 group() 会报错 73 >>> aa = re.match("\w{1,4}","inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255") 74 >>> print(aa.group()) 75 inet 76 >>> aa = re.match("\w{5,8}","inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255") 77 >>> print(aa.group()) 78 Traceback (most recent call last): 79 File "<stdin>", line 1, in <module> 80 AttributeError: 'NoneType' object has no attribute 'group' 81 82 # 匹配字母 inet 或 INET 开头 83 >>> aa = re.match("inet|INET","inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255") 84 >>> print(aa.group()) 85 inet 86 87 上面的语句都可以使用search来替换(注意 search 会扫描整个字符串,不限于从开头匹配,因此对开头匹配失败的例子结果会不同),如: 88 >>> re.search("inet|INET","inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255").group() 89 'inet' 90 91 # 使用()组合匹配,组合匹配用groups 92 >>> re.search("(\w{6}a(123|456)c)","abcabca456c").groups() 93 ('abcabca456c', '456') 94 >>> 95 >>> re.search("(\w{5}a(123|456)c)","abcabca456c").groups() 96 ('bcabca456c', '456') 97 98 >>> re.search("(\d{2})(\d{2})(\d{2})(\d{4})","110119194910011234,'guoqingsheng'").groups() 99 ('11', '01', '19', '1949') 100 101 # 匹配数字开头数字结尾,$ 符号和 \Z 102 >>> re.search("^\d.*\d$","110119194910011234,guoqingsheng1").group() 103 '110119194910011234,guoqingsheng1' 104 >>> re.search("^\d.*\d\Z","110119194910011234,guoqingsheng1").group() 105 '110119194910011234,guoqingsheng1' 106 107 # 分组采用元组(groups)和字典(groupdict)形式 108 >>> re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{8})","110119194910011234").groups() 109 ('1101', '19', '19491001') 110 >>> re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{8})","110119194910011234").groupdict() 111 {'province': '1101', 'city': '19', 'birthday': '19491001'} 112 113 # 从文本对象中匹配 IP 地址 114 >>> 
re.search("\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}","inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255").group() 115 '192.168.10.51' 116 >>> 117 >>> re.search("(\d{1,3}\.){1,3}\d{1,3}","inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255").group() 118 '192.168.10.51' 119 120 # 匹配所有的数字--findall 121 >>> re.findall("\d+","inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255") 122 ['192', '168', '10', '51', '255', '255', '255', '0', '192', '168', '10', '255'] 123 124 # 以数字为分隔符--split 125 >>> re.split("\d+","inet 192.168.10.51 netmask 255.255.255.0 broadcast 192.168.10.255") 126 ['inet ', '.', '.', '.', ' netmask ', '.', '.', '.', ' broadcast ', '.', '.', '.', ''] 127 128 # 替换--sub,查找docker(不区分大小写)并替换成container,加上count指定替换次数 129 >>> re.sub("docker","container","After successfully installing Docker and starting Docker",flags=re.I) 130 'After successfully installing container and starting container' 131 >>> re.sub("docker","container","After successfully installing Docker and starting Docker",flags=re.I,count=1) 132 'After successfully installing container and starting Docker' 133 134 # 反斜杠匹配,python 普通字符串中需要使用 4 个 \ 来匹配一个反斜杠(使用原始字符串 r"\\" 时只需 2 个) 135 >>> re.split("\\\\",r"D:\PycharmProjects\second") 136 ['D:', 'PycharmProjects', 'second'] 137 138 # 忽略大小写-- flags=re.I 139 >>> re.findall("pycharm",r"D:\PycharmProjects\second",flags=re.I) 140 ['Pycharm'] 141 142 # 匹配包括换行符\n 在内的字符 -- flags=re.S 143 >>> re.search(".*","\nABDdlskwe403d21sf31dsf\nsfwii4f34ldf3d\nfewkorio133ld43",flags=re.S).group() 144 '\nABDdlskwe403d21sf31dsf\nsfwii4f34ldf3d\nfewkorio133ld43' 145 146 # 取非 -- ^ 位于中括号内的开头时代表取非的意思 147 >>> a = "2 - 1 * ( 20 - 13 + ( 12/2+2-36/9) - 27 * ( (53 -18)/7) - (90 - 72)/9 ) + 70/(60 - 15 * 2) * 3" 148 >>> re.search(r'\([^()]+\)',a).group() 149 '( 12/2+2-36/9)'
博主小学未毕业,请勿口喷!

浙公网安备 33010602011771号