正则表达式基本用法
# coding=utf-8
# Basic usage of Python's `re` module, demonstrated on a sample command line
# that contains one Windows-style path and one POSIX-style path.
#
# Every sample string is a raw literal (r'...'). In a normal literal the "\v"
# of "\venv" is the vertical-tab escape (\x0b), which silently corrupts the
# path, and "\p", "\m", "\S" are invalid escape sequences that Python warns
# about. Raw literals keep each backslash as a real backslash.
import re

# --- findall: extract the file names from the paths -------------------------
s1 = r'C:\pycharm\myblog\venv\Scripts\python.exe C:/pycharm/myblog/demo/demo.py'
# [^/\\]* matches the run of characters that follows the last / or \ ,
# \.      matches a literal dot,
# [^ ]*   matches the extension, stopping at the next space.
print('s1:', re.findall(r'[^/\\]*\.[^ ]*', s1))
# Output: s1: ['python.exe', 'demo.py']

# --- findall: extract the directory part of the paths -----------------------
s2 = r'C:\pycharm\myblog\venv\Scripts\python.exe C:/pycharm/myblog/demo/demo.py'
# [^ ]* greedily matches any non-space characters; [/\\] then forces the match
# to end on the last \ or / of each path.
print('s2:', re.findall(r'[^ ]*[/\\]', s2))
# Output: s2: ['C:\\pycharm\\myblog\\venv\\Scripts\\', 'C:/pycharm/myblog/demo/']

# --- compile: reuse a pre-compiled pattern -----------------------------------
s3 = r'C:\pycharm\myblog\venv\Scripts\python.exe C:/pycharm/myblog/demo/demo.py'
rec = re.compile(r'py\w+')
print('s3:', rec.findall(s3))
# Output: s3: ['pycharm', 'python', 'pycharm']

# --- flags: IGNORECASE makes the match case-insensitive ----------------------
s4 = r'C:\Pycharm\myblog\venv\Scripts\python.exe C:/Pycharm/myblog/demo/demo.py'
rec = re.compile(r'Py\w+', flags=re.IGNORECASE)
print('s4:', rec.findall(s4))
# Output: s4: ['Pycharm', 'python', 'Pycharm']

# --- finditer: iterate over match objects ------------------------------------
# Each yielded match object offers group(), start(), end(), re, string, ...
s5 = r'C:\Pycharm\myblog\venv\Scripts\python.exe C:/Pycharm/myblog/demo/demo.py'
rec = re.compile(r'Py\w+', flags=re.IGNORECASE)
it = rec.finditer(s5)
for i in it:
    print('s5:', i.group())
# Output:
#   s5: Pycharm
#   s5: python
#   s5: Pycharm

# --- match: only matches at the beginning of the string ----------------------
s6 = r'C:\Pycharm\myblog\venv\Scripts\python.exe C:/Pycharm/myblog/demo/demo.py'
rec = re.compile(r'C[^ ]*', flags=re.IGNORECASE)
it = rec.match(s6)
print('s6:', it.group())
# Output: s6: C:\Pycharm\myblog\venv\Scripts\python.exe

# --- search: matches anywhere, but returns only the first hit ----------------
s7 = r'C:\Pycharm\myblog\venv\Scripts\python.exe C:/Pycharm/myblog/demo/demo.py'
rec = re.compile(r'Py[^ ]*')
it = rec.search(s7)
print('s7:', it.group())
# Output: s7: Pycharm\myblog\venv\Scripts\python.exe

# --- sub: the regex counterpart of str.replace -------------------------------
# Every match of the pattern is replaced by the given replacement text.
s8 = r'C:\Pycharm\myblog\venv\Scripts\python.exe C:/Pycharm/myblog/demo/demo.py'
rec = re.compile(r'Py[^ ]*')
print('s8:', rec.sub('aaa', s8))
# Output: s8: C:\aaa C:/aaa

# --- split: split the string on a regex delimiter ----------------------------
s9 = r'C:\Pycharm\myblog\venv\Scripts\python.exe C:/Pycharm/myblog/demo/demo.py'
# One or more of : . \ / ^ or space act as a single delimiter. Note that the
# ^ is not the first character of the class, so it is a literal caret here,
# not a negation.
rec = re.compile(r'[:\.\\/^ ]+')
print('s9:', rec.split(s9))
# Output: s9: ['C', 'Pycharm', 'myblog', 'venv', 'Scripts', 'python', 'exe',
#              'C', 'Pycharm', 'myblog', 'demo', 'demo', 'py']

# --- greedy vs. non-greedy matching ------------------------------------------
s10 = r'C:\Pycharm\myblog\venv\Scripts\python.exe C:/Pycharm/myblog/demo/demo.py'
# Greedy .* runs as far as possible, so the match ends at the LAST dot.
print('贪婪匹配到最后一个点 s10:', re.findall(r'env.*\.', s10))
# Output (list part): ['env\\Scripts\\python.exe C:/Pycharm/myblog/demo/demo.']
# Adding ? makes .* non-greedy, so the match ends at the FIRST dot.
print('非贪婪匹配加?,匹配到第一个点停止匹配 s10:', re.findall(r'env.*?\.', s10))
# Output (list part): ['env\\Scripts\\python.']