正则表达式基本用法

# coding=utf-8
import re


# Extract the file names from two paths held in one string.
# The sample is a raw string so the Windows backslashes stay literal: in a
# normal literal `\v` is the vertical-tab escape and `\p`, `\m`, `\S` are
# invalid escape sequences.
s1 = r'C:\pycharm\myblog\venv\Scripts\python.exe C:/pycharm/myblog/demo/demo.py'
# [^/\\]* matches a run of characters containing neither / nor \ (the last
# path component), \. matches the literal dot, and [^ ]* matches the
# extension up to, but not including, the next space.
print('s1:', re.findall(r'[^/\\]*\.[^ ]*', s1))

s1: ['python.exe', 'demo.py']

# Extract the directory part (everything except the file name) of each path.
# NOTE: this literal is not a raw string, so Python turns the `\v` of
# `\venv` into a vertical tab; that is the \x0b visible in the printed result.
s2 = 'C:\pycharm\myblog\venv\Scripts\python.exe C:/pycharm/myblog/demo/demo.py'
# [^ ]* greedily takes every non-space character and then backtracks until
# [/\\] can match, i.e. up to the last / or \ of each space-separated path.
dir_parts = re.compile(r'[^ ]*[/\\]').findall(s2)
print('s2:', dir_parts)

s2: ['C:\\pycharm\\myblog\x0benv\\Scripts\\', 'C:/pycharm/myblog/demo/']


# Using a pre-compiled regular expression object.
# Raw string: keeps the backslashes literal (otherwise `\v` would become a
# vertical tab and `\p`, `\m`, `\S` would be invalid escape sequences).
s3 = r'C:\pycharm\myblog\venv\Scripts\python.exe C:/pycharm/myblog/demo/demo.py'
# "py" followed by at least one word character; the trailing ".py" does not
# match because nothing follows it.
rec = re.compile(r'py\w+')
print('s3:', rec.findall(s3))

s3: ['pycharm', 'python', 'pycharm']

# Compiling with the flags argument: IGNORECASE makes the match
# case-insensitive, so the pattern "Py..." also finds "python".
# Raw string: keeps the backslashes literal (otherwise `\v` would become a
# vertical tab and `\P`, `\m`, `\S`, `\p` would be invalid escape sequences).
s4 = r'C:\Pycharm\myblog\venv\Scripts\python.exe C:/Pycharm/myblog/demo/demo.py'
rec = re.compile(r'Py\w+', flags=re.IGNORECASE)
print('s4:', rec.findall(s4))

s4: ['Pycharm', 'python', 'Pycharm']


# finditer 返回遍历迭代器, 返回的对象方法有 group, start, end, re, string 等
s5 = 'C:\Pycharm\myblog\venv\Scripts\python.exe C:/Pycharm/myblog/demo/demo.py'
rec = re.compile(r'Py\w+', flags=re.IGNORECASE)
it = rec.finditer(s5)
for i in it:
    print('s5:', i.group())

s5: Pycharm
s5: python
s5: Pycharm

# match() only tries the pattern at the very beginning of the string.
# NOTE: this literal is not a raw string, so Python turns the `\v` of
# `\venv` into a vertical tab, which ends up inside the matched text.
s6 = 'C:\Pycharm\myblog\venv\Scripts\python.exe C:/Pycharm/myblog/demo/demo.py'
# "C" followed by everything up to the first space.
rec = re.compile(r'C[^ ]*', re.IGNORECASE)
it = rec.match(s6)
print('s6:', it.group(0))

s6: C:\Pycharm\myblogenv\Scripts\python.exe

# search() scans the whole string but returns only the first match.
# NOTE: this literal is not a raw string, so Python turns the `\v` of
# `\venv` into a vertical tab, which ends up inside the matched text.
s7 = 'C:\Pycharm\myblog\venv\Scripts\python.exe C:/Pycharm/myblog/demo/demo.py'
# "Py" followed by everything up to the first space.
rec = re.compile(r'Py[^ ]*')
it = rec.search(s7)
print('s7:', it.group(0))

s7: Pycharm\myblogenv\Scripts\python.exe

# sub() is the regex counterpart of str.replace: every match is replaced
# with the specified text.
# Raw string: keeps the backslashes literal (otherwise `\v` would become a
# vertical tab and `\P`, `\m`, `\S`, `\p` would be invalid escape sequences).
s8 = r'C:\Pycharm\myblog\venv\Scripts\python.exe C:/Pycharm/myblog/demo/demo.py'
# "Py" followed by everything up to the next space (or the end of the string).
rec = re.compile(r'Py[^ ]*')
print('s8:', rec.sub('aaa', s8))

s8: C:\aaa C:/aaa

# split() cuts the string wherever the separator pattern matches.
# NOTE: this literal is not a raw string, so Python turns the `\v` of
# `\venv` into a vertical tab; it is not a separator, hence the
# 'myblog\x0benv' item in the printed result.
s9 = 'C:\Pycharm\myblog\venv\Scripts\python.exe C:/Pycharm/myblog/demo/demo.py'
# One or more of : . \ / ^ or space. The ^ is a literal caret here because
# it is not the first character of the class.
rec = re.compile(r'[:\.\\/^ ]+')
pieces = rec.split(s9)
print('s9:', pieces)

s9: ['C', 'Pycharm', 'myblog\x0benv', 'Scripts', 'python', 'exe', 'C', 'Pycharm', 'myblog', 'demo', 'demo', 'py']

# Greedy versus non-greedy matching.
# Raw string: keeps the backslashes literal (otherwise `\v` would become a
# vertical tab and `\P`, `\m`, `\S`, `\p` would be invalid escape sequences).
s10 = r'C:\Pycharm\myblog\venv\Scripts\python.exe C:/Pycharm/myblog/demo/demo.py'
# Greedy: .* consumes as much as possible, so the match runs to the last dot.
print('贪婪匹配到最后一个点 s10:', re.findall(r'env.*\.', s10))
# Non-greedy: the trailing ? makes .* stop at the first dot.
print('非贪婪匹配加?,匹配到第一个点停止匹配 s10:', re.findall(r'env.*?\.', s10))

贪婪匹配到最后一个点 s10: ['env\\Scripts\\python.exe C:/Pycharm/myblog/demo/demo.']
非贪婪匹配加?,匹配到第一个点停止匹配 s10: ['env\\Scripts\\python.']
  

  

posted on 2018-01-13 21:02  wuzhuquan  阅读(126)  评论(0)    收藏  举报

导航