Python 正则表达式的处理

1.基本用法

#!/usr/bin/env python
# coding=utf-8

import re

# example 1: split a string on runs of whitespace (spaces and tabs).
text ="fjsk test\t fjskd bar\t \ttest"
# Raw string, so the backslash reaches the regex engine untouched.
regex = re.compile(r'\s+')
print(regex.split(text))

# example 2: find, locate and replace e-mail-like tokens in a text block.
email ="""
    jfksdfasm@qq.com
    test@test.com.cn
    jfdskf@163.com
    jkmiao@yahoo.123com
    """

# Local part, "@", domain, ".", then a 2-6 character suffix.  The character
# classes list only upper-case letters; re.IGNORECASE admits lower case.
pattern = r'[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z.0-9]{2,6}'
regex = re.compile(pattern, flags=re.IGNORECASE)

# get all
print(regex.findall(email))

# get the first one
# Search `email`, not `text`: `text` contains no address, so searching it
# returns None.
m = regex.search(email)
# search() returns None when nothing matches, so guard before using the match.
print(m.group() if m else None)

# replace
print(regex.sub('RECORD', email))


显示:

jkmiao@sysucis:~/workplace/python/test$ python regex.py 
['fjsk', 'test', 'fjskd', 'bar', 'test']
['jfksdfasm@qq.com', 'test@test.com.cn', 'jfdskf@163.com', 'jkmiao@yahoo.123com']
None

    RECORD
    RECORD
    RECORD
    RECORD

 

2. 分组,返回元组

#example 3

# Three capture groups: local part, domain, suffix.
# The local-part class accepts "-" so that it agrees with the other e-mail
# patterns in this file.
pattern = r'([A-Z0-9._%+-]+)@([A-Z0-9.-]+)\.([A-Z.]{2,5})'
regex = re.compile(pattern, flags=re.IGNORECASE)

# match() anchors only at the start of the string, and the suffix group takes
# at most 5 characters, so "suffix" is captured as "suffi".
m = regex.match('name@domain.suffix')
print(m.groups())

# With capture groups in the pattern, findall() returns one tuple of group
# values per match instead of the whole matched string.
print(regex.findall(email))
# output

('name', 'domain', 'suffi')
[('jfksdfasm', 'qq', 'com'), ('test', 'test.com', 'cn'), ('jfdskf', '163', 'com')]

 

 

3.给分组加名称,返回字典

#example 4

# Named groups: (?P<name>...) lets the match be read back as a dict.
# re.VERBOSE ignores the whitespace and newlines used to lay the pattern out.
# NOTE(review): the group name "userame" looks like a typo for "username";
# it is kept because it is the dictionary key shown in the sample output.
regex = re.compile(r"""
                   (?P<userame>[A-Z0-9._%+-]+)
                   @(?P<domain>[A-Z0-9.-]+)
                   \.
                   (?P<suffix>[A-Z0-9.]{2,4})
                   """, flags=re.IGNORECASE | re.VERBOSE)

m = regex.match("jkmaio@sysu.com")
# groupdict() maps each group name to the text it captured.
print(m.groupdict())

# findall() still returns plain tuples, in group order, even for named groups.
print(regex.findall(email))

# output
jkmiao@sysucis:~/workplace/python/test$ python regex.py

{'domain': 'sysu', 'userame': 'jkmaio', 'suffix': 'com'}
[('jfksdfasm', 'qq', 'com'), ('test', 'test.com', 'cn'), ('jfdskf', '163', 'com'), ('jkmiao', 'yahoo', '123c')]

 

 

posted on 2015-06-29 11:05  星空守望者--jkmiao  阅读(422)  评论(0)    收藏  举报