python内置模块之正则re模块&collections模块&time&datetime模块

1. re模块

在python中想使用正则必须借助于模块，而 re就是其中之一。

import re

# 1. re.findall(pattern, text)
# Collect every non-overlapping match of the pattern in the text.
res = re.findall('a', 'eva jason joshua jack')
print(res)  # Always a list; an empty list [] when nothing matches.
# ['a', 'a', 'a', 'a']

# 2. re.search(pattern, text)
# Scan the text and stop at the first match.
res1 = re.search('a', 'eva jason joshua jack')
print(res1)  # <re.Match object; span=(2, 3), match='a'>
# Returns a match object, or None when nothing matches; calling
# .group() on None raises AttributeError.
print(res1.group())  # The matched text: a


# 3. re.match(pattern, text)
# Match only at the very beginning of the text.
res2 = re.match('a', 'badc')
print(res2)  # None, because 'badc' does not start with 'a'
# The result is a match object or None, so test it before calling
# .group(); an unguarded res2.group() here would raise AttributeError
# and stop the script.
if res2:
    print(res2.group())
else:
    print('不好意思，没找到')

re模块其他方法

import re

1.
# Cut the string at every character matched by the class [ab];
# adjacent separators leave empty strings in the result.
res = re.split(pattern='[ab]', string='abcd')
print(res)  # ['', '', 'cd']


2.
# Replace text matched by the pattern; `count` caps how many matches
# are replaced (comparable to str.replace with a count).
# The pattern is a raw string so that \d reaches the regex engine
# without relying on an invalid string escape.
res1 = re.sub(r'\d', 'H', 'eva3jason4yuan4', count=1)
print(res1)  # evaHjason4yuan4

3.
# Like re.sub, but returns a tuple: (new string, number of replacements).
res2 = re.subn(r'\d', 'H', 'eva3jason4yuan4')
print(res2)  # ('evaHjasonHyuanH', 3)

4."""常用的"""
# Precompile the pattern once so the compiled object can be reused
# for many searches.
obj = re.compile(r'\d{3}')
res3 = obj.search('ajcj23o8a933')
res4 = obj.findall('lo837k38274')
print(res3.group(), res4)  # 933 ['837', '382']


5."""常用的"""
res5 = re.finditer(r'\d', 'ds3s4784a')
print(res5)  # An iterator of match objects
# Loop over the iterator and call .group() on each match object.
digits = [m.group() for m in res5]
print(digits)  # ['3', '4', '7', '8', '4']
# With no match the iterator is empty, so the resulting list is [].

re扩展---分组优先机制

import re

1.
# Parentheses create numbered capture groups; group() with no
# argument is the whole match, group(n) is the n-th group.
res = re.search(r'^[1-9](\d{14})(\d{2}[0-9x])?$', '110105199812067023')
print(res.group())  # 110105199812067023
print(res.group(1))  # 10105199812067
print(res.group(2))  # 023

2. re.findall针对分组优先展示，无名分组
# Important
# findall gives capture groups priority: when the pattern contains a
# group, only the group's text is returned, not the whole match.
res1 = re.findall(r'^[1-9]\d{14}(\d{2}[0-9x])?$', '110105199812067023')
print(res1)  # ['023']

# Starting the group with ?: makes it non-capturing, so findall
# returns the whole match again.
res2 = re.findall(r'^[1-9]\d{14}(?:\d{2}[0-9x])?$', '110105199812067023')
print(res2)  # ['110105199812067023']


3. 起别名，有名分组，小括号
# Important
# (?P<name>...) gives a group a name in addition to its number.
res3 = re.search(r'^[1-9](?P<othername>\d{14})(?P<kkk>\d{2}[0-9x])?$', '110105199812067023')
print(res3)
# <re.Match object; span=(0, 18), match='110105199812067023'>

print(res3.group())  # 110105199812067023

# A named group can be read by number or by name.
print(res3.group(1))  # 10105199812067
print(res3.group('othername'))  # 10105199812067
print(res3.group('kkk'))  # 023

4.
# With several capture groups, findall returns one tuple of group
# texts per match.
res4 = re.findall(r'^[1-9](?P<xxx>\d{14})(?P<kkk>\d{2}[0-9x])?$', '110105199812067023')
print(res4)  # [('10105199812067', '023')]

re实战之爬取红牛分公司数据

import re


# Load the text to search: redbull.txt holds the saved page source of
# the Red Bull branch-office listing.
with open(r'redbull.txt', 'r', encoding='utf8') as f:
    data = f.read()

# Extract each field with a non-greedy capture group.
# Branch names
title_list = re.findall('<h2>(.*?)</h2>', data)
# Branch addresses
address_list = re.findall("<p class='mapIco'>(.*?)</p>", data)
# Contents of the 'mailIco' paragraphs.
# NOTE(review): the original comment called this e-mail, but the output
# label below says postal code (邮编) -- confirm against the page.
email_list = re.findall("<p class='mailIco'>(.*?)</p>", data)
# Branch phone numbers
phone_list = re.findall("<p class='telIco'>(.*?)</p>", data)

# Pair the four lists up position by position and print one record
# per branch.
res = zip(title_list, address_list, email_list, phone_list)
for data_tuple in res:
    title, address, mail, phone = data_tuple
    print("""
    公司名称：{}，
    公司地址：{}，
    公司邮编：{}，
    公司电话：{}
    """.format(title, address, mail, phone))

4. collections模块

该模块内部提供了一些高阶的数据类型

namedtuple: 具名元组

from collections import namedtuple

# namedtuple('TypeName', [field1, field2, ...])
# namedtuple('TypeName', 'field1 field2 ...')

# Basic usage: field names given as a list.
point = namedtuple('坐标', ['东经', '北纬'])
res = point(118.88, 23.5)
print(res)
print(res.东经)
print(res.北纬)
# Output:
# 坐标(东经=118.88, 北纬=23.5)
# 118.88
# 23.5

# Field names may also be given as one space-separated string.
point1 = namedtuple('坐标', 'x y z')
res = point1(1, 2, 3)
print(res)
print(res.x)
print(res.y)
print(res.z)

# Output:
# 坐标(x=1, y=2, z=3)
# 1
# 2
# 3

deque：队列与双端队列

import queue
from collections import deque

# Create a FIFO queue with room for 5 items.
q = queue.Queue(5)
# Enqueue three items.
q.put('fir')
q.put('sec')
q.put('thr')

# Dequeue them; items come out in the order they were put in.
print(q.get())
print(q.get())
print(q.get())
# The queue is now empty. A plain q.get() would block here forever
# waiting for a new item, so ask without blocking and handle the
# queue.Empty exception instead.
try:
    print(q.get(block=False))
except queue.Empty:
    print('queue is empty')


# Double-ended queue: items can be added and removed at both ends.
q = deque((11, 22, 33))
print(q)
q.append(44)        # add on the right end
q.appendleft(55)    # add on the left end

rightmost = q.pop()       # remove from the right end
print(rightmost)
leftmost = q.popleft()    # remove from the left end
print(leftmost)

Counter：计数器，主要用来计数

from collections import Counter

# Count how often each character occurs, first by hand ...
res = 'acbakjcabhacbakcbalcbacbaacdfha'
new_dict = {}
for ch in res:
    # Start missing characters at 0, then add this occurrence.
    new_dict[ch] = new_dict.get(ch, 0) + 1
print(new_dict)

# ... then with Counter, which does the same in a single call.
ret = Counter(res)
print(ret)
# Output (the plain dict keeps first-seen order, Counter prints the
# most common characters first):
# {'a': 10, 'c': 7, 'b': 6, 'k': 2, 'j': 1, 'h': 2, 'l': 1, 'd': 1, 'f': 1}
# Counter({'a': 10, 'c': 7, 'b': 6, 'k': 2, 'h': 2, 'j': 1, 'l': 1, 'd': 1, 'f': 1})

OrderedDict：有序字典

from collections import OrderedDict
# Build a plain dict and an OrderedDict from the same key/value pairs.
pairs = [('name', 'joshua'), ('pwd', 123), ('hobby', 'study')]

normal_dict = dict(pairs)
print(normal_dict)

order_dict = OrderedDict(pairs)
print(order_dict)
OrderedDict 的 key 会按照插入的顺序排列，而不是按照 key 的大小排序（从 Python 3.7 起，普通 dict 也会保持插入顺序）。

defaultdict：带有默认值的字典

l = [11, 22, 33, 44, 55, 66, 77, 88, 99]
li_dic = {'k1': [], 'k2': []}

# Values greater than 66 are collected under 'k1', all others under 'k2'.
for number in l:
    bucket = 'k1' if number > 66 else 'k2'
    li_dic[bucket].append(number)

# 默认值字典
from collections import defaultdict
values = [11, 22, 33, 44, 55, 66, 77, 88, 99]
# A missing key is created on first access with an empty list, so no
# keys have to be prepared up front.
my_dict = defaultdict(list)
for value in values:
    key = 'k1' if value > 60 else 'k2'
    my_dict[key].append(value)
print(my_dict)

5. time模块

时间三种表现形式：

时间戳（秒数）1970-1-1开始
结构化时间（一般给机器看的）
格式化时间（一般给人看的）
三种时间是可以相互转换的！！！

import time

time.sleep(3)  # Block right here for the given number of seconds
time.time()    # Get the current time as a timestamp (seconds)

# Formatted time (for humans)
print(time.strftime('%Y-%m-%d'))
print(time.strftime('%Y-%m-%d %H:%M:%S'))
print(time.strftime('%Y-%m-%d %X'))  # %X is the locale's time representation; normally the same as the line above

# Keep a list of the other format directives somewhere easy to look up.

# Structured time (struct_time)
print(time.localtime())  # Local time
print(time.gmtime())  # UTC time; gmtime() can also turn a timestamp into a struct_time

6. datetime模块

from datetime import date,datetime, timezone, timedelta

# date      -> year, month, day
# datetime  -> year, month, day, hour, minute, second
# time      -> hour, minute, second
print(date.today())
print(datetime.today())  # equivalent to print(datetime.now())

res = datetime.today()
# Individual calendar fields of the datetime.
for field in (res.year, res.month, res.day):
    print(field)
print(res.weekday())     # day of the week as 0-6
print(res.isoweekday())  # day of the week as 1-7

"""时间差 timedelta"""
ctime = datetime.today()
time_tel = timedelta(days=2)
print(ctime - time_tel)  # two days before ctime
print(time_tel + ctime)  # two days after ctime

# datetime  = datetime +/- timedelta
# timedelta = datetime - datetime   (two datetimes cannot be added)

print(datetime.now())  # current local time (naive datetime)
# datetime.utcnow() is deprecated since Python 3.12; request a
# timezone-aware UTC datetime instead.
print(datetime.now(timezone.utc))


# Format conversions
# String -> datetime
text = '2021-11-11'
v1 = datetime.strptime(text, '%Y-%m-%d')
print(v1)

# datetime -> string
v1 = datetime.now()
val = v1.strftime('%Y-%m-%d %H:%M:%S')
print(val)

# Timestamp -> datetime
ctime = time.time()
v1 = datetime.fromtimestamp(ctime)
print(v1)

# datetime -> timestamp
v1 = datetime.now()
val = v1.timestamp()
print(val)

posted @ 2021-11-25 17:36 Joshua_jiaxue 阅读(60) 评论(0) 收藏举报

刷新页面返回顶部