python内置模块

取消转义

在原生的正则表达式中取消转义推荐使用\(每个\只能取消一个字符的转义)
在python中取消转义推荐使用r'\n\a\t'(也可以使用\)

python内置模块之re模块

在python要想使用正则必须借助于模块 re就是其中之一

'''基本操作方式'''
import re

re.findall('正则表达式'， '带匹配的文本') # 根据正则匹配所有符合条件的数据
res = findall ('a','eva jason jackson')
print(res)     # ['a', 'a', 'a'] 结果是一个列表（要么有元素 要么空列表

res = re.search('正则表达式'， '带匹配的文本')  根据正则匹配到一个符合条件的就结束
res = re.search('a', 'eva jason jackson')
print(res)  # 结果对象
print(res.group()) # 正在的结果
if res:
	print(res.group())
else:
    print('不好意思 没有找到')
"""如果没有符合条件的数据 那么search返回None 并且使用group 会直接报错"""

res = re.match('a', 'abca') # 根据正则从头开始匹配（文本内容必须在开头匹配上）
print(res)
print(res.group())
if res:
    print(res.group)
else:
    print('不好意思 没有找到')
"""如果没有符合条件的数据 那么match返回None 并且使用group会直接报错"""

re模块其他方法

import re
先按'a'分割得到''和'bcd',在对''和'bcd'分别按'b'分割
res = re.split('[ab]', 'abcd')
print(res)  # ['', '', 'cd']

类似于字符串类型的replace方法
res = re.sub('\d', 'H', 'eva3jason4yuan4', 1)  # 替换正则匹配到的内容
# res = re.sub('\d','H','eva3jason4yuan4')  # 不写默认替换所有
print(res)  # evaHjason4yuan4   1就只替换一个

"""返回元组 并提示替换了几处"""
res = re.subn('\d', 'H', 'eva3jason4yuan4', 1)
print(res)  # ('evaHjason4yuan4', 1)
res = re.subu('\d', 'H', 'eva3jason4yuan4')
print(res)  # (evaHjasonHyuanH', 3')

"""常用"""
regexp_obj = re.compile('\d+')
res = regexp_obj.search('absd213j1hjj213jk')
res1 = regexp_obj.match('123hhkj2h1j3123')
res2 = regexp_obj.findall('1213k1j2jhj21j3123hh')
print(res,res1,res2)

"""常用"""
# res = re.finditer('\d+','ashdklah21h23kj12jk3klj112312121kl131')
# print([i.group() for i in res])

# res = re.search('^[1-9](\d{14})(\d{2}[0-9x])?$','110105199812067023')
# print(res)
# print(res.group())  # 110105199812067023
# print(res.group(1))  # 10105199812067
# print(res.group(2))  # 023
group(1)里的1,2数字，只看（）括号括的


'''常用'''
# findall针对分组优先展示   无名分组
# res = re.findall("^[1-9]\d{14}(\d{2}[0-9x])?$",'110105199812067023')
# print(res)  # ['023']
# 取消分组优先展示（在分组内开头加？：）          无名分组
# res1 = re.findall("^[1-9](?:\d{14})(?:\d{2}[0-9x])?$",'110105199812067023')
# print(res1)

# 有名分组
res = re.search('^[1-9](?P<xxx>\d{14})(?P<ooo>\d{2}[0-9x])?$','110105199812067023')
print(res)
print(res.group())  # 110105199812067023
print(res.group(1))  # 10105199812067  无名分组的取值方式(索引取)
print(res.group('xxx'))  # 10105199812067
print(res.group('ooo'))  # 023

正则实战案例

import re

# 读取带匹配的数据
with open(r'a.txt', 'r', encoding='utf8') as f:
    data = f.read()
# 利用正则匹配数据
# 分公司名称
title_list = re.findall('<h2>(.*？)</h2>', data)
# print(title_list)
address_list = re.findall("<p class='mapIco'>(.*?)</p>", data)  
# print(address_list)
# 分公司邮箱
email_list = re.findall("<p class='mailIco'>(.*?)</p>", data)
# print(email_list)
# 分公司电话
phone_list = re.findall("<p class='telIco'>(.*?)</p>", data)

res = zip(title_list, address_list, email_list, phone_list)
for data_tuple in res:
    print("""
    公司名称:%s
    公司地址:%s
    公司邮箱:%s
    公司电话:%s
    """ % (data_tuple[0], data_tuple[1], data_tuple[2], data_tuple[3]))

collections模块

# 该模块内部提供了一些高阶的数据类型

1.namedtuple(具名元组)
    from collections improt nametuple
    
    """
    nametuple('名称'，[名字1，名字2，...])
    nametuple('名称'，'名字1 名字2 ...')
    """
    point = namedtuple('坐标'， ['x', 'y'])
    res = point(11, 22)
    print(res)  # 坐标（x=11， y=22）
    print(res.x)  # 11
    print(res.y)  # 22
    point = namedtuple('坐标', 'x y z')
    res = point(11, 22, 33)
    print(res)  # 坐标(x=11, y=22, z=33)
    print(res.x)  # 11
    print(res.y)  # 22
    print(res.z)  # 33
    card = namedtuple('扑克', '花色 点数')
    card1 = card('♠', 'A')
    card2 = card('♥', 'K')
    print(card1)
    print(card1.花色)
    print(card1.点数)
2.队列
    队列模块
    import queue # 内置队列模块：FIFO
    初始化队列
    q = queue.Queue()
    往队列中添加元素
    q.put('first')
    q.put('second')
    q.put('third')
    从队列中获取元素
    print(q.get())
    print(q.get())
    print(q.get())
    print(q.get())    # 值去没了就会原地等待  一直等
3.双端队列
    from collections import deque
    q = depue([11,22,33])
    q.append(44)  # 从右添加
    q.appendleft(55) # 从左边添加
    print(q.pop()) # 从右边取值
    print(q.popleft())  # 从做边取值
4.有序字典
    normal_dict = dict([('name', 'jason'), ('pwd', 123), ('hobby', 'study')])
    print(normal_dict)
    {'hobby': 'study', 'pwd': 123, 'name': 'jason'}
    from collections import OrderedDict
    order_dict = OrderedDict([('name', 'jason'), ('pwd', 123), ('hobby', 'study')])
    print(order_dict)
    OrderedDict([('name', 'jason'), ('pwd', 123), ('hobby', 'study')])
    order_dict['xxx'] = 111
    order_dict
    OrderedDict([('name', 'jason'), ('pwd', 123), ('hobby', 'study'), ('xxx', 111)])
    normal_dict['yyy'] = 222
    normal_dict
    {'hobby': 'study', 'pwd': 123, 'yyy': 222, 'name': 'jason'}
  字典本是无序的，但通过from collections import OrderedDict，可让后添加的数据，有序（排到最后

5.默认值字典
    from collections import defaultdict
    values = [11, 22, 33,44,55,66,77,88,99,90]
    my_dict = defaultdict(list)
    for value in  values:
        if value>60:
            my_dict['k1'].append(value)
        else:
            my_dict['k2'].append(value)
    print(my_dict)
    
6.计数器
    res = 'abcdeabcdabcaba'
    # 统计字符串中每个元素出现的次数
    new_dict = {}
    for i in res:
        if i not in new_dict:
            new_dict[i] = 1
        else：
            new_dict[i] += 1
    print(new_dict)
    
    from collections import Counter  # 计数器
    ret = Counter(res)
    print(ret)

time模块

"""
时间三种表现形式
	1.时间戳（秒数
	2.结构化时间（一般是给机器人看的）
	3.格式化时间（一般是给人看的）
	三种时间是可以互相转换的！！！
"""
1.time.sleep()   # 原地阻塞指定的秒数
2.time.time()    # 获取时间戳时间

import time

# 格式化时间
print(time.strfttime('%Y-%m-%d'))  # 2021-11-25
print(time.strftime('%Y-%m-%d %H:%M:%S'))  # 2021-11-25 11:48:34
print(time.strftime('%Y-%m-%d %X'))   # 2021-11-25 11:48:34
"""
更多时间相关符号 保存到容易查找的位置即可
"""
print(time.localtime())   # time.struct_time(
						# tm_year=2021,
						# tm_mon=11,
						# tm_mday=25,
						# tm_hour=11,
						# tm_min=51,
                          # tm_sec=25,
						# tm_wday=3,
						# tm_yday=329,
						# tm_isdst=0)


print(time.time())
print(time.gmtime(11111111111))
print(time.localtime())

datatime模块

import datetime
print(datatime.data.tody())   # 2021-11-15
print(datatime.datetime.today())  # 2021-11-25  12:15:11.969769
"""data年月日   datatime年月日时分秒 time时分秒 (MySQL django后期可以)"""
res = datetime.datatime.today()
print(res.year) # 2021
print(res.month)# 11
print(res.day)  # 25
print(res.weekday()) #获取星期(week星期是0-6) 0表示周一
print(res.isweekday)# 获取星期(weekday星期是1-7) 1表示周一
"""时间差（timedelta)"""
ctime = datatime.datetime.today()
time_tel = datetime.timedelta(days = 3)
print(ctime)  # 2021-11-25 12:20:48.570489
print(ctime - time_tel)  # 2021-11-22 12:21:06.712396
print(ctime + time_tel)  # 2021-11-28 12:21:06.712396

"""
日期对象 = 日期对象 +/- timedelta对象
timedelta = 日期对象 +/-日期对象
"""

# ret = ctime + time_tel
# print(ret - ctime)  # 3 days, 0:00:00
# print(ctime - ret)  # -3 days, 0:00:00


# 小练习 计算举例今年过生日还有多少天
# birthday = datetime.date(2000, 11, 11)
# now_date = datetime.date.today()
# days = birthday - now_date
# print('距离生日还有{}天'.format(days))

# UTC时间与我们的东八区时间差 八个小时
# print(datetime.datetime.now())  # 2021-11-25 12:25:33.579310
# print(datetime.datetime.utcnow())  # 2021-11-25 04:25:33.579310

posted @ 2021-11-25 20:05 殷国敏阅读(40) 评论(0) 收藏举报

刷新页面返回顶部

python内置模块

python内置模块

取消转义

python内置模块之re模块

re模块其他方法

正则实战案例

collections模块

time模块

datatime模块

公告