6. Automation modules:
1. Coroutines
Examples
1. Learning example:
# Python coroutines
import asyncio
import time

async def func():  # async def marks this as a coroutine function
    print("My name is Seria")

if __name__ == '__main__':
    g = func()
    asyncio.run(g)
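What happens if the coroutine object is never handed to the event loop: the body simply does not run, and Python only emits a warning. A minimal sketch of that behavior:

import asyncio

async def func():
    print("My name is Seria")

if __name__ == '__main__':
    g = func()      # this only builds a coroutine object; nothing has run yet
    print(type(g))  # <class 'coroutine'>
    # If the script ended here, Python would print
    # "RuntimeWarning: coroutine 'func' was never awaited".
    asyncio.run(g)  # handing it to the event loop is what actually runs it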
async def func1():
    print("Hi, I'm Xiao Wang")
    # time.sleep(3)  # a synchronous call here would block the event loop and kill the concurrency
    await asyncio.sleep(3)  # the asynchronous version
    print("Hi, I'm Xiao Wang")

async def func2():
    print("Hi, I'm Xiao Hao")
    # time.sleep(2)
    await asyncio.sleep(2)
    print("Hi, I'm Xiao Hao")

async def func3():
    print("Hi, I'm Xiao Zhao")
    # time.sleep(2)
    await asyncio.sleep(2)
    print("Hi, I'm Xiao Zhao")
if __name__ == '__main__':
    f1 = func1()
    f2 = func2()
    f3 = func3()
    tasks = [f1, f2, f3]
    t1 = time.time()
    # start several coroutines at once
    # (note: handing bare coroutines to asyncio.wait() was deprecated in
    # Python 3.8 and removed in 3.11 -- the create_task version below is
    # the current way)
    asyncio.run(asyncio.wait(tasks))
    t2 = time.time()  # synchronous run: ~7.03s, asynchronous: ~3.02s
    print(t2 - t1)
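For comparison, the ~7.03s figure in the comment is what the same three waits cost when run synchronously. A minimal sketch (the *_sync names are made up for the comparison):

import time

def func1_sync():
    print("Hi, I'm Xiao Wang")
    time.sleep(3)  # blocking sleep: nothing else can run in the meantime

def func2_sync():
    print("Hi, I'm Xiao Hao")
    time.sleep(2)

def func3_sync():
    print("Hi, I'm Xiao Zhao")
    time.sleep(2)

if __name__ == '__main__':
    t1 = time.time()
    func1_sync()
    func2_sync()
    func3_sync()
    print(time.time() - t1)  # ~7s (3+2+2), versus ~3s with coroutines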
# func1 / func2 / func3: same definitions as above
async def main():
    # Way 1: await a single coroutine object directly
    # f1 = func1()
    # await f1  # the await goes right before the coroutine object
    # Way 2: wrap the coroutines in tasks and wait for all of them
    tasks = [
        asyncio.create_task(func1()),
        asyncio.create_task(func2()),
        asyncio.create_task(func3())
    ]
    await asyncio.wait(tasks)

if __name__ == '__main__':
    t1 = time.time()
    asyncio.run(main())
    t2 = time.time()
    print(t2 - t1)
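asyncio.gather() does the same job as the task list above and is the more common spelling in current Python; it also returns the coroutines' results in order. A minimal sketch reusing func1/func2/func3 (main_gather is a made-up name, to avoid clashing with main() above):

import asyncio
import time

async def main_gather():
    # gather schedules each coroutine as a task and waits for all of them
    results = await asyncio.gather(func1(), func2(), func3())
    print(results)  # [None, None, None] here, since the functions return nothing

if __name__ == '__main__':
    t1 = time.time()
    asyncio.run(main_gather())
    print(time.time() - t1)  # still ~3s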
2. Use in crawlers:
# How the pattern looks in a crawler
import asyncio

async def download(url):
    print("about to start the download")
    await asyncio.sleep(2)  # stand-in for the real network request
    print("download finished")

async def main():
    urls = [
        "http://www.baidu.com",
        "http://www.bilibili.com",
        "http://www.163.com"
    ]
    tasks = []
    for url in urls:
        d = asyncio.create_task(download(url))
        tasks.append(d)
    await asyncio.wait(tasks)  # multiple tasks running as coroutines

if __name__ == '__main__':
    asyncio.run(main())
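If download() returned something (say, the page body), asyncio.gather() would collect those values in order; a sketch under that assumption (the return value is made up):

import asyncio

async def download(url):
    await asyncio.sleep(2)  # stand-in for the real request
    return f"content of {url}"  # hypothetical payload

async def main():
    urls = ["http://www.baidu.com", "http://www.bilibili.com"]
    results = await asyncio.gather(*(download(u) for u in urls))
    for r in results:
        print(r)

if __name__ == '__main__':
    asyncio.run(main())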
# ----------------------------------------------------------------------
# Side note on generators (for my own reference)
def foo():
    print("starting")
    while True:
        res = yield 4
        print('res', res)

"""
A function that contains yield is a generator function.
Output of the calls below:
starting
4
********************
res None
4
The first next() runs to the yield and hands back 4 (yield acting like return);
the second next() resumes the paused yield, which puts None into res,
then the loop comes around and yields 4 again (yield acting as a generator).
"""
g = foo()  # g is a generator object
print(type(g))
print(next(g))
print('*' * 20)
print(next(g))
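The "res None" line shows up because next() resumes the paused yield without sending a value in; generator.send() is how a value actually reaches res. A minimal sketch:

def foo():
    print("starting")
    while True:
        res = yield 4
        print('res', res)

g = foo()
print(next(g))     # runs to the first yield: prints "starting", hands back 4
print(g.send(99))  # resumes the paused yield with 99, so res == 99;
                   # the loop comes back around and yields 4 again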
3. Async operations (aiohttp)
# pip install aiohttp
# aiohttp is the asynchronous counterpart of the requests module
import aiohttp
import asyncio

urls = [
    'http://kr.shanghai-jiuxin.com/file/2020/1031/774218be86d832f359637ab120eba52d.jpg',
    'http://kr.shanghai-jiuxin.com/file/2020/1031/563337d07af599a9ea64e620729f367e.jpg',
    'http://kr.shanghai-jiuxin.com/file/2020/1031/a2c58d6d726fb7ef29390becac5d8643.jpg'
]

async def aiodownload(url):
    # steps: send the request, get the image bytes, save them to a file
    name = url.rsplit('/', 1)[1]  # derive the file name from the URL
    # aiohttp.ClientSession() plays the role that requests plays in
    # synchronous code (session.get, session.post, ...)
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            # the response is back -- write the image bytes to a file
            # (binary mode, so open() takes no encoding argument)
            with open(name, mode='wb') as f:
                f.write(await resp.content.read())  # raw bytes of the image

async def main():
    tasks = []
    for url in urls:
        tasks.append(asyncio.create_task(aiodownload(url)))
    await asyncio.wait(tasks)

if __name__ == '__main__':
    asyncio.run(main())
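Opening a fresh ClientSession for every image works, but aiohttp sessions are designed to be reused; a sketch of the shared-session variant (same urls list as above):

import asyncio
import aiohttp

async def aiodownload(session, url):
    name = url.rsplit('/', 1)[1]
    async with session.get(url) as resp:
        with open(name, mode='wb') as f:
            f.write(await resp.content.read())

async def main():
    # one session carries all three requests
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.create_task(aiodownload(session, url)) for url in urls]
        await asyncio.wait(tasks)

if __name__ == '__main__':
    asyncio.run(main())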
2. The selenium module
Driver download page:
http://chromedriver.storage.googleapis.com/index.html
Examples:
1. Learning example:
# pip install selenium
# Download the driver that matches your browser,
# unzip it, and put chromedriver into the Python interpreter's directory
# so selenium can launch the browser.
from selenium.webdriver import Chrome

# create the browser object
web = Chrome()
# open a URL in the browser
web.get("http://www.baidu.com")
print(web.title)
web.close()
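If the browser window popping up is unwanted, Chrome can run headless; a minimal sketch (the flags are standard Chrome switches):

from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options

opt = Options()
opt.add_argument("--headless")  # no visible browser window
opt.add_argument("--disable-gpu")

web = Chrome(options=opt)
web.get("http://www.baidu.com")
print(web.title)
web.close()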
2. Automated testing:
from selenium.webdriver.common.by import By
from selenium.webdriver import Chrome
from selenium.webdriver.common.keys import Keys
import time

# create a browser object
web = Chrome()
web.get("https://www.lagou.com/")
# find an element and click it
el = web.find_element(By.XPATH, '//*[@id="changeCityBox"]/ul/li[3]/a')
el.click()  # click event
time.sleep(1)  # give the browser a moment
# find the search box, type "python", then send Enter
web.find_element(By.XPATH, '//*[@id="search_input"]').send_keys("python", Keys.ENTER)
# locate where the data lives, then process it;
# each job item sits under //*[@id="jobList"]/div[1]/div
li_list = web.find_elements(By.XPATH, '//*[@id="jobList"]/div[1]/div')
for it in li_list:
    name_1 = it.find_element(By.XPATH, './div[1]/div[1]/div[1]/a').text  # relative XPath inside the item
    print(name_1)
    # other fields in the same item:
    # name_2 = it.find_element(By.XPATH, './div[1]/div[2]/div[1]/a').text
    # price_1 = it.find_element(By.XPATH, './div[1]/div[1]/div[2]/span').text
    # price_2 = it.find_element(By.XPATH, './div[2]/div[1]/a').text
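The time.sleep(1) calls above just guess how long the page needs. An explicit wait blocks only until the element is actually there, which is both faster and more reliable; a sketch with the same job-list XPath:

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# wait up to 10 seconds for the job list to appear, then query it as before
WebDriverWait(web, 10).until(
    EC.presence_of_element_located((By.XPATH, '//*[@id="jobList"]/div[1]/div'))
)
li_list = web.find_elements(By.XPATH, '//*[@id="jobList"]/div[1]/div')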
3. Lagou: reading a new window
# get the data out of the window that a click opens
from selenium.webdriver import Chrome
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import time

web = Chrome()
web.get("https://www.lagou.com/")
web.find_element(By.XPATH, '//*[@id="changeCityBox"]/ul/li[3]/a').click()
web.find_element(By.XPATH, '//*[@id="search_input"]').send_keys("python", Keys.ENTER)
time.sleep(1)
web.find_element(By.XPATH, '//*[@id="jobList"]/div[1]/div[1]/div[1]/div[1]/div[1]').click()
time.sleep(1)
# getting into the new window:
# selenium does NOT switch to a newly opened window by default
web.switch_to.window(web.window_handles[-1])  # move focus to the newest window
# extract the content in the new window
job_data = web.find_element(By.XPATH, '//*[@id="job_detail"]/dd[2]').text
print(job_data)
# close the child window
web.close()
# move focus back to the original window
web.switch_to.window(web.window_handles[0])
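The switch-in / close / switch-back dance can be wrapped up once; a sketch of a small helper (the name in_new_window is made up):

from contextlib import contextmanager

@contextmanager
def in_new_window(web):
    # hypothetical helper: focus the newest window, restore focus afterwards
    original = web.current_window_handle
    web.switch_to.window(web.window_handles[-1])
    try:
        yield
    finally:
        web.close()                     # close the child window
        web.switch_to.window(original)  # back where we started

# usage, after clicking something that opens a new window:
# with in_new_window(web):
#     print(web.find_element(By.XPATH, '//*[@id="job_detail"]/dd[2]').text)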
Scraping code blocks (CSDN)
from selenium.webdriver.common.by import By
from selenium.webdriver import Chrome
import time

# create a browser object
web = Chrome()
web.get("https://blog.csdn.net/weixin_43918803/article/details/114412133")
time.sleep(1)  # give the page a moment to load
# if a login popup blocks the page, it can be dismissed first:
# web.find_element(By.XPATH, '//*[@id="passportbox"]/span').click()
# grab every code block in the article and print its text
li_list = web.find_elements(By.XPATH, '/html/body/div[3]/div[1]/main/div[1]/article/div/div[1]/pre[1]/code')
for it in li_list:
    cod = it.text
    print(cod)