Python 爬虫边看边学(多线程、多进程、协程)
进程是资源分配的单位,线程是执行的单位;每一个进程至少包含一个线程。
一、多线程
1、单线程
def fun():
    """Print ``fun 0`` through ``fun 999``, one pair per line."""
    for i in range(1000):
        print("fun", i)


def main():
    """Run the single-threaded demo.

    ``fun()`` runs to completion before the loop below starts, so every
    ``fun`` line is printed ahead of every ``main`` line.
    """
    fun()
    for i in range(1000):
        print("main", i)


if __name__ == '__main__':
    main()
2、多线程
from threading import Thread # 线程类
def fun():
    """Print ``fun 0`` through ``fun 999``; used as the child thread's target."""
    for i in range(1000):
        print("fun", i)


def main():
    """Run ``fun`` in a child thread while the main thread prints its own lines."""
    worker = Thread(target=fun)
    # start() only marks the thread as ready to run; when it actually gets
    # CPU time is up to the scheduler, so the output order is not fixed.
    worker.start()
    for i in range(1000):
        print("main", i)
    # Wait for the child explicitly so main() returns only after both
    # loops have finished.
    worker.join()


if __name__ == '__main__':
    main()
3、多线程传参
from threading import Thread # 线程类
def func(name):
    """Print ``name`` followed by each integer from 0 to 999.

    Args:
        name: Label printed at the start of every line.
    """
    for i in range(1000):
        print(name, i)


def main():
    """Start two child threads that run ``func`` with different labels."""
    # Positional arguments for the target are passed as a tuple via ``args``;
    # the trailing comma is what makes a one-element tuple.
    workers = [
        Thread(target=func, args=("第一子线程",)),
        Thread(target=func, args=("第二子线程",)),
    ]
    for worker in workers:
        # start() only marks the thread as ready to run; the scheduler
        # decides when it actually executes.
        worker.start()
    # Wait for both children so main() returns only when all output is done.
    for worker in workers:
        worker.join()


if __name__ == '__main__':
    main()
4、多线程(类实现)
from threading import Thread # 线程模块
class MyThead(Thread):
    """Thread subclass whose work is defined by overriding ``run()``."""

    def run(self):
        """Print ``子线程 0`` through ``子线程 9999``."""
        # Once the thread has been started, run() is what gets executed
        # in the new thread by default.
        for i in range(10000):
            print('子线程', i)


def main():
    """Start one ``MyThead`` and print from the main thread alongside it."""
    worker = MyThead()
    worker.start()  # launch the child thread
    for i in range(10000):  # the main thread's own work
        print('主线程', i)
    # Wait for the child so main() returns only when all output is done.
    worker.join()


if __name__ == '__main__':
    main()
二、多进程
from multiprocessing import Process
def func():
    """Print ``子进程 0`` through ``子进程 99999``; used as the child process target."""
    for i in range(100000):
        print('子进程', i)


def main():
    """Run ``func`` in a child process while the parent prints its own lines."""
    child = Process(target=func)
    child.start()
    for i in range(100000):
        print('主进程', i)
    # Wait for the child so main() returns only after both loops finish.
    child.join()


# The guard matters for multiprocessing: under the "spawn" start method the
# child re-imports this module, and unguarded code would start processes again.
if __name__ == '__main__':
    main()
三、线程池和进程池
一次性开辟一些线程,用户直接给线程池提交任务,线程任务的调度交给线程池完成。
from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor
def fn(name):
    """Print ``name`` followed by each integer from 0 to 999.

    Args:
        name: Label printed at the start of every line.
    """
    for i in range(1000):
        print(name, i)


def main():
    """Submit 100 ``fn`` tasks to a pool of 50 threads, then print ``123``."""
    with ThreadPoolExecutor(50) as pool:
        for i in range(100):
            pool.submit(fn, name=f"线程{i}")
    # Leaving the ``with`` block shuts the pool down and waits for every
    # submitted task, so this line is printed only after all tasks are done.
    print('123')


if __name__ == '__main__':
    main()
四、线程池应用于新发地批发市场
import requests
from lxml import etree
import csv
from concurrent.futures import ThreadPoolExecutor
import threading

# The output file is opened once, at import time, and shared by all workers.
f = open('data.csv', "w", encoding='utf-8', newline='')
csvwriter = csv.writer(f)
# A single writer is shared by up to 50 threads; this lock keeps rows coming
# from different pages from being interleaved while they are written.
csv_lock = threading.Lock()


def down_one_page(url):
    """Download one listing page and append its table rows to ``data.csv``.

    Args:
        url: Address of the page to fetch.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        ValueError: If the expected table is not found in the page.
    """
    # Without a timeout a stalled connection would block a worker forever.
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    resp.encoding = 'utf-8'
    html = etree.HTML(resp.text)
    tables = html.xpath('/html/body/div[2]/div[4]/div[1]/table') if html is not None else []
    if not tables:
        raise ValueError(f"no data table found at {url}")

    # Build every row up front as a plain list (the first <tr> is skipped,
    # presumably the header row), so the shared writer is only touched while
    # the lock is held and never iterates a lazy generator.
    rows = []
    for tr in tables[0].xpath('./tr')[1:]:
        cells = tr.xpath('./td/text()')
        rows.append([cell.replace("\\", "").replace("/", "") for cell in cells])

    with csv_lock:
        csvwriter.writerows(rows)


def main():
    """Fetch pages 1-199 with a pool of 50 threads, then close the CSV file."""
    try:
        with ThreadPoolExecutor(50) as pool:
            futures = {}
            for i in range(1, 200):
                url = f"http://www.xinfadi.com.cn/marketanalysis/0/list/{i}.shtml"
                futures[pool.submit(down_one_page, url)] = url
        # Every future is finished once the ``with`` block exits. Exceptions
        # raised inside workers are stored on the future, so report them
        # here instead of letting them vanish silently.
        for future, url in futures.items():
            error = future.exception()
            if error is not None:
                print(f"failed: {url}: {error!r}")
    finally:
        # Close (and flush) the output even if something above went wrong.
        f.close()


if __name__ == '__main__':
    main()
五、协程
当程序遇到 IO 操作时,可以选择性地切换到其他任务上。
import time
import asyncio
async def func1():
    """Print a marker, suspend for 3 seconds, then print a second marker."""
    print('@@@@@@')
    await asyncio.sleep(3)
    print('@@@@@@@@')


async def func2():
    """Print a marker, suspend for 5 seconds, then print a second marker."""
    print('###########')
    await asyncio.sleep(5)
    print('##########')


async def fun3():
    """Print a marker, suspend for 2 seconds, then print a second marker."""
    print('*******')
    await asyncio.sleep(2)
    print('************')


async def main():
    """Run the three coroutines concurrently and wait until all have finished."""
    # asyncio.wait() no longer accepts bare coroutine objects (this was
    # deprecated in Python 3.8 and removed in 3.11); gather() wraps each
    # coroutine in a task and waits for all of them.
    await asyncio.gather(func1(), func2(), fun3())


if __name__ == '__main__':
    # perf_counter() is a monotonic clock intended for measuring intervals.
    # The coroutines sleep concurrently, so the elapsed time is close to the
    # longest sleep (5 s) rather than the sum of all three (10 s).
    started = time.perf_counter()
    asyncio.run(main())
    print(time.perf_counter() - started)

浙公网安备 33010602011771号