Python爬虫边看边学(多线程、多进程、协程)

进程是资源单位,线程是执行单位,每一个进程至少包含一个线程。

一、多线程

       1、单线程

def fun():
    """Print the label "fun" next to each integer from 0 through 999, one per line."""
    counter = 0
    while counter < 1000:
        print("fun", counter)
        counter += 1

if __name__ == '__main__':
    # Single-threaded baseline: fun() runs to completion first, so every
    # "fun" line is printed before the first "main" line.
    fun()
    for i in range(1000):
        print("main", i)

  2、多线程

from threading import Thread  # 线程类


def fun():
    """Emit 1000 lines of the form "fun <n>" for n = 0..999 on standard output."""
    label = "fun"
    for step in range(1000):
        print(label, step)


if __name__ == '__main__':
    # Run fun() on a second thread while the main thread keeps printing.
    worker = Thread(target=fun)
    # start() only marks the thread as ready to work; the exact moment it
    # runs is decided by the CPU scheduler.
    worker.start()

    main_counter = 0
    while main_counter < 1000:
        print("main", main_counter)
        main_counter += 1

       3、多线程传参

from threading import Thread  # 线程类

def func(name):
    """Print ``name`` followed by each integer from 0 through 999, one per line.

    Args:
        name: Label printed at the start of every line; the caller uses it
            to tell the output of different threads apart.
    """
    for i in range(1000):
        print(name, i)


if __name__ == '__main__':
    # Start one thread per label. Positional arguments for the target are
    # passed through the ``args`` tuple. start() only marks a thread as ready
    # to work; the exact moment it runs is decided by the CPU scheduler.
    for thread_label in ("第一子线程", "第二子线程"):
        Thread(target=func, args=(thread_label,)).start()

  4、多线程(类实现)

from threading import Thread  # 线程模块

class MyThead(Thread):
    """Thread subclass that defines its work by overriding run()."""

    def run(self):
        """Print '子线程' with a counter from 0 through 9999.

        Once the thread has been started, run() is what gets executed by
        default.
        """
        for i in range(10000):
            print('子线程', i)


if __name__ == '__main__':
    t = MyThead()
    t.start()  # start the child thread
    for i in range(10000):  # main thread
        print('主线程', i)

  

 二、多进程

from multiprocessing import Process


def func():
    """Print '子进程' next to each integer from 0 through 99999, one per line."""
    counter = 0
    while counter < 100000:
        print('子进程', counter)
        counter += 1


if __name__ == '__main__':
    # Run func() in a child process while the parent process prints its own
    # counter.
    child = Process(target=func)
    child.start()
    parent_counter = 0
    while parent_counter < 100000:
        print('主进程', parent_counter)
        parent_counter += 1

  

三、线程池和进程池

       一次性开辟一些线程,用户直接给线程池提交任务,线程任务的调度交给线程池完成。

from concurrent.futures import ThreadPoolExecutor,ProcessPoolExecutor


def fn(name):
    """Print ``name`` and a running counter from 0 through 999, one pair per line.

    Args:
        name: Label printed in front of each counter value.
    """
    seq = 0
    while seq < 1000:
        print(name, seq)
        seq += 1


if __name__ == '__main__':
    # Submit 100 tasks to a pool of 50 worker threads; the pool decides which
    # worker runs which task.
    with ThreadPoolExecutor(50) as pool:
        for task_no in range(100):
            pool.submit(fn, name=f"线程{task_no}")
    # Leaving the with-block waits for all submitted tasks, so this prints last.
    print('123')

  

四、线程池应用:抓取新发地批发市场数据

import csv
import threading
from concurrent.futures import ThreadPoolExecutor

import requests
from lxml import etree

# Output file and CSV writer shared by every worker thread.
f = open('data.csv', "w", encoding='utf-8', newline='')
csvwriter = csv.writer(f)
# Text-mode file objects are not thread-safe, so all writes coming from the
# pool workers are serialized through this lock.
_write_lock = threading.Lock()


def down_one_page(url):
    """Download one listing page and append its table rows to the shared CSV.

    Args:
        url: Address of the page to fetch.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the page does not contain the expected table.
    """
    # A timeout keeps a stalled connection from blocking a pool worker forever.
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()
    resp.encoding = 'utf-8'
    html = etree.HTML(resp.text)
    tables = html.xpath('/html/body/div[2]/div[4]/div[1]/table') if html is not None else []
    if not tables:
        raise IndexError(f"no data table found in {url}")
    # The first <tr> is skipped (presumably the header row -- confirm against
    # the live page). Backslashes and slashes are stripped from every cell.
    rows = [
        [cell.replace("\\", "").replace("/", "") for cell in tr.xpath('./td/text()')]
        for tr in tables[0].xpath('./tr')[1:]
    ]
    # Write the whole page under one lock acquisition so that rows produced
    # by different threads cannot be interleaved.
    with _write_lock:
        csvwriter.writerows(rows)


if __name__ == '__main__':
    futures = {}
    with ThreadPoolExecutor(50) as t:
        for i in range(1, 200):
            page_url = f"http://www.xinfadi.com.cn/marketanalysis/0/list/{i}.shtml"
            futures[t.submit(down_one_page, page_url)] = page_url
    # Leaving the with-block waits for every task. Report the failures, which
    # the futures would otherwise swallow silently.
    for future, page_url in futures.items():
        error = future.exception()
        if error is not None:
            print(f"failed: {page_url}: {error!r}")
f.close()

  

 五、协程

       当程序遇到IO操作时,可以选择性地切换到其他任务上执行。

import time
import asyncio


async def func1():
    """Print a short '@' banner, suspend for 3 seconds, then print a longer one."""
    opening, closing = '@@@@@@', '@@@@@@@@'
    print(opening)
    # Awaiting the sleep suspends this coroutine without blocking the thread.
    await asyncio.sleep(3)
    print(closing)


async def func2():
    """Print a '#' banner, suspend for 5 seconds, then print a second '#' banner."""
    opening, closing = '###########', '##########'
    print(opening)
    # Awaiting the sleep suspends this coroutine without blocking the thread.
    await asyncio.sleep(5)
    print(closing)


async def fun3():
    """Print a short '*' banner, suspend for 2 seconds, then print a longer one."""
    opening, closing = '*******', '************'
    print(opening)
    # Awaiting the sleep suspends this coroutine without blocking the thread.
    await asyncio.sleep(2)
    print(closing)


async def main():
    """Run func1, func2 and fun3 concurrently and return once all have finished."""
    # asyncio.wait() no longer accepts bare coroutine objects (deprecated in
    # Python 3.8, rejected with TypeError from 3.11). gather() wraps each
    # coroutine in a task and waits for all of them.
    await asyncio.gather(func1(), func2(), fun3())


if __name__ == '__main__':
    # Measure the wall-clock duration of the whole coroutine run.
    started = time.time()
    asyncio.run(main())
    elapsed = time.time() - started
    print(elapsed)

  

posted @ 2021-04-06 10:22  wangshanglinju  阅读(81)  评论(0)  编辑  收藏  举报