python进程和线程
进程和线程
进程与线程介绍:
工作最小单元是线程
应用程序 -> 至少有一个进程 -> 至少有一个线程
应用场景:
线程:IO密集型程序
进程:计算密集型程序
GIL:全局解释器锁
保证同一个进程中,同一时刻只有一个线程在执行Python字节码(即同一时刻只有一个线程被调度)
![]()
进程与线程的区别:
1、进程内存独立,线程共享同一进程的内存
2、进程是资源的集合,线程是执行单位
3、进程之间不能直接互相访问,线程可以互相通信
4、创建新进程非常消耗系统资源(申请进程号、资源池、内存空间....),线程非常轻量,只保存线程需要运行时的必要数据,如上下文,程序堆栈
5、同一进程里的线程可以互相控制;进程只能控制自己的子进程
线程的基本使用
import threading
import time


def task(arg):
    """Sleep for ``arg`` seconds and then print ``arg``."""
    time.sleep(arg)
    print(arg)


# Launch five workers; worker n sleeps n seconds before printing n.
for delay in range(5):
    worker = threading.Thread(target=task, args=[delay])
    # worker.setDaemon(True)   # daemon: the main thread exits without waiting for it
    # worker.setDaemon(False)  # non-daemon (the default): the interpreter waits at exit
    worker.start()
    # worker.join()   # block until this worker finishes -> effectively serial execution
    # worker.join(1)  # block for at most 1 second, then the main thread simply
    #                 # continues; the timeout does NOT stop the worker thread

print('end')
自定义线程类
class MyThread(threading.Thread):
    """Thread subclass whose ``run`` calls a function supplied by the caller."""

    def __init__(self, func, *args, **kwargs):
        # Everything except ``func`` is forwarded untouched to threading.Thread.
        threading.Thread.__init__(self, *args, **kwargs)
        self.func = func

    def run(self):
        """Executed inside the new thread once ``start()`` is called."""
        callback = self.func
        callback()


def task():
    """Demo payload: wait one second, then print 11."""
    time.sleep(1)
    print(11)


obj = MyThread(func=task)
obj.start()  # start() spawns the thread, which then executes MyThread.run()
线程中的锁
1、只能有一个人使用锁
import threading
import time

v = 10
# task() acquires the lock twice from the same thread.  A plain
# threading.Lock is not re-entrant, so the second acquire would block
# forever and the whole program would hang.  RLock may be acquired
# repeatedly by the thread that already owns it, as long as every
# acquire is matched by a release.
lock = threading.RLock()


def task(arg):
    """Decrement the shared counter ``v`` by one and print the new value.

    ``arg`` is the worker index passed by the launcher; it is not used.
    The update runs under ``lock`` so only one thread at a time touches ``v``.
    """
    global v
    time.sleep(2)
    # Nested acquisition (kept from the original demo); ``with`` guarantees
    # both levels are released even if the body raises.
    with lock:
        with lock:
            v -= 1
            print(v)


for i in range(10):
    t = threading.Thread(target=task, args=(i,))
    t.start()
2、多个人同时使用锁
import threading
import time

v = 10
# A bounded semaphore with three slots: up to three threads may hold it at once.
lock = threading.BoundedSemaphore(3)


def task(arg):
    """Take one semaphore slot, wait a second, then decrement and print ``v``."""
    global v
    with lock:  # other threads wait here while all three slots are taken
        time.sleep(1)
        v -= 1
        print(v)
    # leaving the ``with`` block returns the slot


for idx in range(10):
    threading.Thread(target=task, args=(idx,)).start()
3、一次性解除所有线程的锁限制(Event:set之后,所有正在wait的线程同时被放行)
import threading
import time

v = 10
lock = threading.Event()


def task(arg):
    """Wait for the event to be set, then print ``arg``."""
    time.sleep(1)
    lock.wait()  # every worker parks here until the event is set
    print(arg)


for n in range(v):
    threading.Thread(target=task, args=(n,)).start()

# Typing "1" sets the event (releasing every waiting worker) and immediately
# clears it again so later waiters block once more.
while True:
    if input('>>>>') == '1':
        lock.set()
        lock.clear()
4、肆意妄为:想放行几个线程就放行几个(Condition,notify(n)唤醒n个等待的线程)
import threading
import time

v = 10
lock = threading.Condition()


def task(arg):
    """Block on the condition until notified, then print the worker index."""
    time.sleep(1)
    with lock:
        lock.wait()  # releases the condition's lock while waiting
        print('线程', arg)


for n in range(v):
    threading.Thread(target=task, args=(n,)).start()

# Each number typed wakes up that many waiting workers.
while True:
    value = input('>>>>')
    with lock:
        lock.notify(int(value))
线程池
1、直接处理,正常使用线程池并发
from concurrent.futures import ThreadPoolExecutor

import requests


def task(url):
    """Fetch ``url`` over HTTP and print it together with the body length."""
    body = requests.get(url).content
    print('得到结果:', url, len(body))


# Two worker threads issue the HTTP requests concurrently.
pool = ThreadPoolExecutor(2)

url_list = [
    'http://www.oldboyedu.com',
    'http://www.autohome.com.cn',
    'http://www.baidu.com',
]

for target in url_list:
    print('开始请求', target)
    # Hand the job to the pool; it runs as soon as a worker thread is free.
    pool.submit(task, target)
from concurrent.futures import ThreadPoolExecutor

import paramiko


def task(host):
    """Run ``df`` on ``host`` over SSH and print the raw command output.

    The client is closed in ``finally`` so the connection is not leaked
    when ``connect`` or ``exec_command`` raises.
    """
    ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy and the hard-coded credentials below are
    # demo-only shortcuts; do not use them against real hosts.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(hostname=host, port=22, username='wupeiqi', password='123')
        stdin, stdout, stderr = ssh.exec_command('df')
        result = stdout.read()
    finally:
        ssh.close()
    print(result)


pool = ThreadPoolExecutor(2)

host_list = [
    'c1.com',
    'c2.com',
    'c3.com',
]

for host in host_list:
    print('开始请求', host)
    # Hand the job to the pool; it runs as soon as a worker thread is free.
    pool.submit(task, host)
2、分步处理,使用回调函数的线程池并发
from concurrent.futures import ThreadPoolExecutor

import requests


def txt(future):
    """Done-callback: print the URL and status code of a finished download."""
    download_response = future.result()
    print('处理中', download_response.url, download_response.status_code)


def download(url):
    """Fetch ``url`` and return the ``requests`` response object."""
    return requests.get(url)


pool = ThreadPoolExecutor(2)

url_list = [
    'http://www.oldboyedu.com',
    'http://www.autohome.com.cn',
    'http://www.baidu.com',
]

for target in url_list:
    print('开始请求', target)
    # Step 1: schedule the download on a pool thread.
    pending = pool.submit(download, target)
    # Step 2: once the download has finished, txt() is called with the future.
    pending.add_done_callback(txt)
进程的基本使用
from multiprocessing import Process
import time


def task(arg):
    """Sleep for ``arg`` seconds, then print ``arg``."""
    time.sleep(arg)
    print(arg)


if __name__ == '__main__':
    for seconds in range(10):
        # daemon=True: the child is terminated when the main process exits
        # (daemon=False would make the main process wait for it instead).
        child = Process(target=task, args=(seconds,), daemon=True)
        child.start()
        child.join(1)  # wait at most one second for this child
    print('主进程最后...')
进程的数据共享
线程之间由于使用的是同一块内存,因此数据可以在线程之间共享。但是进程之间的内存相互独立,默认情况下不能共享数据。不过有两种方式可以实现进程之间的数据共享:
from multiprocessing import Process
from threading import Thread


def task(num, li):
    """Append ``num`` to ``li`` and print the list as this worker sees it."""
    li.append(num)
    print(li)


if __name__ == '__main__':
    # Experiment: pass the same list to ten workers.  With Thread (active
    # below) every worker appends to one shared list; swap in the Process
    # line to check that processes do not share it.
    v = []
    for n in range(10):
        # worker = Process(target=task, args=(n, v))
        worker = Thread(target=task, args=(n, v))
        worker.start()
1、Array:基于C语言中的数组实现
缺点:需要提前设定好列表的长度和数据类型
from multiprocessing import Array, Process


def task(num, li):
    """Set slot ``num`` of ``li`` to 1 and print the whole array as a list."""
    li[num] = 1
    snapshot = list(li)
    print(snapshot)


if __name__ == '__main__':
    # Shared array of ten elements with type code 'i'; size and element
    # type have to be fixed up front.
    v = Array('i', 10)
    for slot in range(10):
        Process(target=task, args=(slot, v)).start()
2、Manager:通过socket方式实现
from multiprocessing import Manager, Process


def task(num, li):
    """Append ``num`` to the shared list ``li`` and print it."""
    li.append(num)
    print(li)


if __name__ == '__main__':
    # Manager().list() returns a list proxy that child processes can mutate
    # (Manager().dict() is the dictionary equivalent).
    v = Manager().list()
    for n in range(10):
        child = Process(target=task, args=(n, v))
        child.start()
        child.join()  # wait for this child before starting the next one
进程池
1、直接处理,正常使用进程池并发
from concurrent.futures import ProcessPoolExecutor


def task(arg):
    """Print ``arg`` (runs inside a pool worker process)."""
    print(arg)


# The pool must only be created when the file runs as a script.  With the
# "spawn" start method (the default on Windows and macOS) each worker
# re-imports this module; without the guard every worker would try to
# create its own pool and the program fails instead of running the tasks.
if __name__ == '__main__':
    # ``with`` waits for all submitted tasks and shuts the pool down cleanly.
    with ProcessPoolExecutor(5) as pool:
        for i in range(10):
            pool.submit(task, i)
2、分步处理,使用回调函数的进程池并发
from concurrent.futures import ProcessPoolExecutor


def call(arg):
    """Done-callback: print the result carried by the finished future ``arg``."""
    print(arg.result())


def task(arg):
    """Print ``arg`` and return ``arg + 100``."""
    print(arg)
    shifted = arg + 100
    return shifted


if __name__ == '__main__':
    pool = ProcessPoolExecutor(5)
    for n in range(10):
        # Step 1 runs task() in a worker process; step 2 hands the finished
        # future to call().
        pool.submit(task, n).add_done_callback(call)
协程
1、协程永远是一个线程在支持,对线程的一个分片处理,在多个函数中来回跳
from greenlet import greenlet


def test1():
    """First coroutine: prints 12 and 34, yielding to ``gr2`` after each."""
    print(12)
    gr2.switch()  # hand control to test2
    print(34)
    gr2.switch()  # resume test2 where it left off


def test2():
    """Second coroutine: prints 56, yields back to ``gr1``, then prints 78."""
    print(56)
    gr1.switch()  # hand control back to test1
    print(78)


gr1 = greenlet(test1)
gr2 = greenlet(test2)
gr1.switch()  # kick things off by entering test1

# Execution order:
#   1. prints 12
#   2. prints 56
#   3. prints 34
#   4. prints 78
2、二次加工:
自定义方式
现成的模块 - gevent模块
from gevent import monkey
monkey.patch_all()  # runs before ``requests`` is imported, as in the original

import gevent
import requests


def f(url):
    """Fetch ``url`` and print the final URL and HTTP status code."""
    reply = requests.get(url)
    print(reply.url, reply.status_code)


# One greenlet per URL; joinall() blocks until all of them have finished.
jobs = [
    gevent.spawn(f, address)
    for address in (
        'http://www.oldboyedu.com/',
        'http://www.baidu.com/',
        'http://github.com/',
    )
]
gevent.joinall(jobs)
IO多路复用
基于select实现服务端的“伪”并发,同时监听多个socket对象(包括监听端口和从客户端发来的conn),并在内部进行循环操作
另外还有两种方式,poll和epoll。poll也是内部进行循环操作,属于主动方式,epoll是触发式,属于被动方式。
什么是可读状态:socket上有数据(或新的连接)到达,服务端可以立即读取(recv/accept)的状态
什么是可写状态:socket的发送缓冲区有空间,服务端可以立即发送数据的状态
import socket
import select

# Two listening sockets served by a single thread through select().
sk1 = socket.socket()
sk1.bind(('127.0.0.1', 8001,))
sk1.listen(5)

sk2 = socket.socket()
sk2.bind(('127.0.0.1', 8002,))
sk2.listen(5)

inputs = [sk1, sk2, ]   # sockets watched for readability (listeners + clients)
w_inputs = []           # client sockets that still owe a reply


def drop_client(sock):
    """Close ``sock`` and remove every trace of it from the watch lists."""
    if sock in inputs:
        inputs.remove(sock)
    # A client may be queued more than once (one entry per pending reply).
    w_inputs[:] = [pending for pending in w_inputs if pending is not sock]
    sock.close()


while True:
    # IO multiplexing: watch many sockets at once.
    #   - select: the caller polls, scanning the watched descriptors itself
    #             (original note: limit of 1024)
    #   - poll:   the caller polls as well
    #   - epoll:  the caller is notified instead of polling
    r, w, e = select.select(inputs, w_inputs, inputs, 0.05)
    closed = []

    for obj in r:
        if obj in [sk1, sk2]:
            # A listening socket is readable: a new connection has arrived.
            print('新连接来了:', obj)
            conn, addr = obj.accept()
            inputs.append(conn)
            continue

        # A connected client is readable: it sent data or disconnected.
        print('有用户发送数据了:', obj)
        try:
            data = obj.recv(1024)
        except OSError:
            data = b''  # treat a broken connection like a disconnect
        if data:
            w_inputs.append(obj)  # reply once the socket is writable
        else:
            # The client usually has no reply pending at this point, so the
            # removal from w_inputs must not assume it is present.
            drop_client(obj)
            closed.append(obj)

    for obj in w:
        if obj in closed:
            continue  # closed during the read phase of this iteration
        try:
            obj.sendall(b'ok')
        except OSError:
            drop_client(obj)
            closed.append(obj)
        else:
            w_inputs.remove(obj)
1、使用IO多路复用和线程实现模拟socketserver并发
import select
import socket
import threading


def process_request(conn):
    """Serve one client: answer every message with b'1111'.

    Returns when the peer closes its side (``recv`` yields b''); the
    connection is always closed on the way out.
    """
    try:
        while True:
            v = conn.recv(1024)
            if not v:
                break  # peer disconnected; stop instead of spinning forever
            conn.sendall(b'1111')
    finally:
        conn.close()


if __name__ == '__main__':
    sk1 = socket.socket()
    sk1.bind(('127.0.0.1', 8001,))
    sk1.listen(5)
    inputs = [sk1, ]
    while True:
        r, w, e = select.select(inputs, [], inputs, 0.05)
        for obj in r:
            # Only the listening socket is watched; compare by identity
            # (``obj in sk1`` would raise TypeError: sockets are not containers).
            if obj is sk1:
                # conn is the socket for the newly accepted client
                conn, addr = obj.accept()
                t = threading.Thread(target=process_request, args=(conn,))
                t.start()
2、自定义异步非阻塞web框架
通过IO多路复用和socket中的setblocking实现
import select
import socket


class MyWeb(object):
    """Minimal single-threaded web framework built on select().

    ``routers`` is a sequence of ``(url, handler)`` pairs; a handler is
    called as ``handler(header_dict, body)`` and returns the response text.
    """

    def __init__(self, routers):
        self.routers = routers

    def process_data(self, conn):
        """Read and parse one HTTP request from the non-blocking ``conn``.

        Returns ``(header_dict, body)``.  ``header_dict`` holds the keys
        'method', 'url' and 'protcol' (spelling kept for compatibility)
        taken from the request line, plus one entry per "Name:value"
        header line (values are stored exactly as received).
        Returns ``(None, None)`` when nothing could be read or the request
        is malformed.
        """
        chunks = []
        while True:
            try:
                chunk = conn.recv(1024)
            except OSError:
                # Non-blocking socket with nothing (more) to read, or a
                # broken connection: stop reading either way.
                chunk = None
            if not chunk:
                break
            chunks.append(chunk)
        data = b''.join(chunks)
        if not data:
            return None, None

        try:
            data_str = str(data, encoding='utf-8')
            header, body = data_str.split('\r\n\r\n', 1)
        except ValueError:
            # Undecodable bytes or no blank line separating headers and body.
            return None, None

        header_list = header.split('\r\n')
        # The first line is always the request line; parsing it explicitly
        # keeps URLs that contain ':' from being mistaken for a header.
        request_line = header_list[0].split(' ')
        if len(request_line) != 3:
            return None, None
        header_dict = {}
        header_dict['method'], header_dict['url'], header_dict['protcol'] = request_line
        for line in header_list[1:]:
            value = line.split(':', 1)
            if len(value) == 2:
                k, v = value
                header_dict[k] = v
        return header_dict, body

    def run(self, host='127.0.0.1', port=8888):
        """Serve forever on ``(host, port)``, one request per connection."""
        sock = socket.socket()
        sock.bind((host, port,))
        sock.listen(5)

        inputs = [sock, ]
        while True:
            rList, wList, eList = select.select(inputs, [], [], 0.5)
            for client in rList:
                if client == sock:
                    # New connection: make it non-blocking so recv() raises
                    # instead of hanging when no data is available.
                    conn, addr = client.accept()
                    conn.setblocking(False)
                    inputs.append(conn)
                    continue

                try:
                    header_dict, body = self.process_data(client)
                    if header_dict:
                        request_url = header_dict['url']
                        func_name = None
                        for item in self.routers:
                            if item[0] == request_url:
                                func_name = item[1]
                                break
                        if not func_name:
                            client.sendall(b'404')
                        else:
                            result = func_name(header_dict, body)
                            client.sendall(result.encode('utf-8'))
                finally:
                    # Always drop the client, including the "peer closed /
                    # bad request" case; otherwise select() would report the
                    # dead socket as readable on every iteration.
                    inputs.remove(client)
                    client.close()


def f1(header_dict, body):
    """Handler for /index.html."""
    return 'ffffff'


def f2(header_dict, body):
    """Handler for /login.html."""
    return 'aaaaaaaaaa'


routers = [
    ('/index.html', f1),
    ('/login.html', f2)
]

if __name__ == '__main__':
    obj = MyWeb(routers)
    obj.run()
3、模拟爬虫
利用一个线程,同时发送n个请求(异步非阻塞模块)
a. 循环列表,为每一个URL生成Socket对象
b. 每一个socket对象,向远程发送链接请求
connect: 阻塞
c. 如果连接:
发送数据:遵循格式
d. 获取响应内容
e. 关闭
fileno:文件描述符
import socket
import select


class Foo(object):
    """A socket bundled with the request it belongs to.

    Exposing ``fileno()`` lets instances be passed straight to
    ``select.select`` while still carrying the callback, url and host.
    """

    def __init__(self, sock, callback, url, host):
        self.sock = sock
        self.callback = callback
        self.url = url
        self.host = host

    def fileno(self):
        """Return the file descriptor of the wrapped socket."""
        return self.sock.fileno()


class NbIO(object):
    """Issue several HTTP GET requests from one thread with non-blocking sockets."""

    def __init__(self):
        self.fds = []           # requests still waiting for a response
        self.connections = []   # requests whose HTTP request is not sent yet

    def connect(self, url_list):
        """Start a non-blocking connect to port 80 for every entry.

        Each entry is a dict with the keys 'host', 'url' and 'callback'.
        """
        for item in url_list:
            conn = socket.socket()
            conn.setblocking(False)
            # 1. Begin connecting; a non-blocking connect reports
            #    "in progress" through BlockingIOError.
            try:
                conn.connect((item['host'], 80))
            except BlockingIOError:
                pass
            obj = Foo(conn, item['callback'], item['url'], item['host'])
            self.fds.append(obj)
            self.connections.append(obj)

    def _discard(self, obj):
        """Stop tracking ``obj`` and close its socket."""
        for tracked in (self.fds, self.connections):
            if obj in tracked:
                tracked.remove(obj)
        obj.sock.close()

    @staticmethod
    def _recv_available(conn):
        """Return everything that can be read from ``conn`` right now."""
        chunks = []
        while True:
            try:
                d = conn.recv(1024)
            except BlockingIOError:
                break
            if not d:
                break
            chunks.append(d)
        return b''.join(chunks)

    def send(self):
        """Drive all requests until each one has been answered or has failed.

        For every request: once its socket is writable the GET request is
        sent; once it is readable the available data is passed to the
        request's callback and the socket is closed.  A request whose
        socket raises ``OSError`` is dropped without invoking its callback.
        """
        while self.fds:
            rList, wList, eList = select.select(self.fds, self.connections, [], 0.5)
            finished = []

            for obj in rList:
                # 4. A response (or an error) arrived on this socket.
                try:
                    data = self._recv_available(obj.sock)
                except OSError:
                    # Connection failed or was reset: give up on this request
                    # rather than retrying the same dead socket forever.
                    self._discard(obj)
                    finished.append(obj)
                    continue
                obj.callback(data)  # user-supplied handling (f1 / f2)
                self._discard(obj)
                finished.append(obj)

            for obj in wList:
                if obj in finished:
                    continue
                # 2. Writable means the connect attempt has completed.
                # 3. Send the HTTP request.
                template = "GET %s HTTP/1.1\r\nHost: %s\r\n\r\n" % (obj.url, obj.host,)
                try:
                    obj.sock.sendall(template.encode('utf-8'))
                except OSError:
                    self._discard(obj)  # the connect attempt failed
                else:
                    self.connections.remove(obj)


def f1(data):
    """Example callback: print the raw response bytes."""
    print(data)


def f2(data):
    """Example callback: print the raw response bytes."""
    print(data)


url_list = [
    {'host': "www.baidu.com", 'url': '/', 'callback': f1},
    {'host': "www.cnblogs.com", 'url': '/wupeiqi', 'callback': f1},
    {'host': "www.oldboyedu.com", 'url': '/', 'callback': f1},
]

if __name__ == '__main__':
    # NbIO is defined in this file, so it is referenced directly.
    obj = NbIO()
    obj.connect(url_list)
    obj.send()
实现线程的两种方法
1、线程函数
import threading
import time


def sayhi(num):
    """Thread body: announce which number this thread runs on, then sleep 3s."""
    print("running on number:%s" % num)
    time.sleep(3)


if __name__ == '__main__':
    t1 = threading.Thread(target=sayhi, args=(1,))  # create a thread instance
    t2 = threading.Thread(target=sayhi, args=(2,))  # create a second instance
    t1.start()  # start the first thread
    t2.start()  # start the second thread
    # Thread.getName() is deprecated; the ``name`` attribute gives the same value.
    print(t1.name)
    print(t2.name)
2、线程类
import threading
import time


class MyThread(threading.Thread):
    """Thread subclass that reports the number it was constructed with."""

    def __init__(self, num):
        super(MyThread, self).__init__()
        self.num = num

    def run(self):
        """Thread body: print the stored number, then sleep for three seconds."""
        message = "running on number:%s" % self.num
        print(message)
        time.sleep(3)


if __name__ == '__main__':
    workers = (MyThread(1), MyThread(2))
    for worker in workers:
        worker.start()
3、多线程(等待子线程执行完,再执行主线程)
import threading
import time


def sayhi(num):
    """Thread body: sleep for three seconds, then report the thread's number."""
    time.sleep(3)
    print("running on number:%s" % num)


if __name__ == '__main__':
    thread_list = []
    for n in range(10):
        worker = threading.Thread(target=sayhi, args=(n,))
        worker.start()
        thread_list.append(worker)

    # Wait for every worker before continuing (comparable to ``wait`` in a shell).
    for worker in thread_list:
        worker.join()

    print('-----------主线程----------')

浙公网安备 33010602011771号