自定义异步非阻塞web框架
Python的Web框架中Tornado以异步非阻塞而闻名。本文基于非阻塞的Socket以及IO多路复用从而实现异步非阻塞的Web框架
一、异步非阻塞和io多路复用
出现的原因:
由于进程的执行过程是线性的(也就是顺序执行),当我们调用低速系统I/O(read、write、accept等)时,进程可能会阻塞在这个调用上,无法执行其他操作。阻塞本身很正常。接下来考虑这么一个问题:一个服务器进程和一个客户端进程通信,服务器端调用read(sockfd1, buf, bufsize),此时客户端进程没有发送数据,那么read(阻塞调用)将一直阻塞,直到客户端调用write(sockfd, buf, size)发来数据。在只有一个客户与服务器通信时这没什么问题;但当多个客户与服务器通信时,若服务器阻塞于其中一个客户的sockfd1,当另一个客户的数据到达套接字sockfd2时,服务器不能处理,仍然阻塞在read(sockfd1, ...)上。此时问题就出现了:不能及时处理另一个客户的服务,怎么办?用I/O多路复用来解决!
io多路复用:
继续上面的问题:有多个客户连接sockfd1、sockfd2、sockfd3……sockfdn,用select同时监听这n个客户;当其中有一个发来消息时就从select的阻塞中返回,然后调用read读取收到消息的sockfd,之后又循环回到select阻塞。这样就不会因为阻塞在其中一个上而不能处理另一个客户的消息。原理:把socket对象设置为非阻塞(调用setblocking(False)),再配合select。
Q: 那这样子,在读取socket1的数据时,如果其它socket有数据来,那么也要等到socket1读取完了才能继续读取其它socket的数据吧。那不是也阻塞住了吗?而且读取到的数据也要开启线程处理吧,那这和多线程IO有什么区别呢?
A:
- CPU本来就是线性的 不论什么都需要顺序处理 并行只能是多核CPU
- io多路复用本来就是用来解决对多个I/O监听时,一个I/O阻塞影响其他I/O的问题,跟多线程没关系.
- 跟多线程相比较,线程切换需要切换到内核进行线程切换,需要消耗时间和资源. 而I/O多路复用不需要切换线/进程,效率相对较高,特别是对高并发的应用nginx就是用I/O多路复用,故而性能极佳.但多线程编程逻辑和处理上比I/O多路复用简单.而I/O多路复用处理起来较为复杂.
二、实现流程
1、sleep异步非阻塞
import tornado.ioloop
import tornado.web
class MainHandler(tornado.web.RequestHandler):
    """Handler for ``/main`` that sleeps before answering."""

    def get(self):
        """Sleep for 10 seconds, then write ``Hello, world``."""
        import time

        # NOTE(review): time.sleep is a synchronous call; presumably this is
        # the blocking baseline for the coroutine version shown afterwards.
        delay_seconds = 10
        time.sleep(delay_seconds)
        self.write("Hello, world")
class IndexHandler(tornado.web.RequestHandler):
    """Handler for ``/index`` that answers immediately."""

    def get(self):
        """Write the fixed body ``Index``."""
        body = "Index"
        self.write(body)
# Routing table: URL pattern -> handler class.
application = tornado.web.Application(
    [
        (r"/main", MainHandler),
        (r"/index", IndexHandler),
    ]
)

if __name__ == "__main__":
    # Listen on port 8888, then hand control to the IOLoop.
    application.listen(8888)
    tornado.ioloop.IOLoop.instance().start()
import tornado.ioloop
import tornado.web
from tornado import gen
from tornado.concurrent import Future
import time
class MainHandler(tornado.web.RequestHandler):
    """Handler for ``/main`` that answers about five seconds later via a timer."""

    @gen.coroutine
    def get(self):
        """Schedule ``done`` five seconds from now and suspend on a Future.

        The Future yielded here is never given a result inside this class, so
        the coroutine stays suspended; the response is completed by ``done``.
        """
        pending = Future()  # the object the coroutine waits on
        loop = tornado.ioloop.IOLoop.current()
        deadline = time.time() + 5
        # Ask the IOLoop to call ``done`` once the deadline is reached.
        loop.add_timeout(deadline, self.done)
        yield pending

    def done(self, *args, **kwargs):
        """Timer callback: write ``Main`` and finish the request."""
        self.write('Main')
        self.finish()
class IndexHandler(tornado.web.RequestHandler):
    """Handler for ``/index`` that answers immediately."""

    def get(self):
        """Write the fixed body ``Index``."""
        body = "Index"
        self.write(body)
# Routing table: URL pattern -> handler class.
application = tornado.web.Application(
    [
        (r"/main", MainHandler),
        (r"/index", IndexHandler),
    ]
)

if __name__ == "__main__":
    # Listen on port 8888, then hand control to the IOLoop.
    application.listen(8888)
    tornado.ioloop.IOLoop.instance().start()
2、requests请求异步非阻塞
import tornado.ioloop
import tornado.web
class MainHandler(tornado.web.RequestHandler):
    """Handler for ``/main`` that performs an outbound HTTP request first."""

    def get(self):
        """Fetch a remote page with ``requests``, then write ``xxxxx``."""
        import requests

        # NOTE(review): requests.get is a synchronous call; presumably this is
        # the blocking baseline for the AsyncHTTPClient version shown next.
        target_url = 'http://www.google.com'
        requests.get(target_url)
        self.write('xxxxx')
class IndexHandler(tornado.web.RequestHandler):
    """Handler for ``/index`` that answers immediately."""

    def get(self):
        """Write the fixed body ``Index``."""
        body = "Index"
        self.write(body)
# Routing table: URL pattern -> handler class.
application = tornado.web.Application(
    [
        (r"/main", MainHandler),
        (r"/index", IndexHandler),
    ]
)

if __name__ == "__main__":
    # Listen on port 8888, then hand control to the IOLoop.
    application.listen(8888)
    tornado.ioloop.IOLoop.instance().start()
import tornado.ioloop
import tornado.web
from tornado import gen
class MainHandler(tornado.web.RequestHandler):
    """Handler for ``/main`` that fetches a remote page without blocking."""

    @gen.coroutine
    def get(self):
        """Fetch the remote page asynchronously, then complete via ``done``.

        The response is obtained by yielding the Future returned by
        ``fetch`` instead of passing ``self.done`` as a callback: the
        ``callback`` argument no longer exists in Tornado 6, where a second
        positional argument is interpreted as ``raise_error`` and ``done``
        would never run.
        """
        from tornado import httpclient

        http = httpclient.AsyncHTTPClient()
        # raise_error=False keeps the callback-era behaviour of delivering
        # non-2xx responses to ``done`` instead of raising HTTPError.
        response = yield http.fetch("http://www.google.com", raise_error=False)
        self.done(response)

    def done(self, *args, **kwargs):
        """Write ``Main`` and finish the request; arguments are ignored."""
        self.write('Main')
        self.finish()
class IndexHandler(tornado.web.RequestHandler):
    """Handler for ``/index`` that answers immediately."""

    def get(self):
        """Write the fixed body ``Index``."""
        body = "Index"
        self.write(body)
# Routing table: URL pattern -> handler class.
application = tornado.web.Application(
    [
        (r"/main", MainHandler),
        (r"/index", IndexHandler),
    ]
)

if __name__ == "__main__":
    # Listen on port 8888, then hand control to the IOLoop.
    application.listen(8888)
    tornado.ioloop.IOLoop.instance().start()
3、future异步非阻塞
import tornado.ioloop
import tornado.web
from tornado import gen
from tornado.concurrent import Future
# Module-level slot holding the Future created by the most recent /main
# request; it stays None until MainHandler.get has run.
future = None


class MainHandler(tornado.web.RequestHandler):
    """Handler for ``/main`` that waits until the shared Future is resolved."""

    @gen.coroutine
    def get(self):
        """Publish a fresh Future in the module global and suspend on it."""
        global future
        future = pending = Future()
        # ``done`` runs once something gives the Future a result.
        pending.add_done_callback(self.done)
        yield pending

    def done(self, *args, **kwargs):
        """Future callback: write ``Main`` and finish the request."""
        self.write('Main')
        self.finish()
class IndexHandler(tornado.web.RequestHandler):
    """Handler for ``/index`` that releases the request waiting on ``/main``."""

    def get(self):
        """Resolve the shared Future if one is pending, then write ``Index``.

        Until the Future receives a result, the ``/main`` request that is
        waiting on it never completes.
        """
        global future
        # Guard both failure modes of an unconditional set_result():
        # no /main request has created a Future yet (still None), or the
        # Future was already resolved by an earlier /index request.
        if future is not None and not future.done():
            future.set_result(None)
        self.write("Index")
# Routing table: URL pattern -> handler class.
application = tornado.web.Application(
    [
        (r"/main", MainHandler),
        (r"/index", IndexHandler),
    ]
)

if __name__ == "__main__":
    # Listen on port 8888, then hand control to the IOLoop.
    application.listen(8888)
    tornado.ioloop.IOLoop.instance().start()
future = Future()
原理:处理函数返回一个future,框架检查future的result里有没有值;一旦有值,就把结果返回给客户端并断开连接。
4、自定义服务端web框架(不支持异步)
import socket
import select
class HttpRequest(object):
    """Parsed view of the raw bytes of one HTTP request.

    After construction the instance exposes ``method``, ``url`` and
    ``protocol`` (from the request line), ``header_dict`` (header name ->
    value), and the raw ``header_bytes`` / ``body_bytes``.
    """

    def __init__(self, content):
        """
        :param content: raw request bytes sent by the client: the header
            section, optionally followed by a blank line and the body
        """
        self.content = content
        self.header_bytes = bytes()
        self.body_bytes = bytes()
        self.header_dict = {}
        self.method = ""
        self.url = ""
        self.protocol = ""
        self.initialize()
        self.initialize_headers()

    def initialize(self):
        """Split ``content`` into header and body at the first blank line.

        When there is no blank line the whole content is treated as the
        header section and the body stays empty.
        """
        head, _, body = self.content.partition(b'\r\n\r\n')
        self.header_bytes += head
        self.body_bytes += body

    @property
    def header_str(self):
        """The header section decoded as UTF-8 text."""
        return str(self.header_bytes, encoding='utf-8')

    def initialize_headers(self):
        """Fill ``method``/``url``/``protocol`` and ``header_dict``.

        The request line is only accepted when it has exactly three
        space-separated parts. Header lines are split at the first colon
        only, so values that themselves contain colons (for example
        ``Host: 127.0.0.1:9999``) are kept; names and values are stripped of
        surrounding whitespace. Lines without a colon are ignored.
        """
        lines = self.header_str.split('\r\n')
        request_line = lines[0].split(' ')
        if len(request_line) == 3:
            self.method, self.url, self.protocol = request_line
        # Skip the request line: it is not a header even if the URL
        # happens to contain a colon.
        for line in lines[1:]:
            name, sep, value = line.partition(':')
            if sep:
                self.header_dict[name.strip()] = value.strip()
def main(request):
    """View for ``/main/``: return the fixed text ``main``; ``request`` is unused."""
    body = "main"
    return body
def index(request):
    """View for ``/index/``: return the fixed text ``index``; ``request`` is unused."""
    body = "index"
    return body
# Routing table: (URL regex, view function) pairs, tried in order.
routers = [('/main/', main), ('/index/', index)]
def run():
    """Serve requests on 127.0.0.1:9999 from a single-threaded select() loop.

    The listening socket and every accepted connection are non-blocking.
    For each readable connection the available bytes are read, parsed into
    an ``HttpRequest``, matched against ``routers`` and answered with the
    view's return value encoded as UTF-8 (or ``b"404"`` when no route
    matches). The connection is then closed. The reply is sent as-is,
    without a status line or headers. This function never returns.
    """
    import re  # imported once per call rather than once per request

    def respond(conn, payload):
        """Send ``payload`` and close ``conn``; a vanished peer must not stop the loop."""
        try:
            conn.sendall(payload)
        except OSError:
            pass  # peer already disconnected; nothing left to deliver
        finally:
            conn.close()

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind(("127.0.0.1", 9999,))
    sock.setblocking(False)
    sock.listen(128)

    inputs = [sock]
    while True:
        rlist, wlist, elist = select.select(inputs, [], [], 0.05)
        for r in rlist:
            if r is sock:
                # A new client is connecting.
                conn, addr = sock.accept()
                conn.setblocking(False)
                inputs.append(conn)
                continue

            # An existing client sent data: read until nothing is left.
            chunks = []
            while True:
                try:
                    chunk = r.recv(1024)
                except OSError:
                    # BlockingIOError means nothing more to read right now;
                    # any other socket error is treated as end of data too.
                    break
                if not chunk:
                    break
                chunks.append(chunk)
            request = HttpRequest(b"".join(chunks))

            # Pick the first view whose pattern matches the request URL.
            func = None
            for pattern, view in routers:
                if re.match(pattern, request.url):
                    func = view
                    break

            if func is not None:
                payload = bytes(func(request), encoding='utf-8')
            else:
                payload = b"404"
            inputs.remove(r)
            respond(r, payload)


if __name__ == '__main__':
    run()
5、支持异步非阻塞的web框架:
import socket
import select
import time
class HttpRequest(object):
    """Parsed view of the raw bytes of one HTTP request.

    After construction the instance exposes ``method``, ``url`` and
    ``protocol`` (from the request line), ``header_dict`` (header name ->
    value), and the raw ``header_bytes`` / ``body_bytes``.
    """

    def __init__(self, content):
        """
        :param content: raw request bytes sent by the client: the header
            section, optionally followed by a blank line and the body
        """
        self.content = content
        self.header_bytes = bytes()
        self.body_bytes = bytes()
        self.header_dict = {}
        self.method = ""
        self.url = ""
        self.protocol = ""
        self.initialize()
        self.initialize_headers()

    def initialize(self):
        """Split ``content`` into header and body at the first blank line.

        When there is no blank line the whole content is treated as the
        header section and the body stays empty.
        """
        head, _, body = self.content.partition(b'\r\n\r\n')
        self.header_bytes += head
        self.body_bytes += body

    @property
    def header_str(self):
        """The header section decoded as UTF-8 text."""
        return str(self.header_bytes, encoding='utf-8')

    def initialize_headers(self):
        """Fill ``method``/``url``/``protocol`` and ``header_dict``.

        The request line is only accepted when it has exactly three
        space-separated parts. Header lines are split at the first colon
        only, so values that themselves contain colons (for example
        ``Host: 127.0.0.1:8080``) are kept; names and values are stripped of
        surrounding whitespace. Lines without a colon are ignored.
        """
        lines = self.header_str.split('\r\n')
        request_line = lines[0].split(' ')
        if len(request_line) == 3:
            self.method, self.url, self.protocol = request_line
        # Skip the request line: it is not a header even if the URL
        # happens to contain a colon.
        for line in lines[1:]:
            name, sep, value = line.partition(':')
            if sep:
                self.header_dict[name.strip()] = value.strip()
class Future(object):
    """Record for a reply that is not available yet.

    Attributes set by ``__init__``:
        start: ``time.time()`` at the moment of construction.
        timeout: the ``timeout`` argument (default 0).
        result: ``None`` until a value is assigned from outside.
    """

    def __init__(self, timeout=0):
        self.start = time.time()
        self.timeout = timeout
        self.result = None
def main(request):
    """View for ``/main/``: return a new ``Future`` built with timeout 5.

    ``request`` is unused.
    """
    return Future(5)
def index(request):
    """View for ``/index/``: return a fixed text body; ``request`` is unused."""
    body = "indexasdfasdfasdf"
    return body
# Routing table: (URL regex, view function) pairs, tried in order.
routers = [('/main/', main), ('/index/', index)]
def run():
    """Serve requests on 127.0.0.1:8080 from a single-threaded select() loop.

    Works like a plain select() server, except that a view may return a
    ``Future`` instead of text. Such a connection is parked in
    ``async_request_dict`` and answered later: as soon as the Future has a
    truthy ``result``, or with ``b"timeout"`` once ``start + timeout`` has
    passed. Text results are encoded as UTF-8 and sent immediately;
    unmatched URLs get ``b"404"``. Replies are sent as-is, without a status
    line or headers. This function never returns.
    """
    import re  # imported once per call rather than once per request

    def respond(conn, payload):
        """Send ``payload`` and close ``conn``; a vanished peer must not stop the loop."""
        try:
            conn.sendall(payload)
        except OSError:
            pass  # peer already disconnected; nothing left to deliver
        finally:
            conn.close()

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind(("127.0.0.1", 8080,))
    sock.setblocking(False)
    sock.listen(128)

    inputs = [sock]
    # Parked connections: client socket -> Future awaiting a result.
    async_request_dict = {}

    while True:
        rlist, wlist, elist = select.select(inputs, [], [], 0.05)
        for r in rlist:
            if r is sock:
                # A new client is connecting.
                conn, addr = sock.accept()
                conn.setblocking(False)
                inputs.append(conn)
                continue

            # An existing client sent data: read until nothing is left.
            chunks = []
            while True:
                try:
                    chunk = r.recv(1024)
                except OSError:
                    # BlockingIOError means nothing more to read right now;
                    # any other socket error is treated as end of data too.
                    break
                if not chunk:
                    break
                chunks.append(chunk)
            request = HttpRequest(b"".join(chunks))

            # Pick the first view whose pattern matches the request URL.
            func = None
            for pattern, view in routers:
                if re.match(pattern, request.url):
                    func = view
                    break

            # Stop watching this socket for reads in every case: it is either
            # answered right now or parked until its Future completes. Leaving
            # a parked socket in ``inputs`` would re-dispatch it (and close it
            # underneath the pending Future) when the client disconnects.
            inputs.remove(r)
            if func is None:
                respond(r, b"404")
                continue
            result = func(request)
            if isinstance(result, Future):
                async_request_dict[r] = result
            else:
                respond(r, bytes(result, encoding='utf-8'))

        # Iterate over a snapshot: entries are deleted while walking, which
        # raises RuntimeError when iterating the dict itself.
        for conn in list(async_request_dict):
            future = async_request_dict[conn]
            if future.start + future.timeout <= time.time():
                future.result = b"timeout"
            if future.result:
                del async_request_dict[conn]
                respond(conn, future.result)


if __name__ == '__main__':
    run()
想看完整且详细用法请参考:200行自定义异步非阻塞Web框架

浙公网安备 33010602011771号