1 #!/usr/bin/env python
2 # -*- coding:utf-8 -*-
3
# Layering: requests is built on urllib3, which in turn is built on socket.
5 import socket
6 from urllib.parse import urlparse
7
8
def get_url(url, timeout=None):
    """Fetch *url* over plain HTTP with a raw socket and return the body.

    Sends a minimal ``GET`` request with ``Connection:close`` and reads until
    the server closes the connection. The full response (status line, headers
    and body) is printed, followed by the body alone.

    Only unencrypted HTTP is spoken: no TLS is negotiated, redirects are not
    followed, and a chunked transfer encoding is not decoded.

    Args:
        url: Absolute URL such as ``http://host[:port]/path?query``.
        timeout: Optional socket timeout in seconds; ``None`` (the default)
            blocks indefinitely.

    Returns:
        The response body as text (everything after the first blank line),
        or an empty string if the response has no header/body separator.

    Raises:
        ValueError: If *url* has no host component.
        OSError: If connecting, sending or receiving fails.
    """
    parts = urlparse(url)

    # netloc may carry "user:pass@" and ":port", neither of which is a valid
    # host name for connect(); use the parsed hostname/port instead.
    host = parts.hostname
    if not host:
        raise ValueError("URL has no host: {!r}".format(url))
    port = parts.port or 80

    # The Host header keeps the port (if any) but must not leak credentials.
    host_header = parts.netloc.rpartition('@')[2]

    # The request target is the path plus the query string; an empty path
    # means the site root.
    target = parts.path or '/'
    if parts.query:
        target = '{}?{}'.format(target, parts.query)

    request = "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(
        target, host_header)

    # The context manager closes the socket even when an error is raised.
    with socket.create_connection((host, port), timeout=timeout) as client:
        # sendall() retries until the whole request is written; send() may
        # transmit only part of it.
        client.sendall(request.encode('utf-8'))
        # recv() returns b"" once the server closes the connection.
        raw = b"".join(iter(lambda: client.recv(1024), b""))

    # Do not crash on pages that are not valid UTF-8.
    data = raw.decode('utf-8', errors='replace')
    # Split on the FIRST blank line only, so blank lines inside the body
    # are preserved.
    html_data = data.partition('\r\n\r\n')[2]
    print(data)
    print(html_data)
    return html_data
36
37
# Script entry point: when run directly, fetch and print the Baidu home page.
if __name__ == '__main__':
    demo_url = 'http://www.baidu.com'
    get_url(demo_url)