Python开发Http代理服务器 - socketref,呆在autonavi.com - C++博客
这是之前开发酒店广告投放系统时编写的 HTTP 代理服务程序,功能是把广告插播进 HTML DOM 结构内。一般被插入的是 DIV,当然也可以是 script,这些内容都可以从数据库中动态获得。
简单修改之后即可当做 HTTP 代理服务器使用:在浏览器中把 HTTP 代理设置为运行该程序的机器的 IP 即可,只要代理程序所在的机器能上网,客户机便能上网(其中涉及页面 gzip 压缩的处理有点麻烦)。
1 # -*- coding:utf-8 -*-
2 # http代理服务器
3 # 1.ip限制,mac限制
4 #
5 # socketref@hotmail.com
6 # www.sw2us.com
7
8 "exec" "python" "-O" "$0" "$@"
9
10 __doc__ = """sw2us HTTP Proxy.
11
12 """
13
14 __version__ = "0.2.1"
15
16 import BaseHTTPServer, select, socket, SocketServer, urlparse
17 import httplib,traceback,re
18 import os,sys,re,mimetools,zlib,StringIO,gzip,time,StringIO
19
20
class ConfigProperty:
    """One ``key=value`` entry parsed from a single configuration line.

    ``key`` and ``value`` are stored as stripped strings; both start out
    empty until :meth:`create` succeeds.
    """

    def __init__(self, owner):
        # ``owner`` is accepted for interface compatibility only; it is not
        # stored or used.
        self.key = ''
        self.value = ''

    def create(self, text):
        """Parse ``text`` (``key=value``) into this property.

        Everything from the first ``#`` onwards is treated as a comment and
        discarded. The line is split on the FIRST ``=`` only, so values may
        themselves contain ``=`` (for example URLs with query strings).

        Returns True when the line contained an ``=``, False otherwise
        (blank lines, comment-only lines, malformed lines).
        """
        text = text.split('#', 1)[0]
        key, separator, value = text.partition('=')
        if not separator:
            return False
        self.key = key.strip()
        self.value = value.strip()
        return True

    def toString(self):
        """Return the property as ``key=value``, or '' if it cannot be formatted."""
        try:
            return "%s=%s" % (self.key, self.value)
        except Exception:
            return ''

    def toInt(self):
        """Return the value as an int, or 0 when it is not a valid integer."""
        try:
            return int(self.value)
        except (TypeError, ValueError):
            return 0

    def toFloat(self):
        """Return the value as a float, or 0.0 when it is not a valid number."""
        try:
            return float(self.value)
        except (TypeError, ValueError):
            return 0.0
66
67
68 #@def SimpleConfig
69 # 简单配置信息文件,基本格式 : key=value
class SimpleConfig:
    """Minimal configuration file reader/writer using ``key=value`` lines.

    Parsed entries are kept, in file order, as ``ConfigProperty`` objects in
    ``self._props``.
    """

    def __init__(self):
        self._file = ''
        self._props = []
        self._strip = True

    def open(self, file, strip=True):
        """Load properties from the file at path ``file``.

        ``strip`` is only recorded on the instance; values are always
        stripped by ``ConfigProperty.create``. Any previously loaded
        properties are discarded first.

        Returns True on success, False when the file cannot be read.
        """
        self._strip = strip
        self._props = []
        try:
            # ``open`` here is the builtin; the method name lives on the class.
            with open(file, 'r') as handle:
                for text in handle:
                    prop = ConfigProperty(self)
                    if prop.create(text):
                        self._props.append(prop)
        except (IOError, OSError, TypeError):
            return False
        return True

    def toString(self):
        """Return every property as ``key=value`` lines, each newline-terminated."""
        return ''.join(p.toString() + "\n" for p in self._props)

    def saveAs(self, file):
        """Write all properties to the file at path ``file``.

        Returns True on success; prints a message and returns False when
        the file cannot be written.
        """
        try:
            with open(file, 'w') as handle:
                handle.write(self.toString())
        except (IOError, OSError):
            print("write File Failed!")
            return False
        return True

    def getProperty(self, name):
        """Return the first property whose key equals ``name``, or None."""
        for prop in self._props:
            if prop.key == name:
                return prop
        return None

    def getPropertyValue(self, key, default=''):
        """Return the string value stored under ``key``, or ``default`` if absent."""
        prop = self.getProperty(key)
        if prop is None:
            return default
        return prop.value

    def getPropertyValueAsInt(self, name, default=0):
        """Return the value of ``name`` as an int.

        ``default`` is returned when the property is missing, empty, or not
        a valid integer.
        """
        text = self.getPropertyValue(name)
        if not text:
            return default
        try:
            return int(text)
        except (TypeError, ValueError):
            return default

    def getPropertyValueAsFloat(self, name, default=0.0):
        """Return the value of ``name`` as a float.

        ``default`` is returned when the property is missing, empty, or not
        a valid number.
        """
        text = self.getPropertyValue(name)
        if not text:
            return default
        try:
            return float(text)
        except (TypeError, ValueError):
            return default
151
152
153 #===========================================#
154
155
156 #===========================================#
157
def getMacList():
    """Return ``(ip, mac)`` string tuples parsed from the output of ``arp -a``.

    Only lines that start with a dotted IPv4 address followed by a
    dash-separated MAC address (``aa-bb-cc-dd-ee-ff``) are recognised;
    every other line is ignored. Returns an empty list when nothing matches.
    """
    entry = re.compile(
        r'^(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+([0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}\-[0-9a-fA-F]{1,2}).*')
    maclist = []
    pipe = os.popen('arp -a', 'r')
    try:
        for line in pipe:
            found = entry.match(line.strip())
            if found:
                maclist.append(found.groups())
    finally:
        # Always release the pipe, even if parsing raises.
        pipe.close()
    return maclist
173
174
175
176 ##########################################
# Module-wide configuration, loaded once at import time from 'proxy.conf'
# (a relative path). open() returns False when the file cannot be read; that
# result is ignored here, so later lookups fall back to their defaults.
confile = SimpleConfig()
confile.open('proxy.conf')
# Placeholder for a database connection; nothing in the visible code assigns it.
dbconn = None
180
181 ##########################################
182 #初始化系统配置
def initConfiguration():
    """Initialise the system configuration.

    Currently a placeholder: nothing is set up, and success (True) is
    always reported.
    """
    return True
187
188 ##########################################
189
class ProxyHandler (BaseHTTPServer.BaseHTTPRequestHandler):
    """Request handler implementing a simple forwarding HTTP proxy.

    Plain HTTP requests (GET/HEAD/POST/PUT/DELETE) are re-issued to the
    origin server and CONNECT requests are tunnelled; in both cases bytes
    are then relayed between the two sockets by ``_read_write``.

    If the class attribute ``allowed_clients`` is set, clients whose IP is
    not in that collection are answered with 403.
    """

    __base = BaseHTTPServer.BaseHTTPRequestHandler
    __base_handle = __base.handle
    server_version = "TinyHTTPProxy/" + __version__
    # Keep self.rfile unbuffered: _read_write() reads the raw client socket,
    # so request bytes must not be swallowed by a file-level buffer.
    rbufsize = 0

    def handle(self):
        """Entry point for a new client connection.

        Rejects the request with 403 when an ``allowed_clients`` list exists
        and does not contain the client's IP; otherwise defers to the
        standard ``BaseHTTPRequestHandler.handle``.
        """
        print('client incoming')
        ip = self.client_address[0]
        allowed = getattr(self, 'allowed_clients', None)
        if allowed is not None and ip not in allowed:
            # The request still has to be parsed before an error response
            # can be sent.
            self.raw_requestline = self.rfile.readline()
            if self.parse_request():
                self.send_error(403)
        else:
            self.__base_handle()

    def _connect_to(self, netloc, soc):
        """Connect ``soc`` to ``netloc`` (``host`` or ``host:port``, default port 80).

        Returns 1 on success. On failure an error response is sent to the
        client (400 for an unusable port, 404 when the connection fails)
        and 0 is returned.
        """
        host, separator, port_text = netloc.partition(':')
        port = 80
        if separator:
            try:
                port = int(port_text)
            except ValueError:
                port = -1
            if not 0 < port < 65536:
                self.send_error(400, "bad port in %s" % netloc)
                return 0
        try:
            soc.connect((host, port))
        except socket.error as err:
            # Prefer the human-readable part of (errno, message).
            msg = err.args[1] if len(err.args) > 1 else str(err)
            self.send_error(404, msg)
            return 0
        return 1

    def do_CONNECT(self):
        """Open a tunnel to ``self.path`` and relay bytes in both directions."""
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(self.path, soc):
                self.log_request(200)
                self.wfile.write(self.protocol_version +
                                 " 200 Connection established\r\n")
                self.wfile.write("Proxy-agent: %s\r\n" % self.version_string())
                self.wfile.write("\r\n")
                # 300 idle rounds of 3 seconds each before giving up.
                self._read_write(soc, 300)
        finally:
            print("\tbye")
            soc.close()
            self.connection.close()

    def do_GET(self):
        """Forward a plain HTTP request to the origin server and relay the reply.

        Answers 400 when the URL is not ``http``, carries a fragment, or no
        target host can be determined from the URL or the ``Host`` header.
        """
        (scm, netloc, path, params, query, fragment) = urlparse.urlparse(
            self.path, 'http')
        if not netloc:
            netloc = self.headers.get('Host', "")
        if scm != 'http' or fragment or not netloc:
            self.send_error(400, "bad url %s" % self.path)
            return
        soc = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            if self._connect_to(netloc, soc):
                self.log_request()
                # An absolute URL without a path ("http://host") must still
                # yield a valid request target.
                target = urlparse.urlunparse(
                    ('', '', path or '/', params, query, ''))
                request = ["%s %s %s\r\n" % (
                    self.command, target, self.request_version)]
                self.headers['Connection'] = 'close'
                del self.headers['Proxy-Connection']
                request.extend(
                    "%s: %s\r\n" % key_val for key_val in self.headers.items())
                request.append("\r\n")
                # sendall: a plain send() may transmit only part of the data.
                soc.sendall("".join(request))
                # Request line and headers are out; relay everything else.
                self._read_write(soc)
        finally:
            print("\tbye")
            soc.close()
            self.connection.close()

    def insertTags(self, tag, body, insert):
        """Return ``body`` with ``insert`` placed right after the first ``<tag ...>``.

        The tag name is matched case-insensitively and must be followed by
        whitespace, ``/`` or ``>`` (so ``head`` does not match ``<header>``).
        ``body`` is returned unchanged when no such opening tag is found.
        """
        opening = re.search(r'<%s(?=[\s/>])' % re.escape(tag), body, re.I)
        if opening is None:
            return body
        close = body.find('>', opening.start())
        if close == -1:
            return body
        print('*-' * 20)
        return body[:close + 1] + insert + body[close + 1:]

    def _dechunk(self, body):
        """Return the payload of a 'Transfer-Encoding: chunked' ``body``.

        Raises ValueError when the chunk framing is malformed.
        """
        pieces = []
        pos = 0
        while pos < len(body):
            eol = body.find('\r\n', pos)
            if eol == -1:
                raise ValueError("chunk size line is not terminated")
            # Ignore optional chunk extensions after ';'.
            size = int(body[pos:eol].split(';', 1)[0].strip(), 16)
            if size == 0:
                break
            start = eol + 2
            pieces.append(body[start:start + size])
            pos = start + size + 2
        return ''.join(pieces)

    def _gunzip(self, body):
        """Return the gzip-compressed string ``body`` decompressed."""
        return gzip.GzipFile(fileobj=StringIO.StringIO(body)).read()

    def sendBackResponse(self, command, headers, body):
        """Send a complete response (status line, headers, body) to the client.

        Background: many pages arrive gzip-compressed, either directly after
        the headers or wrapped in chunked transfer encoding, so the body has
        to be decoded before any text can be inserted and Content-Length
        has to be recomputed afterwards.

        command -- the status line, without the trailing CRLF
        headers -- mapping of response headers with lower-case lookups
                   (a dict or a mimetools.Message); modified in place
        body    -- the raw response body

        The encoding headers are removed only for the decoding steps that
        actually succeeded, so a body that could not be decoded is passed
        through with headers that still describe it. When the instance
        provides ``publish_advertisement(body)`` it is applied to a fully
        decoded body. NOTE(review): nothing in this class calls this method
        or defines that hook -- presumably both are wired up elsewhere.
        """
        encoding = headers.get('content-encoding', '').strip().lower()
        transfer = headers.get('transfer-encoding', '').strip().lower()
        decoded = True
        try:
            if transfer == 'chunked':
                body = self._dechunk(body)
                del headers['transfer-encoding']
                transfer = ''
            if encoding == 'gzip':
                body = self._gunzip(body)
                del headers['content-encoding']
        except Exception:
            decoded = False
            traceback.print_exc()
            print('decompress failed!')

        publish = getattr(self, 'publish_advertisement', None)
        if decoded and publish is not None:
            try:
                body = publish(body)  # insert the configured advertisement
            except Exception:
                traceback.print_exc()

        if transfer != 'chunked':
            # Drop any existing spelling first so the header is sent once.
            for name in list(headers.keys()):
                if name.lower() == 'content-length':
                    del headers[name]
            headers['Content-Length'] = str(len(body))

        head = [command, '\r\n']
        head.extend("%s: %s\r\n" % (k, v) for k, v in headers.items())
        head.append('\r\n')
        self.connection.sendall(''.join(head))
        self.connection.sendall(body)

    def _read_write(self, soc, max_idling=20):
        """Relay data between the client socket and ``soc`` until done.

        self.connection is the client (inside) connection, ``soc`` the
        outbound one. The loop ends when either side closes, when select
        reports an exceptional condition, or after ``max_idling``
        consecutive 3-second waits without traffic (default 20 * 3 = 60 s).
        """
        sockets = [self.connection, soc]
        idle = 0
        while True:
            idle += 1
            (readable, _, broken) = select.select(sockets, [], sockets, 3)
            if broken:
                print('error occr!')
                break
            for source in readable:
                target = self.connection if source is soc else soc
                data = source.recv(8192)
                if not data:
                    return  # peer closed the connection
                # sendall: send() may write only part of the buffer.
                target.sendall(data)
                idle = 0
            if idle >= max_idling:
                print('idling exit')
                break

    # Every plain HTTP method is forwarded the same way.
    do_HEAD = do_GET
    do_POST = do_GET
    do_PUT = do_GET
    do_DELETE = do_GET
459
class ThreadingHTTPServer (SocketServer.ThreadingMixIn,
                           BaseHTTPServer.HTTPServer):
    """HTTPServer combined with ThreadingMixIn; adds no behaviour of its own."""
462
463
464
465
def serving(HandlerClass,
            ServerClass, protocol="HTTP/1.0"):
    """Start the proxy server and serve requests until the process ends.

    The process exits immediately unless the first command-line argument
    is exactly ``www.sw2us.com``. The listening port is taken from the
    second command-line argument when given, otherwise from the
    ``httpport`` configuration property (default 8000).

    HandlerClass -- request handler class; its ``protocol_version`` is set
                    to ``protocol``
    ServerClass  -- server class, instantiated with ``(address, HandlerClass)``
    protocol     -- HTTP protocol version string announced by the handler

    After the start-up banner, stdout and stderr are redirected to the
    module-level ``buff`` object when the script defined one, otherwise to
    the null device, so calling this function from another module no longer
    fails with a NameError.
    NOTE(review): an in-memory ``buff`` grows for as long as the server
    runs, because every handled request prints to it.
    """
    if len(sys.argv) < 2 or sys.argv[1] != 'www.sw2us.com':
        sys.exit()

    if sys.argv[2:]:
        port = int(sys.argv[2])
    else:
        port = confile.getPropertyValueAsInt('httpport', 8000)

    HandlerClass.protocol_version = protocol
    httpd = ServerClass(('', port), HandlerClass)

    sa = httpd.socket.getsockname()
    print("www.sw2us.com@2010 v.1.0.0")
    print("Serving HTTP on %s port %s " % (sa[0], sa[1]))

    sink = globals().get('buff')
    if sink is None:
        sink = open(os.devnull, 'w')
    sys.stdout = sink
    sys.stderr = sink

    httpd.serve_forever()
492
493
494
if __name__ == '__main__':
    # Record our process id in 'proxy.pid' (relative to the working directory).
    with open('proxy.pid', 'w') as pidfile:
        pidfile.write(str(os.getpid()))

    try:
        # 'allowed_clients' is a comma-separated list of host names or IPs;
        # each entry is resolved once, at start-up.
        allowed = []
        for host in confile.getPropertyValue('allowed_clients').split(','):
            host = host.strip()
            if host:
                allowed.append(socket.gethostbyname(host))
        # Without any entry the attribute stays unset, which lets every
        # client through (see ProxyHandler.handle).
        if allowed:
            ProxyHandler.allowed_clients = allowed

        # serving() redirects stdout/stderr into this in-memory buffer.
        buff = StringIO.StringIO()

        serving(ProxyHandler, ThreadingHTTPServer)
    except (KeyboardInterrupt, SystemExit):
        # A deliberate stop (Ctrl-C, or serving() refusing to start) stays silent.
        pass
    except Exception:
        # sys.stderr may already point at ``buff``; report start-up and
        # runtime failures on the real stderr instead of hiding them.
        traceback.print_exc(file=sys.__stderr__)
 
                     
                    
                 
                    
                 
 
        
 
                
            
         浙公网安备 33010602011771号
浙公网安备 33010602011771号