Python 爬虫 多线程 线程池
先写一个单线程、按顺序逐个请求的例子,作为后面对比耗时的基准:
# -*- coding: utf-8 -*-
"""Baseline: fetch five stock quotes one after another and time the run."""
import time

import requests

if __name__ == '__main__':
    codes = ['sh600993', 'sh000006', 'sh600658', 'sh600153', 'sh600005']
    start = time.time()
    for code in codes:
        url = 'http://hq.sinajs.cn/list=' + code
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=10).text
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(response)
    # Elapsed wall-clock seconds for the whole sequential run.
    print(time.time() - start)

# Sample output recorded in the article (quote fields appear abridged):
#
# var hq_str_sh600993="马应龙,20.020,20.090,20.060,11:30:00,00";
# var hq_str_sh000006="地产指数,6567.8364 ";
# var hq_str_sh600658="电子城,13.320,13. ";
# var hq_str_sh600153="建发股份,10.520,10.5 ";
# var hq_str_sh600005="武钢股份,0.000,3.710, 03";
#
# 0.110999822617
换成多线程之后:
# -*- coding: utf-8 -*-
"""Fetch five stock quotes concurrently, using one thread per stock code."""
import threading
import time

import requests


def get_stock(code):
    """Download and print the raw quote text for one stock code.

    Args:
        code: Stock identifier appended to the quote URL, e.g. 'sh600993'.
    """
    url = 'http://hq.sinajs.cn/list=' + code
    # Without a timeout, a stalled server would block this thread forever
    # and the join() in the main block would never return.
    response = requests.get(url, timeout=10).text
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(response)


if __name__ == '__main__':
    codes = ['sh600993', 'sh000006', 'sh600658', 'sh600153', 'sh600005']
    start = time.time()
    threads = [
        threading.Thread(target=get_stock, args=(code,))
        for code in codes
    ]
    # Start every thread before joining any of them so the requests overlap;
    # joining inside the start loop would run them one at a time.
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    # Elapsed wall-clock seconds once all threads have finished.
    print(time.time() - start)

# Sample output recorded in the article (quote fields appear abridged).
# The lines are not in the order of `codes`: each thread prints as soon as
# its own response arrives.
#
# var hq_str_sh600993="马应龙,20.020,20.090,20.020.080,0.100,2017-01-24,11:30:00,00";
# var hq_str_sh600658="电子城,13.320,13.200,13.270,13.329020,2017-01-24,11:30:00,00";
# var hq_str_sh000006="地产指数,6567.8360";
# var hq_str_sh600153="建发股份,10.520,110400,10.530,139900,10-24,11:30:00,00";
# var hq_str_sh600005="武钢股份,0.000,3.710,3.3";
#
# 0.0379998683929
线程池
"""Fetch five stock quotes through a fixed-size pool of worker threads."""
import time

import requests
import threadpool


def get_stock(code):
    """Download and print the raw quote text for one stock code.

    Args:
        code: Stock identifier appended to the quote URL, e.g. 'sh600993'.
    """
    url = 'http://hq.sinajs.cn/list=' + code
    # Without a timeout, a stalled server would block a worker forever and
    # pool.wait() in the main block would never return.
    response = requests.get(url, timeout=10).text
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(response)


if __name__ == '__main__':
    codes = ['sh600993', 'sh000006', 'sh600658', 'sh600153', 'sh600005']
    start = time.time()
    # Create a thread pool with 5 worker threads.
    pool = threadpool.ThreadPool(5)
    # makeRequests builds the work requests from the function to run, the
    # arguments for each call, and an optional callback (none by default).
    tasks = threadpool.makeRequests(get_stock, codes)
    # Queue every request in the pool. The objects queued are the requests
    # built by makeRequests, not the raw stock codes.
    for task in tasks:
        pool.putRequest(task)
    # Block until all queued work has completed.
    pool.wait()
    # Elapsed wall-clock seconds for the whole run.
    print(time.time() - start)

# Note from the article: the thread pool pays off more as the number of
# tasks grows.

浙公网安备 33010602011771号