爬取-姓名测试打分
从姓名测试网站爬取所有以‘李金’开头的三字姓名对应的得分,选取得分较高的名字。
# -*- coding: utf-8 -*-
"""Score every three-character name that starts with '李金' on name321.com.

Created on Thu Jan 30 12:45:02 2020

@author: Administrator

Each candidate name is POSTed to the site's scoring form, the score is
parsed out of the returned HTML, and one row per name is appended to
``score.csv`` in the form ``name,score,timestamp,status_code``.
"""

import time

import requests
from bs4 import BeautifulSoup

# Every character in the CJK Unified Ideographs range U+4E00..U+9FA5
# (20902 code points); each one is tried as the third character of the name.
chineses = [chr(i) for i in range(0x4e00, 0x9fa6)]

SCORE_URL = "http://m.name321.com/xm.php"
NAME_PREFIX = '李金'
OUTPUT_PATH = 'score.csv'
# Rows are written as UTF-8 regardless of the platform locale.
# NOTE(review): a score.csv started by an older run under a different locale
# encoding should be converted (or a fresh file started) before appending.
OUTPUT_ENCODING = 'utf-8'
# First index of ``chineses`` to query; everything before it is skipped.
# Presumably the resume point of an earlier, interrupted run -- set it to 0
# to scan the whole range.
START_INDEX = 8804
REQUEST_TIMEOUT = 10   # seconds before a single request is abandoned
REQUEST_DELAY = 0.1    # seconds to pause between consecutive requests
FAILED = -1            # sentinel stored when a value could not be obtained


def get_score(name_str):
    """Submit *name_str* to the scoring form and return the parsed result.

    Args:
        name_str: The full name to score.

    Returns:
        A dict with the keys ``'name'``, ``'score'`` and ``'status_code'``.
        ``'score'`` is -1 when the response contains no parsable score.
        When the request itself fails (connection error, timeout, ...) both
        ``'score'`` and ``'status_code'`` are -1, since no response exists.
    """
    data = {'xm': name_str,
            'dxfx': 1,
            'Input': '开始测算'}
    try:
        # The timeout keeps one stalled connection from hanging the whole run.
        r = requests.post(SCORE_URL, data=data, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return {'name': name_str, 'score': FAILED, 'status_code': FAILED}

    # A missing "lxml" parser raises here on purpose: it is a setup problem,
    # not a per-name failure, and must not be recorded as thousands of -1 rows.
    soup = BeautifulSoup(r.text, features="lxml")
    try:
        # The first <h4> under div.c_c holds "<label>:<number>分".
        heading = soup.select('div.c_c h4')[0].text
        score = int(heading.split(':')[1].replace(' ', '').replace('分', ''))
    except (IndexError, ValueError):
        # Element missing, no ':' separator, or a non-numeric score.
        score = FAILED
    return {'name': name_str, 'score': score, 'status_code': r.status_code}


def writ(dict0):
    """Append one result row to ``score.csv``.

    Args:
        dict0: A result dict as returned by :func:`get_score`.

    The row is ``name,score,timestamp,status_code`` where the timestamp is
    the local time at which the row is written.
    """
    time0 = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    row = (f"{dict0.get('name')},{dict0.get('score')},"
           f"{time0},{dict0.get('status_code')}\n")
    with open(OUTPUT_PATH, 'a', encoding=OUTPUT_ENCODING) as f:
        f.write(row)


def main():
    """Score ``NAME_PREFIX + ch`` for every character from START_INDEX on."""
    total = len(chineses)
    print(total)  # 20902

    # Probe request with a blank name; it is logged like any other row.
    writ(get_score(' '))

    for i, ch in enumerate(chineses[START_INDEX:], start=START_INDEX):
        dict0 = get_score(NAME_PREFIX + ch)
        writ(dict0)
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), i, total)
        print(dict0)
        # Throttle only after a real request, not for skipped indices.
        time.sleep(REQUEST_DELAY)


if __name__ == "__main__":
    main()
浙公网安备 33010602011771号