## Scrape the location information of all KFC restaurants matching a search
## keyword (the data is fetched and printed, but not analyzed yet).
import requests

url = 'http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword'
user_input = input("enter a search:")
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
}
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# Request result pages 1 through 19, asking for a page size of 10 each time.
for i in range(1, 20):
    data = {
        "cname": "",
        "pid": "",
        "keyword": user_input,
        "pageIndex": i,
        "pageSize": '10'
    }
    res = requests.post(url=url, data=data, headers=headers, timeout=REQUEST_TIMEOUT)
    stats = res.headers
    content = res.text
    print(content + "\n")
    # Stop paging as soon as a response carries no "Vary" header.
    # NOTE(review): presumably the server omits this header once a page has
    # no more results -- confirm against the live endpoint. The header lookup
    # is case-insensitive, and an explicit membership test replaces the former
    # bare `except:` that caught the KeyError (and everything else).
    if "Vary" not in stats:
        break
# Scrape more movie detail data from the Douban Movies chart
# (the data is fetched and printed, but not analyzed yet).
import requests

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
}
# Reference chart URLs for some of the genres:
# https://movie.douban.com/j/chart/top_list?type=10&interval_id=100%3A90&action=&start=0&limit=1   (mystery)
# https://movie.douban.com/j/chart/top_list?type=17&interval_id=100%3A90&action=&start=0&limit=20  (sci-fi)
# https://movie.douban.com/j/chart/top_list?type=11&interval_id=100%3A90&action=&start=0&limit=20  (drama)
# https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&start=0&limit=20  (comedy)
# https://movie.douban.com/j/chart/top_list?type=13&interval_id=100%3A90&action=&start=0&limit=20  (romance)
# https://movie.douban.com/j/chart/top_list?type=25&interval_id=100%3A90&action=&start=0&limit=20  (animation)

# Maps the genre name typed by the user to the chart's `type` query value.
url_secelt = {
    "动作": "5",
    "悬疑": "10",
    "科幻": "17",
    "剧情": "11",
    "喜剧": "24",
    "爱情": "13",
    "动画": "25"
}
user_movie = input("请输入电影类型 \n目前只支持搜索:动作 喜剧 科幻 剧情 爱情 动画 悬疑 \n")
# Exit with a readable message instead of a raw KeyError traceback when the
# typed genre is not one of the supported keys.
if user_movie not in url_secelt:
    raise SystemExit("不支持的电影类型: {}".format(user_movie))

# `action` is sent empty, as in the reference URLs above; the previous
# literal "action=None" put the string "None" on the wire.
url = 'https://movie.douban.com/j/chart/top_list?type={}&interval_id=100%3A90&action='.format(url_secelt[user_movie])
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# Page through the chart 20 entries at a time, up to offset 780.
for i in range(0, 800, 20):
    param = {
        "start": i,
        "limit": "20"
    }
    res = requests.get(url=url, params=param, headers=headers, timeout=REQUEST_TIMEOUT)
    content = res.json()
    stats = res.headers
    # An empty payload means there is nothing left to print for this genre.
    if not content:
        break
    print(content)
    # Also stop when the response carries no "Vary" header.
    # NOTE(review): presumably this signals an empty or abnormal response --
    # confirm. An explicit membership test replaces the former bare `except:`.
    if "Vary" not in stats:
        break
## http://125.35.6.84:81/xk/ -- scrape the detail record of every enterprise
## listed on one page (the data is fetched and printed, but not analyzed yet).
import requests

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
}
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

# Step 1: fetch one listing page (page 3, 15 entries per page as requested).
url = "http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsList"

data = {
    "on": 'true',
    "page": "3",
    "pageSize": "15",
    "productName": "",
    "conditionType": "1",
    "applyname": "",
    "applysn": ""
}
res = requests.post(url=url, data=data, headers=headers, timeout=REQUEST_TIMEOUT)
content = res.json()

# Step 2: look up each listed entry by its "ID" and print the detail payload.
# The detail endpoint is loop-invariant, and the per-entry request uses its
# own names so the listing request's url/data/res/content are not overwritten.
detail_url = "http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsById"
for i in content["list"]:
    detail_data = {
        "id": i["ID"]
    }
    detail_res = requests.post(url=detail_url, data=detail_data, headers=headers, timeout=REQUEST_TIMEOUT)
    detail = detail_res.json()
    print(detail)