import xlrd
import time
import sys
import os
import requests
import sqlite3
import threading
# Directory containing this script; data files are resolved relative to it.
curPath = os.path.abspath(os.path.dirname(__file__))
# Parent of the script directory, appended to the import search path.
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

# MAX_USED_TIMES: threshold compared against a key's `today_used` counter.
# overrun_str: Chinese for "daily quota exceeded, access restricted"
#   (presumably the marker text in Baidu's quota error; unused in view).
# DB_KEY_EXHAUST: sentinel returned when no key is below the threshold.
MAX_USED_TIMES, overrun_str, DB_KEY_EXHAUST = 1980, '天配额超限,限制访问', 'DB_KEY_EXHAUST'

# Absolute path of the SQLite status database. Built with os.path.join so
# the path is also valid on platforms whose separator is not a backslash.
db = os.path.join(curPath, 'py_bdspider_status.db')
def db_init_key_table():
    """Reset ``baidu_map_key_used`` and reload it from ``bdmap_key.txt``.

    The key file lives next to this script and holds one
    ``author<TAB>key`` pair per line; lines shorter than four characters
    are skipped. Every key is inserted with ``today_used`` set to 0 and
    ``update_time`` set to the current local time (``%y%m%d%H%M%S``).

    The file is parsed completely before the table is touched, and the
    DELETE and INSERTs are committed together, so a missing or malformed
    key file no longer leaves the table empty.

    Raises:
        OSError: if the key file cannot be opened.
        ValueError: if a line does not split into exactly two fields.
        sqlite3.Error: on database failures.
    """
    key_file = os.path.join(curPath, 'bdmap_key.txt')
    rows = []
    with open(key_file, 'r', encoding='utf-8') as pf:
        for line in pf:
            if len(line) < 4:
                continue
            author, key = line.replace('\n', '').split('\t')
            stamp = time.strftime("%y%m%d%H%M%S", time.localtime())
            rows.append((author, key, stamp, 0))

    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()
        c.execute('DELETE FROM baidu_map_key_used')
        # Bound parameters instead of string interpolation: values that
        # contain quotes no longer break the statement, and double-quoted
        # text can no longer be parsed by SQLite as an identifier.
        c.executemany(
            'INSERT INTO baidu_map_key_used (author,key,update_time,today_used) '
            'VALUES (?,?,?,?)',
            rows)
        conn.commit()
    finally:
        conn.close()
# db_init_key_table()  # uncomment to reset the key table from bdmap_key.txt
def db_get_one_effective():
    """Return one API key whose ``today_used`` is at most MAX_USED_TIMES.

    Returns:
        The ``key`` column of the first matching row (no ordering is
        requested, so which row comes first is up to SQLite), or the
        DB_KEY_EXHAUST sentinel when no row matches.
    """
    conn = sqlite3.connect(db)
    try:
        row = conn.execute(
            'SELECT key FROM baidu_map_key_used WHERE today_used<=?',
            (MAX_USED_TIMES,)).fetchone()
    finally:
        # Always release the connection; previously the close sat after
        # both return statements (and lacked the call parentheses), so it
        # never ran.
        conn.close()
    return DB_KEY_EXHAUST if row is None else row[0]
def db_update_one_today_used(key):
    """Increment ``today_used`` for ``key`` and refresh its ``update_time``.

    Args:
        key: API key identifying the row in ``baidu_map_key_used``.

    The timestamp is bound as a ``%y%m%d%H%M%S`` string, matching how the
    INSERT in db_init_key_table stores it (it was previously interpolated
    unquoted, i.e. as a numeric literal). The key is bound as a parameter
    as well, so quote characters in it cannot alter the statement.
    """
    conn = sqlite3.connect(db)
    try:
        stamp = time.strftime("%y%m%d%H%M%S", time.localtime())
        conn.execute(
            'UPDATE baidu_map_key_used SET today_used = today_used+1 ,update_time=? WHERE key=?',
            (stamp, key))
        conn.commit()
    finally:
        conn.close()
# Output directory names, relative to the script directory: one for
# responses that were fetched, one for requests that raised.
dir_, dir_exception = 'baidu_map_uid', 'baidu_map_uid_exception'
# Base names of result files already on disk; filled by
# chk_if_requested_file().
requested_file_list = []
# Directory paths WITH a trailing separator (the empty last component makes
# os.path.join add it); the writer functions append the file name directly.
requested_file_dir_str = os.path.join(curPath, dir_, '')
requested_file_dir_exception_str = os.path.join(curPath, dir_exception, '')
# Snapshot of the result directory taken once, at import time. Raises if
# the directory does not exist.
requested_file_dir = os.listdir(requested_file_dir_str)
def chk_if_requested_file():
    """Record the base name of every file in ``requested_file_dir``.

    Each directory entry has a trailing ``.txt`` removed and is appended
    to the module-level ``requested_file_list`` unless already present.
    Only a trailing ``.txt`` is stripped, so a name that happens to
    contain ``.txt`` in the middle is no longer truncated at that point.
    """
    suffix = '.txt'
    # Set mirror of the list so each membership test is O(1).
    known = set(requested_file_list)
    for fname in requested_file_dir:
        base = fname[:-len(suffix)] if fname.endswith(suffix) else fname
        if base not in known:
            known.add(base)
            requested_file_list.append(base)
def write_requested_res(request_name, str_, type_='.txt'):
    """Write ``str_`` to ``<result dir><request_name><type_>`` as UTF-8.

    Any existing file is overwritten. After the write, prints ``ok``, the
    current thread identifier and ``request_name``.
    """
    target_path = '{}{}{}'.format(requested_file_dir_str, request_name, type_)
    with open(target_path, 'w', encoding='utf-8') as out:
        out.write(str_)
    thread_id = threading.get_ident()
    print('ok', thread_id, request_name)
def write_requested_exception_res(request_name, str_, type_='.txt'):
    """Write ``str_`` to ``<exception dir><request_name><type_>`` as UTF-8.

    Any existing file is overwritten.
    """
    target_path = '{}{}{}'.format(
        requested_file_dir_exception_str, request_name, type_)
    with open(target_path, 'w', encoding='utf-8') as out:
        out.write(str_)
# city -> district -> list of request names that still have no result file;
# filled by gen_request_dic_list().
request_dic = {}


def gen_request_dic_list():
    """Populate ``request_dic`` from the task workbook next to this script.

    Reads the first sheet of the ``.xlsx`` workbook, skips row 0 and
    expects exactly nine columns per row: dbid, area_code, name,
    request_name, type, city, district, addr, street. A row is skipped
    when ``city + district + request_name`` matches a base name in
    ``requested_file_list``. Remaining request names are grouped by city
    and district without duplicates.

    Raises:
        ValueError: if a row does not have exactly nine cells.

    NOTE: xlrd 2.0 and later read only ``.xls`` files; opening this
    ``.xlsx`` workbook requires an older xlrd.
    """
    fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
    workbook_path = os.path.join(curPath, fname_source + '.xlsx')
    table = xlrd.open_workbook(workbook_path).sheets()[0]

    # The directory listing is a fixed import-time snapshot, so refreshing
    # the requested list once gives the same result as doing it per row.
    chk_if_requested_file()
    already_requested = set(requested_file_list)

    for row_idx in range(1, table.nrows):
        # Full unpack kept on purpose: it enforces the nine-column layout.
        (dbid, area_code, name_, request_name, type_,
         city, district, addr, street) = table.row_values(row_idx)
        request_name_chk = '%s%s%s' % (city, district, request_name)
        if request_name_chk in already_requested:
            continue
        pending = request_dic.setdefault(city, {}).setdefault(district, [])
        if request_name not in pending:
            pending.append(request_name)
# Build the pending-request index as soon as the module is loaded.
gen_request_dic_list()
fname_source = '官方上传任务.csv_py170829093808-BD_request_name-REDUCTION170829142821'
# Example request:
# http://api.map.baidu.com/place/v2/suggestion?query=瀛嘉天下&region=重庆市&city_limit=true&output=json&ak=oy2Q7IluhhwTGlz6l8pXYv6a0m6hXxr1
# URL template: R-QUERY, R-CITY and R-AK are replaced per request. The
# parameter must be spelled "&region"; the "&reg" prefix is easily
# corrupted into the "®" character by HTML-entity decoding.
base_url = 'http://api.map.baidu.com/place/v2/suggestion?query=R-QUERY&region=R-CITY&city_limit=true&output=json&ak=R-AK'
def fun_(city, timeout=30):
    """Fetch and store suggestion results for every pending request of ``city``.

    For each pending request name a usable key is taken from the database,
    the URL is built from ``base_url`` and the response text is written to
    the result directory; the key's usage counter is incremented after a
    successful fetch. If anything in that sequence raises, a marker file is
    written to the exception directory and processing continues with the
    next request.

    Args:
        city: City name; a city without pending requests is a no-op
            instead of raising KeyError.
        timeout: Seconds passed to ``requests.get`` so a stalled
            connection cannot block the worker forever.

    Processing for the city stops completely once no usable key is left
    (previously only the current district's loop was left, so every
    remaining district hit the database again just to stop as well).

    NOTE(review): a quota-exceeded response (see ``overrun_str``) is still
    stored as a normal result; confirm whether it should be treated as a
    failure instead.
    """
    # Local import: urllib.parse is not among the file-level imports.
    from urllib.parse import quote

    # The directory listing is a fixed import-time snapshot, so one refresh
    # is equivalent to refreshing before every request.
    chk_if_requested_file()
    already_requested = set(requested_file_list)

    for district, request_names in request_dic.get(city, {}).items():
        for request_name in request_names:
            request_name_chk = '%s%s%s' % (city, district, request_name)
            if request_name_chk in already_requested:
                continue
            ak = db_get_one_effective()
            if ak == DB_KEY_EXHAUST:
                print(DB_KEY_EXHAUST)
                return
            # Percent-encode the values so characters such as '&', '#' or
            # spaces in a name cannot split or truncate the query string.
            url_ = (base_url
                    .replace('R-QUERY', quote(str(request_name), safe=''))
                    .replace('R-CITY', quote(str(city), safe=''))
                    .replace('R-AK', ak))
            try:
                bd_res_json_str = requests.get(url_, timeout=timeout).text
                db_update_one_today_used(ak)
                write_requested_res(request_name_chk, bd_res_json_str)
            except Exception:
                # Best effort: record the failure and move on.
                bd_res_json_str = '请求百度-异常'
                write_requested_exception_res(request_name_chk, bd_res_json_str)
                print(bd_res_json_str)
class MyThread(threading.Thread):
    """Thread that runs ``func()`` or, when an argument is given, ``func(args)``.

    The file used to define this class twice; the second definition
    (``func, args``) silently replaced the first (``func`` only). This
    single class supports both call shapes.

    Args:
        func: Callable executed in the thread.
        args: Optional single value passed to ``func`` as its only
            positional argument. An explicit ``None`` is still passed
            through; only omitting the argument selects the no-argument
            call.
    """

    # Sentinel distinguishing "no argument supplied" from an explicit None.
    _NO_ARGS = object()

    def __init__(self, func, args=_NO_ARGS):
        threading.Thread.__init__(self)
        self.func, self.args = func, args

    def run(self):
        if self.args is MyThread._NO_ARGS:
            self.func()
        else:
            self.func(self.args)
# Cities to query.
target_city_list = [
    '广州市',
    '厦门市',
    '深圳市',
    '北京市',
    '杭州市',
    '成都市',
    '上海市',
    '西安市',
]
# Number of worker threads: one per target city.
thread_sum = len(target_city_list)
def main():
    """Start one MyThread running ``fun_`` per target city and wait for all.

    Only the first ``thread_sum`` entries of ``target_city_list`` are used.
    """
    threads_list = [
        MyThread(fun_, city) for city in target_city_list[:thread_sum]
    ]
    for t in threads_list:
        # Set the flag itself; assigning to `setDaemon` only replaced the
        # method with False and left the thread's daemon state untouched.
        t.daemon = False
        t.start()
    for t in threads_list:
        t.join()


if __name__ == '__main__':
    main()