MSK的港口五字码的爬虫Python开发
def op_ports(citykey):
    """Fetch Maersk locations matching ``citykey`` and insert them into ``tablexxxx``.

    The keyword is percent-encoded into a query against the Maersk locations
    endpoint, the response text is wrapped into a single JSON document, and
    one row is inserted per returned location.  Each row's UUID is
    ``jd.get_md5`` of the row's field values joined with ``','``.

    Errors are printed, never raised: a failure while fetching or parsing the
    response abandons the call, while a failure on a single location skips
    only that location.

    Args:
        citykey: City name or search keyword; converted with ``str``.
    """
    from urllib.parse import quote

    brands = "MAEU,MCPU,SAFM,SEAU,SEJJ"
    brand_names = "MCC Transport,Maersk Line,Safmarine,SeaGo Line,SeaLand"

    try:
        # Percent-encode the whole keyword (not only spaces) so characters
        # such as "&", "#" or non-ASCII text cannot corrupt the query string.
        encoded_city = quote(str(citykey), safe="")
        xurl = (
            "http://api.maersk.com/locations/?brand=maeu&cityName="
            + encoded_city
            + "&type=city&pageSize=50&sort=cityName"
        )
        res = sp.get_res_no_verify(xurl)
        # Presumably the body is a run of back-to-back JSON objects: wrap it in
        # an array and put commas between adjacent objects so it parses as one
        # document.  TODO confirm against the current API response format.
        res = '{"data":[' + res + ']}'
        res = res.replace("\n", "")
        res = res.replace("}{", "} , {")
        print(res)
        locations = json.loads(res)["data"]
    except Exception as e:
        print(str(e))
        return

    for x in locations:
        try:
            # Test for the key on the record itself; searching the record's
            # text form would also match the name inside some other value.
            if "unLocCode" in x:
                un_loc_code = x["unLocCode"]
            else:
                # No UN/LOCODE on this record: fall back to Maersk's own id.
                un_loc_code = x["maerskGeoLocationId"]
            region_code = str(x["regionCode"]) if "regionCode" in x else ""
            region_name = ""
            if "regionName" in x:
                region_name = str(x["regionName"]).replace("'", "")

            # NOTE(review): both "maerskRkstCode" and "maerskRktsCode" are read
            # as separate keys; one of them may be a typo - confirm with the API.
            rkst_code = str(x["maerskRkstCode"])
            rkts_code = str(x["maerskRktsCode"])

            # The UUID is derived from the raw (unstripped) field values, so
            # this join must keep its exact field order and separator.
            fingerprint = "','".join([
                str(x["type"]),
                str(x["maerskGeoLocationId"]),
                str(un_loc_code),
                str(un_loc_code),
                str(x["countryName"]),
                str(x["countryCode"]),
                str(x["cityName"]),
                rkst_code,
                rkts_code,
                str(x["timezoneId"]),
                brands,
                brand_names,
                str(x["countryGeoId"]),
                region_code,
                region_name,
            ])
            row_uuid = jd.get_md5(fingerprint)

            # NOTE(review): no lookup for an existing UUID is made before the
            # insert.
            # NOTE(review): this statement is built by string concatenation
            # from remote data and only some fields have quotes stripped.
            # Switch to a parameterized query if the db helper supports one.
            values = [
                row_uuid,
                str(x["type"]),
                str(x["maerskGeoLocationId"]),
                str(un_loc_code),
                str(un_loc_code),
                str(x["countryName"]).replace("'", ""),
                str(x["countryCode"]),
                str(x["cityName"]).replace("'", ""),
                rkst_code,
                rkts_code,
                str(x["timezoneId"]).replace("'", ""),
                brands,
                brand_names,
                str(x["countryGeoId"]),
                region_code,
                region_name,
            ]
            sqlx = (
                " INSERT into tablexxxx (UUID,type,maerskGeoLocationId,unLocCode,mskcode,"
                "countryName,countryCode,cityName,maerskRkstCode,maerskRktsCode,timezoneId,"
                "brands,brandNames,countryGeoId,regionCode,regionName,createtime,sortv) "
                "VALUES ('" + "','".join(values) + "',now(),0) "
            )
            print("")
            print(sqlx)
            print("")
            db.exec_sql(sqlx)
            print("---- databse insert success ----")
        except Exception as e:
            # Best effort: report the problem and move on to the next location.
            print(str(e))
# Deduplicate port rows (one row is kept per cityName + countryName)
def remove_unlockcode():
    """Delete rows of ``tablexxxx`` that repeat an earlier (cityName, countryName) pair.

    Rows are read ordered by ``createtime`` descending, so the first row seen
    for each pair (the most recently created one) is kept and every later row
    with the same pair is deleted by its UUID.  The UUID of each row to be
    deleted and the list of kept city/country keys are printed.
    """
    sql = " SELECT UUID,cityName,countryName from tablexxxx ORDER BY createtime desc "
    rows = db.get_sql(sql)

    # Compare (city, country) as a tuple: gluing the two strings together
    # would make e.g. ("ab", "c") and ("a", "bc") look like the same place.
    seen_pairs = set()
    arrcode = []  # concatenated keys, kept only for the summary print below
    arrsql = []
    for x in rows:
        pair = (str(x[1]), str(x[2]))
        if pair in seen_pairs:
            print(str(x[0]))
            arrsql.append(" delete from tablexxxx where UUID='" + str(x[0]) + "'")
        else:
            seen_pairs.add(pair)
            arrcode.append(pair[0] + pair[1])

    if arrsql:
        db.exec_sqls(arrsql)
    print(arrcode)
    print("done")
# Refresh the Maersk carrier port codes
def op_msk_port_data():
    """Re-run ``op_ports`` for every distinct city name already stored.

    City names are taken from rows of ``tablexxxx`` whose ``unLocCode`` is not
    null, grouped by name and visited in descending name order.
    """
    city_rows = db.get_sql(
        " SELECT cityName from tablexxxx WHERE unLocCode is not null GROUP BY cityName ORDER BY cityName desc "
    )
    for row in city_rows:
        city_name = row[0]
        op_ports(city_name)
# Process user search records
def op_user_search_his():
    """Process every row of ``table_search_xxxxx`` and then delete those rows.

    For each (id, keyword) row a delete statement is queued.  A keyword that
    redis does not know yet is recorded there and passed to ``op_ports``; a
    known keyword is only reported.  The queued deletes run in one batch after
    all rows have been visited.  Any exception is printed, not raised.
    """
    try:
        rows = db.get_sql("select id,k from table_search_xxxxx")
        print("get arr from table_search_xxxxx length is:" + str(len(rows)))

        pending_deletes = []
        for row in rows:
            row_id, keyword = str(row[0]), str(row[1])
            # The row is removed whether or not the keyword was seen before.
            pending_deletes.append(f"delete from table_search_xxxxx where id='{row_id}';")
            if not redisx.has_key(keyword):
                # Mark the keyword before fetching so it is not fetched again.
                redisx.set(keyword, "")
                op_ports(keyword)
            else:
                print(keyword + " have done redis")

        if pending_deletes:
            db.exec_sqls(pending_deletes)
    except Exception as ex:
        print(str(ex))
# Search by city keyword
if __name__ == "__main__":
    # One-off lookup for a single city keyword.  The polling mode left
    # disabled here would instead loop forever:
    #     while True:
    #         op_user_search_his()
    #         time.sleep(90)
    op_ports("apapa")
以上方法无需登录、也无需 token,即可获取马士基船公司的港口五字码。
这些数据可用作 POL、POD 的基础数据。需要注意,马士基的港口五字码可能会更新(例如青岛港在 2020 年是 CNQIN,目前已变为国际通用的 CNTAO);并且各个船公司的港口表在 web 或 app 上呈现的结构完全不同,例如 COSCO 的港口标识是 long 数值类型。

浙公网安备 33010602011771号