马士基(MSK)港口五字码爬虫的Python开发
# Brand codes / brand names written unchanged into every location row.
_BRANDS = "MAEU,MCPU,SAFM,SEAU,SEJJ"
_BRAND_NAMES = "MCC Transport,Maersk Line,Safmarine,SeaGo Line,SeaLand"

# Column list of the INSERT; createtime and sortv are filled with now() and 0.
_INSERT_COLUMNS = (
    "UUID,type,maerskGeoLocationId,unLocCode,mskcode,countryName,countryCode,"
    "cityName,maerskRkstCode,maerskRktsCode,timezoneId,brands,brandNames,"
    "countryGeoId,regionCode,regionName,createtime,sortv"
)


def _city_url(citykey):
    """Return the locations URL that searches cities named *citykey*.

    *citykey* is treated as raw text and fully percent-encoded, so spaces,
    ``&``, ``#`` and non-ASCII characters cannot corrupt the query string.
    """
    # Imported locally so this helper does not rely on the module's import block.
    from urllib.parse import quote

    return (
        "http://api.maersk.com/locations/?brand=maeu&cityName="
        + quote(str(citykey), safe="")
        + "&type=city&pageSize=50&sort=cityName"
    )


def _sql_text(value):
    """Stringify *value* and drop single quotes so it cannot close a SQL literal."""
    return str(value).replace("'", "")


def _location_fields(x):
    """Return the 15 raw column values of one location record, in column order.

    The list is both the input of the record's md5 key and, after quote
    stripping, the values of the INSERT statement.

    Raises:
        KeyError: if a mandatory key is missing from *x*.
    """
    # Test for the key itself; searching str(x) would also match the text
    # inside a value or a longer key name.
    un_loc_code = x["unLocCode"] if "unLocCode" in x else x["maerskGeoLocationId"]
    region_code = str(x["regionCode"]) if "regionCode" in x else ""
    region_name = str(x["regionName"]).replace("'", "") if "regionName" in x else ""
    return [
        str(x["type"]),
        str(x["maerskGeoLocationId"]),
        str(un_loc_code),  # unLocCode column
        str(un_loc_code),  # mskcode column receives the same value
        str(x["countryName"]),
        str(x["countryCode"]),
        str(x["cityName"]),
        str(x["maerskRkstCode"]),
        str(x["maerskRktsCode"]),
        str(x["timezoneId"]),
        _BRANDS,
        _BRAND_NAMES,
        str(x["countryGeoId"]),
        region_code,
        region_name,
    ]


def _insert_statement(uuid, fields):
    """Build the INSERT statement for one record from its key and raw fields."""
    # NOTE(review): quote stripping is the sanitisation this file already used
    # for some columns; it is now applied to all of them. Bound parameters
    # would be the proper fix, but the db helper's API is not visible here.
    values = [uuid] + [_sql_text(field) for field in fields]
    return (
        " INSERT into tablexxxx (" + _INSERT_COLUMNS + ") VALUES ('"
        + "','".join(values)
        + "',now(),0) "
    )


def op_ports(citykey):
    """Fetch the locations matching *citykey* and insert each one into tablexxxx.

    Best-effort: every error is printed and swallowed. A record that fails
    (missing key, database error) is skipped without stopping the remaining
    records. Returns None.
    """
    try:
        xurl = _city_url(citykey)
        # print(xurl)
        res = sp.get_res_no_verify(xurl)
        # Wrap the body in a "data" array; back-to-back objects ("}{") are
        # separated with commas so the result parses as JSON.
        res = '{"data":[' + res + ']}'
        res = res.replace("\n", "")
        res = res.replace("}{", "} , {")
        print(res)
        payload = json.loads(res)
        for x in payload["data"]:
            try:
                fields = _location_fields(x)
                # The md5 input is the raw fields joined with "','"; it must
                # stay unchanged so existing UUIDs keep matching.
                uuid = jd.get_md5("','".join(fields))
                sqlx = _insert_statement(uuid, fields)
                print("")
                print(sqlx)
                print("")
                db.exec_sql(sqlx)
                print("---- databse insert success ----")
            except Exception as e:
                print(str(e))
    except Exception as e:
        print(str(e))


# De-duplicate location rows
def remove_unlockcode():
    """Delete rows of tablexxxx that repeat an earlier (cityName, countryName) pair.

    Rows are read newest first (ORDER BY createtime desc), so the first row
    seen for each pair is kept and every later one is deleted by UUID.
    """
    sql = " SELECT UUID,cityName,countryName from tablexxxx ORDER BY createtime desc "
    arr = db.get_sql(sql)
    seen = set()  # (cityName, countryName) pairs; a tuple avoids "AB"+"C" == "A"+"BC"
    arrcode = []  # concatenated keys in first-seen order, printed at the end
    arrsql = []
    for x in arr:
        pair = (str(x[1]), str(x[2]))
        if pair in seen:
            print(str(x[0]))
            arrsql.append(" delete from tablexxxx where UUID='" + x[0] + "'")
        else:
            seen.add(pair)
            arrcode.append(pair[0] + pair[1])
    if arrsql:
        db.exec_sqls(arrsql)
    print(arrcode)
    print("done")


# Refresh the Maersk port codes
def op_msk_port_data():
    """Re-run op_ports for every distinct cityName stored with a non-null unLocCode."""
    sql = " SELECT cityName from tablexxxx WHERE unLocCode is not null GROUP BY cityName ORDER BY cityName desc "
    arr = db.get_sql(sql)
    for x in arr:
        op_ports(x[0])


# Process the user search records
def op_user_search_his():
    """Run op_ports for each pending search keyword, then delete the processed rows.

    Every row read from table_search_xxxxx is queued for deletion. A keyword
    already present in redis is skipped; otherwise it is recorded in redis
    before op_ports is called for it. Errors are printed and swallowed.
    """
    try:
        arr_sql = []
        sql = "select id,k from table_search_xxxxx"
        arr = db.get_sql(sql)
        print("get arr from table_search_xxxxx length is:" + str(len(arr)))
        for x in arr:
            row_id = str(x[0])
            key = str(x[1])
            arr_sql.append("delete from table_search_xxxxx where id='" + row_id + "';")
            if redisx.has_key(key):
                print(key + " have done redis")
                continue
            redisx.set(key, "")
            op_ports(key)
        if arr_sql:
            db.exec_sqls(arr_sql)
    except Exception as ex:
        print(str(ex))


# Search by city keyword
if __name__ == '__main__':
    # Polling mode (disabled): call op_user_search_his() in a loop,
    # sleeping 90 seconds between runs.
    op_ports('apapa')
以上方法无需登录、也无需token,即可获取马士基船公司的港口五字码。
这些数据可用作POL(装货港)、POD(卸货港)的基础数据。需要注意的是,马士基的港口五字码可能会更新(例如青岛港在2020年是CNQIN,目前已经变为国际化的CNTAO);并且各个船公司的港口表在Web或App上体现的结构完全不一样,例如COSCO使用的则是long数值类型。