使用Python开发爬虫,抓取MSK(马士基)的港口五字码

def op_ports(citykey):
    """Look up Maersk locations for a city name and insert them into ``tablexxxx``.

    Requests the Maersk locations endpoint for ``citykey``, parses the
    response and issues one INSERT per returned record. Errors are printed
    rather than raised: a failure while fetching or parsing ends the call,
    while a failure on a single record skips only that record.

    Args:
        citykey: City name to search for. It is converted with ``str`` and
            percent-encoded before being placed in the query string.

    Returns:
        None.
    """
    # Local import so the block does not depend on the file's import section.
    from urllib.parse import quote

    brands = "MAEU,MCPU,SAFM,SEAU,SEJJ"
    brand_names = "MCC Transport,Maersk Line,Safmarine,SeaGo Line,SeaLand"
    columns = (
        "UUID,type,maerskGeoLocationId,unLocCode,mskcode,countryName,"
        "countryCode,cityName,maerskRkstCode,maerskRktsCode,timezoneId,"
        "brands,brandNames,countryGeoId,regionCode,regionName,createtime,sortv"
    )

    try:
        # Percent-encode the whole keyword (not only spaces) so characters
        # such as "&", "#" or non-ASCII text cannot alter the query string.
        xurl = (
            "http://api.maersk.com/locations/?brand=maeu&cityName="
            + quote(str(citykey), safe="")
            + "&type=city&pageSize=50&sort=cityName"
        )
        res = sp.get_res_no_verify(xurl)
        # The body is treated as back-to-back JSON objects: wrap it in an
        # array and insert the missing commas so it parses as one document.
        res = '{"data":[' + res + ']}'
        res = res.replace("\n", "")
        res = res.replace("}{", "} , {")
        print(res)
        records = json.loads(res)["data"]
    except Exception as e:
        print(str(e))
        return

    for x in records:
        try:
            # Test for the key itself; searching str(x) would also match the
            # text inside values or inside longer key names.
            if "unLocCode" in x:
                un_loc_code = str(x["unLocCode"])
            else:
                # Fall back to the Maersk geo id when no UN/LOCODE is present.
                un_loc_code = str(x["maerskGeoLocationId"])
            region_code = str(x["regionCode"]) if "regionCode" in x else ""
            region_name = ""
            if "regionName" in x:
                region_name = str(x["regionName"]).replace("'", "")

            # NOTE(review): both "maerskRkstCode" and "maerskRktsCode" are
            # read; confirm the second spelling really exists in the payload.
            values = [
                str(x["type"]),
                str(x["maerskGeoLocationId"]),
                un_loc_code,
                un_loc_code,  # stored twice: unLocCode and mskcode columns
                str(x["countryName"]),
                str(x["countryCode"]),
                str(x["cityName"]),
                str(x["maerskRkstCode"]),
                str(x["maerskRktsCode"]),
                str(x["timezoneId"]),
                brands,
                brand_names,
                str(x["countryGeoId"]),
                region_code,
                region_name,
            ]

            # The hash input is kept exactly as before (raw values joined by
            # "','") so UUIDs of already stored rows remain reproducible.
            row_uuid = jd.get_md5("','".join(values))

            # Strip single quotes from every interpolated value, not only a
            # few of them, so no field can terminate its SQL string literal.
            # NOTE(review): switch to a parameterized query if the db helper
            # supports bound parameters.
            sql_values = [row_uuid] + [v.replace("'", "") for v in values]
            sqlx = (
                "  INSERT into tablexxxx (" + columns + ") VALUES ('"
                + "','".join(sql_values)
                + "',now(),0) "
            )
            print("")
            print(sqlx)
            print("")
            db.exec_sql(sqlx)
            print("---- databse insert success ----")
        except Exception as e:
            # Best effort: report the problem and continue with the next record.
            print(str(e))


# De-duplicate location rows (one row kept per cityName/countryName pair)
def remove_unlockcode():
    """Delete rows of ``tablexxxx`` that repeat an already seen city/country pair.

    Rows are read ordered by ``createtime`` descending, so the first row seen
    for each pair is kept and every later row for that pair is deleted by its
    UUID. The kept keys are printed, followed by ``done``.

    Returns:
        None.
    """
    sql = "   SELECT UUID,cityName,countryName from  tablexxxx   ORDER BY createtime desc   "
    arr = db.get_sql(sql)
    # A set of (city, country) tuples gives O(1) membership tests and avoids
    # the ambiguity of concatenated keys ("AB" + "C" equals "A" + "BC").
    seen = set()
    arrcode = []
    arrsql = []
    for x in arr:
        pair = (str(x[1]), str(x[2]))
        if pair in seen:
            print(str(x[0]))
            arrsql.append(" delete from  tablexxxx where UUID='" + x[0] + "'")
        else:
            seen.add(pair)
            # Kept only for the summary print below, in first-seen order.
            arrcode.append(pair[0] + pair[1])
    if arrsql:
        db.exec_sqls(arrsql)
    print(arrcode)
    print("done")


# Refresh the Maersk carrier codes
def op_msk_port_data():
    """Re-run ``op_ports`` for every distinct city name stored in ``tablexxxx``.

    Only rows whose ``unLocCode`` is not null are considered; the city names
    are processed in descending order.
    """
    city_rows = db.get_sql(
        "  SELECT cityName from  tablexxxx WHERE unLocCode is not null GROUP BY cityName  ORDER BY cityName desc  "
    )
    for city_row in city_rows:
        city_name = city_row[0]
        op_ports(city_name)


# Process the user search records
def op_user_search_his():
    """Run ``op_ports`` for each queued search keyword, then clear the queue.

    Reads ``id`` and ``k`` from ``table_search_xxxxx``. Every row is scheduled
    for deletion; a keyword not yet recorded in redis is recorded there and
    passed to ``op_ports``. The deletes are executed together after the loop.
    Any exception is printed instead of raised.
    """
    try:
        pending_deletes = []
        rows = db.get_sql("select id,k from table_search_xxxxx")
        print(f"get arr from table_search_xxxxx length is:{len(rows)}")
        for row in rows:
            record_id = str(row[0])
            keyword = str(row[1])
            pending_deletes.append(
                "delete from table_search_xxxxx where id='" + record_id + "';"
            )
            if redisx.has_key(keyword):
                # Keyword was handled before; the row is still deleted.
                print(f"{keyword} have done redis")
            else:
                # Mark the keyword before fetching so it is not fetched again.
                redisx.set(keyword, "")
                op_ports(keyword)
        if pending_deletes:
            db.exec_sqls(pending_deletes)
    except Exception as ex:
        print(str(ex))


# Script entry point: search by city keyword
if __name__ == '__main__':
    # Disabled polling loop: it would process the queued user searches and
    # then sleep for 90 seconds, forever.
    # while True:
    #     op_user_search_his()
    #     time.sleep(90)
    # One-off run for the keyword 'apapa'.
    op_ports('apapa')
  

  

 

以上是不需要登录也不需要token的方法获取马士基船公司的港口五字码。

 

这些数据用作 POL、POD 的基础数据。马士基的港口五字码可能会更新(例如青岛港在2020年是 CNQIN,目前已经变为国际化的 CNTAO),并且各个船公司的港口表在 web 或 app 上体现出来的结构完全不一样,COSCO 的则是 long 数值类型。

 

posted @ 2023-01-13 18:06  liskov_design  阅读(131)  评论(0编辑  收藏  举报