利用Python(copy_from)将BeiJing T-Drive Taxi Trajectories txt文件批量导入到 PostgresSQL(PostGIS)

import psycopg2
import pandas as pd
from io import StringIO
import numpy as np
import os
def table_exist(table_name=None, conn=None, cur=None):
    try:
        cur.execute("select to_regclass(" + "\'" + table_name + "\'" + ") is not null")
        rows = cur.fetchall()
    except Exception as e:
        rows = []
        conn.close()
    if rows:
        data = rows
        flag = data[0][0]
        return flag

txt_dir = r'F:/DataSets/北京/北京出租车/BeiJing T-Drive Taxi Trajectories/Microsoft T-drive dataset/T-drive Taxi Trajectories/release/taxi_log_2008_by_id'
conn = psycopg2.connect(database="beijing", user="jiangshan", password="xxxx", host="192.168.xx.xxx", port="5432")
cur = conn.cursor()

table_name = "T_Drive"
the_geom_SRID = "4326"

# CREATE TABLE IF table IS NOT EXIST
# 查询出来的表是否存在的状态,存在则为True,不存在则为False
table_flg = table_exist(table_name, conn, cur)
if table_flg is False:
    cur.execute("CREATE TABLE IF NOT EXISTS "+table_name+" (t_id BIGINT, tstamp TIMESTAMP, lon  DOUBLE PRECISION, lat  DOUBLE PRECISION, WKT TEXT)")
    conn.commit()

# INSERT DATA FROM TXT or CSV FILE
# 插入数据
print('IMPORT FILES......') # copy_from 不支持 GEOMETRY对象批量导入
txt_files = os.listdir(txt_dir)
ExceptionDataFile = []
id_list = []
for txt in txt_files:
    # print(txt)
    txt_path = os.path.join(txt_dir, txt)
    data = pd.read_csv(txt_path, header=None, names=['t_id', 'tstamp', 'lon', 'lat'])
    shape = data.shape
    if shape[0] >= 5:
        data = data.round(decimals=7)

        add_new_value = 'POINT(' + data['lon'].astype(str) + ' ' + data['lat'].astype(str) + ')'
        data['pt_wkt'] = add_new_value  # WKT字段备用  若不想要,可以删除表中该字段及其数据

        t_id = np.unique(data.t_id.values)[0]
        id_list.append(t_id)

        # dataframe类型转换为IO缓冲区中的str类型
        output = StringIO()
        data.to_csv(output, sep='\t', index=False, header=False)
        output = output.getvalue()
        # print(output)
        cur.copy_from(StringIO(output), table_name)
        conn.commit()
    else:
        ExceptionDataFile.append(txt)
print('IMPORT FILES OK!!') # copy_from 不支持 GEOMETRY对象批量导入

print('ADD A GEOMETRY COLUMN......')
# ADD A GEOMETRY COLUMN
cur.execute("alter table " + table_name + " add the_geom GEOMETRY")
conn.commit()

print(' UPDATE THE GEOMETRY.....')
# UPDATE THE GEOMETRY
for id in id_list:
    sql = "UPDATE " + table_name + " set the_geom=st_geomfromtext(\'POINT(\'|| lon ||' '|| lat ||\')\',\'"+the_geom_SRID+"\') where t_id = {0}".format(id)
    cur.execute(sql)
    conn.commit()

cur.close()
conn.close()
print('done')
print("Exception Data File:", ExceptionDataFile)

  

posted @ 2021-03-24 13:42  土博姜山山  阅读(154)  评论(0)    收藏  举报