利用Python(copy_from)将BeiJing T-Drive Taxi Trajectories txt文件批量导入到 PostgresSQL(PostGIS)
import psycopg2
import pandas as pd
from io import StringIO
import numpy as np
import os
def table_exist(table_name=None, conn=None, cur=None):
try:
cur.execute("select to_regclass(" + "\'" + table_name + "\'" + ") is not null")
rows = cur.fetchall()
except Exception as e:
rows = []
conn.close()
if rows:
data = rows
flag = data[0][0]
return flag
txt_dir = r'F:/DataSets/北京/北京出租车/BeiJing T-Drive Taxi Trajectories/Microsoft T-drive dataset/T-drive Taxi Trajectories/release/taxi_log_2008_by_id'
conn = psycopg2.connect(database="beijing", user="jiangshan", password="xxxx", host="192.168.xx.xxx", port="5432")
cur = conn.cursor()
table_name = "T_Drive"
the_geom_SRID = "4326"
# CREATE TABLE IF table IS NOT EXIST
# 查询出来的表是否存在的状态,存在则为True,不存在则为False
table_flg = table_exist(table_name, conn, cur)
if table_flg is False:
cur.execute("CREATE TABLE IF NOT EXISTS "+table_name+" (t_id BIGINT, tstamp TIMESTAMP, lon DOUBLE PRECISION, lat DOUBLE PRECISION, WKT TEXT)")
conn.commit()
# INSERT DATA FROM TXT or CSV FILE
# 插入数据
print('IMPORT FILES......') # copy_from 不支持 GEOMETRY对象批量导入
txt_files = os.listdir(txt_dir)
ExceptionDataFile = []
id_list = []
for txt in txt_files:
# print(txt)
txt_path = os.path.join(txt_dir, txt)
data = pd.read_csv(txt_path, header=None, names=['t_id', 'tstamp', 'lon', 'lat'])
shape = data.shape
if shape[0] >= 5:
data = data.round(decimals=7)
add_new_value = 'POINT(' + data['lon'].astype(str) + ' ' + data['lat'].astype(str) + ')'
data['pt_wkt'] = add_new_value # WKT字段备用 若不想要,可以删除表中该字段及其数据
t_id = np.unique(data.t_id.values)[0]
id_list.append(t_id)
# dataframe类型转换为IO缓冲区中的str类型
output = StringIO()
data.to_csv(output, sep='\t', index=False, header=False)
output = output.getvalue()
# print(output)
cur.copy_from(StringIO(output), table_name)
conn.commit()
else:
ExceptionDataFile.append(txt)
print('IMPORT FILES OK!!') # copy_from 不支持 GEOMETRY对象批量导入
print('ADD A GEOMETRY COLUMN......')
# ADD A GEOMETRY COLUMN
cur.execute("alter table " + table_name + " add the_geom GEOMETRY")
conn.commit()
print(' UPDATE THE GEOMETRY.....')
# UPDATE THE GEOMETRY
for id in id_list:
sql = "UPDATE " + table_name + " set the_geom=st_geomfromtext(\'POINT(\'|| lon ||' '|| lat ||\')\',\'"+the_geom_SRID+"\') where t_id = {0}".format(id)
cur.execute(sql)
conn.commit()
cur.close()
conn.close()
print('done')
print("Exception Data File:", ExceptionDataFile)
个人学习记录

浙公网安备 33010602011771号