利用Python(copy_from)将beijing geolife trajectories txt文件批量导入到 PostgresSQL(PostGIS)

import skmob.io.file as file# https://scikit-mobility.github.io/scikit-mobility/
import psycopg2
import os
import pandas as pd
from io import StringIO
import numpy as np

def table_exist(table_name=None, conn=None, cur=None):
    try:
        cur.execute("select to_regclass(" + "\'" + table_name + "\'" + ") is not null")
        rows = cur.fetchall()
    except Exception as e:
        rows = []
        conn.close()
    if rows:
        data = rows
        flag = data[0][0]
        return flag

def geolif2csv(user_ids, path_to_geolife_data_dir, csv_file_dir, compress_radius_km):
    for ind in range(len(user_ids)):
        user_id = user_ids[ind: ind + 1]
        TrajData = file.load_geolife_trajectories(path_to_geolife_data_dir=path_to_geolife_data_dir,
                                                  user_ids=user_id,
                                                  filter_kwargs={},
                                                  compress_kwargs={'spatial_radius_km': compress_radius_km})
        # 列表
        Trjs_name_list = ['lat', 'lon', 'datatime', 'uid']
        # list转dataframe
        df = pd.DataFrame(TrajData.values, columns=Trjs_name_list)
        csv_name = user_id[0] + ".csv"
        csv_path = os.path.join(csv_file_dir, csv_name)
        # 保存到
        df.to_csv(csv_path, header=False, index=False, encoding="utf-8")

path_to_geolife_data_dir = r'D:/data/Geolife Trajectories 1.3/'
csv_file_dir = "D:/data/Geolife Trajectories 1.3/Data_csv"
user_ids = os.listdir(path_to_geolife_data_dir + "Data")
pgisCon = psycopg2.connect(database="beijing", user="jiangshan", password="xxxx", host="192.168.x.xxx", port="5432")
pgisCursor = pgisCon.cursor()
table_name = "geo_life_table"
the_geom_SRID = "4326"
# CREATE TABLE IF table IS NOT EXIST
# 查询出来的表是否存在的状态,存在则为True,不存在则为False
table_flg = table_exist(table_name, pgisCon, pgisCursor)
if table_flg is False:
    sql = "DROP TABLE public.{0} CASCADE".format(table_name)# -- 删除表
    sql = "CREATE TABLE IF NOT EXISTS {0} (lat  DOUBLE PRECISION, lon  DOUBLE PRECISION, tstamp TIMESTAMP, u_id TEXT)".format(table_name)
    pgisCursor.execute(sql)
    pgisCon.commit()

print("Geolife Data To CSV.......")
compress_radius_km = 0.005
geolif2csv(user_ids, path_to_geolife_data_dir, csv_file_dir, compress_radius_km)
print("Geolife Data To CSV OK !!!")

# 插入数据
print('IMPORT FILES......') # copy_from 不支持 GEOMETRY对象批量导入
txt_files = os.listdir(csv_file_dir)
col_names = ['lat', 'lon', 'datatime', 'uid']
dtype_dic = {'lat': object, 'lon': object, 'datatime': object, 'uid': str}
uid_list = []
ExceptionDataFile = []
for txt in txt_files:
    # print(txt)
    txt_path = os.path.join(csv_file_dir, txt)
    data = pd.read_csv(txt_path, header=None, names=col_names, dtype=dtype_dic)
    shape = data.shape
    if shape[0] >= 5:
        data = data.round(decimals=7)
        u_id = np.unique(data.uid.values)[0]
        uid_list.append(u_id)
        # dataframe类型转换为IO缓冲区中的str类型
        output = StringIO()
        data.to_csv(output, sep='\t', index=False, header=False)
        output = output.getvalue()

        pgisCursor.copy_from(StringIO(output), table_name)
        pgisCon.commit()
    else:
        ExceptionDataFile.append(txt)
print('IMPORT FILES OK!!')  # copy_from 不支持 GEOMETRY对象批量导入

print('ADD A GEOMETRY COLUMN......')
# ADD A GEOMETRY COLUMN
pgisCursor.execute("alter table " + table_name + " add the_geom GEOMETRY")
pgisCon.commit()

print('UPDATE THE GEOMETRY.....')
# UPDATE THE GEOMETRY
for uid in uid_list:
    sql = "UPDATE " + table_name + " set the_geom=st_geomfromtext(\'POINT(\'|| lon ||' '|| lat ||\')\',\'"+the_geom_SRID+"\') where u_id = \'{0}\'".format(uid)
    pgisCursor.execute(sql)
    pgisCon.commit()

print("Exception Data File:", ExceptionDataFile)

print('CREATE INDEX OF THE GEOMETRY.....')
sql = "CREATE INDEX {0} ON t_drive USING gist ({1})".format("pt_idx", "the_geom")
pgisCursor.execute(sql)
pgisCon.commit()

pgisCursor.close()
pgisCon.close()
print('done')

  

posted @ 2021-03-28 20:16  土博姜山山  阅读(491)  评论(0)    收藏  举报