import json
import psycopg2
import sys
conn = psycopg2.connect(dbname='dev', host='127.0.0.1',
port='5439', user='master', password='123456')
cur = conn.cursor()
column_names = None
def execute(sql):
cur.execute(sql)
conn.commit()
def build_value(item):
global column_names
column_names = []
fields = []
if tablename.startswith('sold_'):
keylist = ['soldUrl', 'country', 'category0', 'itemNumber', 'price', 'quantity', 'Date_of_Purchase', 'ts',
'ts_string']
else:
keylist = None
for key in sorted(item.keys()) if not keylist else keylist:
field = item.get(key, None)
if isinstance(field, str):
field = field.replace("'", "''").replace("\\", "")
column_names.append(key)
if key in ['page', 'index']:
fields.append(str(field))
else:
fields.append("'{}'".format(field))
return '({})'.format(', '.join(fields))
if __name__ == '__main__':
data_file = sys.argv[1]
tablename = sys.argv[2]
values = []
for line in open(data_file):
item = json.loads(line)
values.append(build_value(item))
if len(values) == 10000:
sql = 'insert into {}({}) values {};'.format(tablename, ', '.join(column_names), ', '.join(values))
execute(sql)
values.clear()
execute('insert into {}({}) values {};'.format(tablename, ', '.join(column_names), ', '.join(values)))