千万条用户特征数据写入 MySQL

 

from mysql_tool import *
import copy

# Sample single-row INSERT for `qqzone`.`myu`; it is only used as a template
# from which the statement prefix and the column names are derived.
s = '''
INSERT INTO `qqzone`.`myu` (`id`, `uid`, `age`, `gender`, `marriageStatus`, `education`, `consumptionAbility`, `LBS`, `interest1`, `interest2`, `interest3`, `interest4`, `interest5`, `kw1`, `kw2`, `kw3`, `kw4`, `kw5`, `topic1`, `topic2`, `topic3`, `topic4`, `topic5`, `ct`, `appIdAction`, `appIdInstall`, `os`, `carrier`, `house`) VALUES ('33', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
'''.replace('\n', '')

# Statement prefix: everything left of VALUES, with the `id` column removed,
# followed by ' VALUES ' so value tuples can be appended directly.
insert_head = s.split('VALUES')[0]
indb_fields_s = (insert_head + ' VALUES ').replace('`id`,', '')

# Bare column names (no backticks, no spaces) in declaration order,
# taken from the first parenthesised group of the template.
column_part = s.split('(')[1].split(')')[0]
fields_l = []
for raw_name in column_part.split(','):
    fields_l.append(raw_name.replace(' ', '').replace('`', ''))

# Per-row template: every column except `id` starts as the string 'NULL'.
val_d = dict.fromkeys(fields_l, 'NULL')
val_d.pop('id')

def _write_user_batch(insert_prefix, rows):
    """Build one multi-row INSERT from ``rows`` and send it with mysql_write.

    Each row is a dict; its values are emitted in the dict's iteration order,
    double-quoted and comma-separated. A failed write is logged (error text,
    then the statement, joined by '||') instead of being raised, so one bad
    batch does not abort the whole import.

    Returns the statement text that was sent.
    """
    # NOTE(review): values are quoted but not escaped, and missing fields are
    # sent as the quoted string "NULL" rather than SQL NULL - confirm this is
    # what the table expects.
    values_sql = ','.join(
        '("{}")'.format('","'.join(row.values())) for row in rows
    )
    statement = '{}{};'.format(insert_prefix, values_sql)
    try:
        mysql_write(statement)
    except Exception as e:
        # str(e) is required here: str.join raises TypeError when handed the
        # exception object itself, which would hide the real database error.
        print('||'.join([str(e), statement]))
    return statement


# Import userFeature.data in batches of `indb_step` rows. Each line holds
# '|'-separated tokens of the form '<field> <value>'; fields that a line does
# not mention keep the 'NULL' placeholder from val_d.
indb_step, indb_step_s = 2000, ''
f = 'userFeature.data'
with open(f, 'r') as fr:
    sql_d_l, indb_step_c = [], 0
    for i in fr:
        # NOTE(review): this break leaves the loop on the very first line, so
        # nothing below it runs. It looks like a manual switch-off of the
        # import; delete it to actually load the file.
        break
        # The template holds only strings, so a shallow copy is sufficient.
        sql_d = dict(val_d)
        for ii in i.replace('\n', '').split('|'):
            try:
                ix_ = ii.index(' ')
            except ValueError as e:
                # Token without a space separator: report it and move on.
                print(e)
                continue
            sql_d[ii[:ix_]] = ii[ix_ + 1:]

        sql_d_l.append(sql_d)
        indb_step_c += 1
        if indb_step_c % indb_step == 0:
            indb_step_s = _write_user_batch(indb_fields_s, sql_d_l)
            # The batch is dropped whether or not the write succeeded.
            sql_d_l, indb_step_c = [], 0

# Flush the final, partially filled batch.
if sql_d_l:
    indb_step_s = _write_user_batch(indb_fields_s, sql_d_l)
    indb_step_c = 0

def _insert_rows(insert_prefix, rows):
    """Send ``rows`` as one multi-row INSERT; return True when the write succeeded.

    ``rows`` is a list of lists of strings. Every value is wrapped in double
    quotes (no escaping is applied). A failed write is logged as
    '<error>||<statement>' and reported through the return value rather than
    raised.
    """
    values_sql = ','.join('("{}")'.format('","'.join(row)) for row in rows)
    statement = '{}{};'.format(insert_prefix, values_sql)
    try:
        mysql_write(statement)
    except Exception as e:
        # str(e) is required: str.join raises TypeError on a non-string item,
        # which previously replaced the real error with a crash in the handler.
        print('||'.join([str(e), statement]))
        return False
    return True


def myindb(f, indb_fields_s, indb_step=2000):
    """Bulk-load a comma-separated file through batched multi-row INSERTs.

    Args:
        f: path of the text file to read; one record per line, values
            separated by ','.
        indb_fields_s: statement prefix ending in ' VALUES ', to which the
            value tuples are appended.
        indb_step: number of rows to accumulate before each write.

    Lines containing the text 'aid' are skipped (this drops the header row).
    When a batch write fails, its rows stay in the buffer and are sent again
    together with the next batch; whatever is still buffered at end of file
    is written once more at the end.
    """
    sql_l, indb_step_c = [], 0
    with open(f, 'r') as fr:
        for i in fr:
            if 'aid' in i:
                continue
            sql_l.append(i.replace('\n', '').split(','))
            indb_step_c += 1
            # Only clear the buffer after a successful write.
            if indb_step_c % indb_step == 0 and _insert_rows(indb_fields_s, sql_l):
                sql_l, indb_step_c = [], 0
    # Test the buffer itself rather than the counter: after a failed batch the
    # counter can sit on an exact multiple of indb_step while rows are still
    # waiting, and those rows must not be silently skipped.
    if sql_l:
        _insert_rows(indb_fields_s, sql_l)


# Per-file settings for myindb. Each section rebinds the same module-level
# names, and every call is left commented out, so only the last section's
# values remain bound after this runs.

# train.csv -> `qqzone`.`myt`
f = 'train.csv'
indb_step = 10000
indb_step_s = ''
indb_fields_s = 'INSERT INTO `qqzone`.`myt` ( `aid`, `uid`, `label`) VALUES '
# myindb(f, indb_fields_s, indb_step)

# test1.csv -> `qqzone`.`myr`
# NOTE: the file name below is commented out too; enable it together with the call.
# f = 'test1.csv'
indb_step = 10000
indb_step_s = ''
indb_fields_s = 'INSERT INTO `qqzone`.`myr` ( `aid`, `uid`) VALUES '
# myindb(f, indb_fields_s, indb_step)

# adFeature.csv -> `qqzone`.`myadf`
f = 'adFeature.csv'
indb_step = 200
indb_step_s = ''
indb_fields_s = (
    'INSERT INTO `qqzone`.`myadf` (`aid`, `advertiserId`, `campaignId`, `creativeId`, '
    '`creativeSize`,`adCategoryId`, `productId`, `productType`) VALUES '
)
# myindb(f, indb_fields_s, indb_step)

 

 

总的字段数、字段名是确定的,但是每行数据可能缺失某些字段。

posted @ 2018-04-30 15:30  papering  阅读(351)  评论(0)  编辑  收藏  举报