mongo2csv

#!/usr/bin/env python3

import csv
import datetime
import os

import pymongo

# ISO-formatted (YYYY-MM-DD) date of the day before the script started;
# evaluated once at import time and used as the default export date.
yesterday = (datetime.date.today() - datetime.timedelta(days=1)).isoformat()


def _apply_column_handle(item, column_handle):
    """Flatten the configured nested columns of one document in place."""
    for column, rule in column_handle.items():
        if column not in item:
            # Documents are schemaless; a column absent from this document
            # is simply exported as an empty cell later on.
            continue
        value = item[column]
        if rule != 'join':
            # Integer rule -> list index, string rule -> sub-dict key.
            item[column] = value[rule]
        elif isinstance(value, list):
            item[column] = '|'.join(str(x) for x in value)


def mongo2csv(db_name, ts_string=yesterday, column_handle=None, column_delete=None):
    """Export one day's documents of a collection in the ``rental`` DB to CSV.

    Documents whose ``ts_string`` field equals *ts_string* are written to
    ``<db_name>_<ts_string>.csv`` in the current working directory. The
    header is the sorted key list of the first matching document; columns
    missing from later documents are written as empty cells, and keys that
    only appear in later documents are not exported.

    The file is opened in append mode. The header row is only written when
    the file does not exist yet or is empty, so repeated runs do not insert
    header lines in the middle of the data.

    :param db_name: name of the collection to export (also the file prefix)
    :param ts_string: date string matched against the ``ts_string`` field
    :param column_handle: columns that need flattening, as ``{k: v}`` where
                        k: the column to process,
                        v: 1. a number, i.e. a list index - take that element
                              of the list
                           2. a string, i.e. a dict key - take that value of
                              the sub-dictionary
                           3. ``'join'`` - convert the list to a string
                              joined with ``|``
    :param column_delete: columns to exclude from the export
    :return: None
    """
    print('starting...')
    host = '127.0.0.1'
    port = 5600
    user = 'root'
    password = ''
    url = 'mongodb://' + user + ':' + password + '@' + host + ':' + str(port) + '/'

    column_handle = column_handle or {}
    # Projection: always drop _id and ts, plus whatever the caller excludes.
    projection = {'_id': 0, 'ts': 0}
    for name in column_delete or []:
        projection[name] = 0
    query = {'ts_string': ts_string}
    # Name the file after the date actually exported, not the module default.
    file_name = db_name + '_' + str(ts_string) + '.csv'

    client = pymongo.MongoClient(url)
    try:
        collection = client.rental[db_name]
        first = collection.find_one(query, projection)
        if first is None:
            # Nothing to export; avoid crashing on None and creating a
            # header-less file.
            print('no documents found in ' + db_name + ' for ' + str(ts_string))
            return
        title = sorted(first.keys())

        need_header = (not os.path.exists(file_name)
                       or os.path.getsize(file_name) == 0)
        # newline='' lets the csv module control line endings; '\n' keeps
        # the original one-line-per-row layout.
        with open(file_name, 'a', encoding='utf8', newline='') as f:
            writer = csv.writer(f, lineterminator='\n')
            if need_header:
                writer.writerow(title)
            for item in collection.find(query, projection):
                _apply_column_handle(item, column_handle)
                # str() keeps the original textual rendering of values
                # (e.g. None -> 'None'); csv.writer adds quoting for values
                # containing commas, quotes or newlines.
                writer.writerow([str(item.get(x, '')) for x in title])
    finally:
        # Always release the connection, even if the export fails midway.
        client.close()


if __name__ == '__main__':
    # Each entry is (collection name, keyword arguments for mongo2csv);
    # the collections are exported in the order listed.
    export_jobs = (
        ('lianjia_detail', {}),
        ('mogu_detail', {'column_handle': {'metroInfo': 0, 'rentType': 'value'}}),
        ('qingke_detail', {'column_handle': {}}),
        ('xiangyu_detail', {'column_delete': ['endDate', 'vacantStartDate', 'tabList', 'vacantEndDate']}),
        ('ziru_detail', {'column_handle': {'subway_line_code': 'join', 'subway_station_code': 'join'}}),
    )
    for collection_name, options in export_jobs:
        mongo2csv(collection_name, **options)

    # Let the shell print a line count per CSV file, then print the exit
    # status returned by os.system.
    line_count_status = os.system('wc -l *.csv')
    print(line_count_status)



 

 

posted @ 2018-08-07 14:51  wangheng1409  阅读(157)  评论(0)  编辑  收藏  举报