pandas对excel行合并,行数据更新, 数据库连接等示例

示例

import pandas as pd

data_list = [
    {'tier_1': 'alpha', 'tier_2': 'do', 'tier_3': 'copy', 'department': 'AAA'},
    {'tier_1': 'alpha', 'tier_2': 'thing', 'tier_3': 'vv', 'department': 'AAA'},
    {'tier_1': 'alpha', 'tier_2': 'thing', 'tier_3': 'vv', 'department': 'AAA'},  # duplicate
    {'tier_1': 'alpha', 'tier_2': 'thing', 'tier_3': 'cookie', 'department': 'AAA'},
    {'tier_1': 'python', 'tier_2': 'code', 'tier_3': 'lg', 'department': 'BBB'},
    {'tier_1': 'python', 'tier_2': 'c#', 'tier_3': 'nothing', 'department': 'BBB'},
    {'tier_1': 'sock', 'tier_2': 'bea', 'tier_3': 'tea', 'department': 'CCC'},
]

df = pd.DataFrame(data_list)
print(df)
''' output:
   tier_1 tier_2   tier_3 department
0   alpha     do     copy        AAA
1   alpha  thing       vv        AAA
2   alpha  thing       vv        AAA
3   alpha  thing   cookie        AAA
4  python   code       lg        BBB
5  python     c#  nothing        BBB
6    sock    bea      tea        CCC
'''
# 数据去重
df.drop_duplicates(subset=['tier_1', 'tier_2', 'tier_3', 'department'], inplace=True)
print(df)
''' output:
   tier_1 tier_2   tier_3 department
0   alpha     do     copy        AAA
1   alpha  thing       vv        AAA
3   alpha  thing   cookie        AAA
4  python   code       lg        BBB
5  python     c#  nothing        BBB
6    sock    bea      tea        CCC
'''

# 数据更新
df.loc[df[(df['tier_1'] == 'sock') & (df['department'] == 'CCC')].index, 'tier_1'] = 'busong'
print(df)
''' output:
   tier_1 tier_2   tier_3 department
0   alpha     do     copy        AAA
1   alpha  thing       vv        AAA
3   alpha  thing   cookie        AAA
4  python   code       lg        BBB
5  python     c#  nothing        BBB
6  busong    bea      tea        CCC
'''
# 使用group分组
df_group = pd.DataFrame(df.groupby(['tier_1', 'tier_2', 'tier_3', 'department']).size(), columns=['count'])
# 按字段排序
df_group.sort_values(['tier_1', 'tier_2'], ascending=True, inplace=True)
df_group.to_excel('test.xlsx')

 

 

 

# method='pad' 进行行分解, 与上文行合并一直
df = pd.read_excel('test.xlsx')
df.fillna(method='pad', inplace=True)
print(df)
'''output
   tier_1 tier_2   tier_3 department  count
0   alpha     do     copy        AAA      1
1   alpha  thing   cookie        AAA      1
2   alpha  thing       vv        AAA      1
3  busong    bea      tea        CCC      1
4  python     c#  nothing        BBB      1
5  python   code       lg        BBB      1
'''

 

使用sqlalchemy配合pandas使用

from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import logging

your_kwargs = {}


def read():
    # 读取数据示例
    try:
        sql = ''
        engine = create_engine('postgresql://{user}:{password}@{host}/{database}'.format(
            **your_kwargs
        ))
        with engine.connect() as conn, conn.begin():
            df = pd.read_sql(sql, conn)
            return df
    except Exception as e:
        logging.error(e)
        return np.nan


def insert(data_list: [{}, {}]):
    # 插入数据到数据库
    try:
        df = pd.DataFrame(data_list)
        engine = create_engine('postgresql://{user}:{password}@{host}/{database}'.format(
            **your_kwargs
        ))
        df.to_sql(
            name='tb_name', index=False, con=engine, if_exists='append', method='multi'
        )
    except Exception as e:
        logging.error(e)

 

posted @ 2021-01-08 12:14  士为知己  阅读(459)  评论(0)    收藏  举报