Python SQL相关操作

环境

Anaconda3 Python 3.6, Windows 64-bit

  • 目的

从MySQL数据库读取数据,进行数据查询、关联

  • 代码
# -*- coding: utf-8 -*-

"""
Author: kimbo zhang
Mail: kimbo_zhang@163.com
"""

import pymysql
import pandas as pd
import numpy as np

# Open the MySQL connection (host and password are redacted placeholders).
db_conn = pymysql.connect(
    host="***",
    user="kimbo",
    passwd="***",
    port=3306,
    database="kimbo_test",
    charset="utf8"
)

# SQL statements to run. The second query aliases the re_* columns to the
# coll_* names so that both result sets share identical column names.
sql_cmd = "select id,coll_type,coll_amt,coll_cost from kimbo_test;"
sql_cmd2 = "select id,re_type as coll_type,re_amt as coll_amt,re_cost as coll_cost from kimbo_test2;"

# Load both tables into DataFrames. The rest of the script only works on these
# in-memory frames, so the connection is released as soon as the reads finish,
# including when one of the queries raises.
try:
    data1 = pd.read_sql(sql_cmd, db_conn)  # rows of kimbo_test
    data2 = pd.read_sql(sql_cmd2, db_conn)  # rows of kimbo_test2
finally:
    db_conn.close()

# 1. SELECT with a row limit: keep only the first 10 rows of kimbo_test.
# eg: select * from kimbo_test limit 10;
sl = data1.head(10)
print("查询前10条记录:\n")
print(sl)
print("\n-------------->\n")

# 2. WHERE with a single equality test: rows whose coll_type is '偏低'.
# eg: select * from kimbo_test where coll_type='偏低';
is_low = data1['coll_type'].eq('偏低')  # boolean mask, one entry per row
wh = data1.loc[is_low]
print(wh)

# 3. WHERE ... IN: rows whose coll_type is either '偏低' or '昂贵'.
# eg: select * from kimbo_test where coll_type in ('偏低','昂贵');
wanted_types = ['偏低', '昂贵']
wh2 = data1[data1['coll_type'].isin(wanted_types)]
print(wh2)

# 4. WHERE ... IN combined with AND: rows whose coll_type is '偏低' or '昂贵'
#    and whose id is greater than 5000.
# eg: select * from kimbo_test where coll_type in ('偏低','昂贵') and id >5000;
type_matches = data1['coll_type'].isin(['偏低', '昂贵'])
# Each comparison is parenthesised because & binds tighter than > in Python.
wh3 = data1[type_matches & (data1['id'] > 5000)]
print(wh3)

# 5. GROUP BY with aggregation: total coll_amt and coll_cost per coll_type.
# eg: select coll_type,sum(coll_amt)as coll_amt,sum(coll_cost)as coll_cost from kimbo_test group by coll_type;
# The aggregation is named by string rather than passing np.sum: recent pandas
# versions emit a FutureWarning for the NumPy callable and recommend 'sum'.
# Unlike the SQL result, coll_type ends up as the index of the output frame.
gb = data1.groupby('coll_type').agg({'coll_amt': 'sum', 'coll_cost': 'sum'})
print(gb)

# 5. INNER JOIN of kimbo_test and kimbo_test2 on id (pd.merge defaults to
#    how='inner'). Both frames carry the same non-key column names, so pandas
#    disambiguates them with the default '_x' / '_y' suffixes.
# eg: select * from kimbo_test a inner join kimbo_test2 b on a.id=b.id ;
jn = pd.merge(data1, data2, on='id')
print(jn)

# 6. LEFT JOIN: keep every row of kimbo_test and attach the matching
#    kimbo_test2 rows; unmatched right-hand columns become NaN.
# eg: select * from kimbo_test a left join kimbo_test2 b on a.id=b.id ;
jn2 = pd.merge(data1, data2, on='id', how='left')
print(jn2)

# 7. FULL (outer) JOIN: keep the rows of both tables, matched on id where
#    possible and padded with NaN otherwise.
# eg: select * from kimbo_test a full join kimbo_test2 b on a.id=b.id ;
jn3 = pd.merge(data1, data2, on='id', how='outer')
print(jn3)

# 8. UNION ALL: stack the rows of both tables without removing duplicates.
#    The column names of the two frames must match.
# eg select * from kimbo_test a union all select * from kimbo_test2 b ;
frames = [data1, data2]
un2 = pd.concat(frames)
print(un2)

# 9. Analytic (window) function: ROW_NUMBER() per coll_type, ordered by
#    coll_amt descending, keeping the top two rows of each group.
# eg:
# SELECT * FROM (
# SELECT
# t.*,
# ROW_NUMBER() OVER(PARTITION BY coll_type ORDER BY coll_amt DESC) AS rn
# FROM kimbo_test t
# ) ranked
# WHERE rn < 3
# ORDER BY coll_type, rn;
by_amount_desc = data1.sort_values(['coll_amt'], ascending=False)
# cumcount() numbers the rows of each group from 0 in their current (sorted)
# order, so adding 1 gives the 1-based row number within each coll_type.
row_numbers = by_amount_desc.groupby('coll_type').cumcount() + 1
# assign() aligns row_numbers back to data1 by index label, so each row gets
# its own rank even though data1 itself is not sorted.
ranked = data1.assign(rn=row_numbers)
rn = ranked.query('rn < 3').sort_values(['coll_type', 'rn'])
print("row_number 分析函数结果:")
print(rn)
View Code

 

  • 结果展示

第一个和最后一个结果:

 

posted @ 2017-03-21 20:42  Kimbo  阅读(322)  评论(0)  编辑  收藏  举报