Python 数据分析练习1

环境

Anaconda3 Python 3.6, Window 64bit

  • 目的

从MySQL数据库读取数据,进行数据清理,数据展示

  • 代码
# -*- coding: utf-8 -*-

import pymysql
import pandas as pd
import matplotlib.pylab as mtp
import numpy as np

# 数据库连接
db_conn = pymysql.connect(
    host="***",
    user="kimbo",
    passwd="***",
    database="kimbo_test",
    port=3306,
    charset="utf8"
)

# 执行sql语句
sql_cmd = "select id,order_id,valid_amt,coll_amt,product_type,one_dept from kimbo_test;"

# 导入数据
data1 = pd.read_sql(sql_cmd, db_conn)

# 数据清洗,发现缺失值
c = 0

data2 = data1.values  # 转换成DateFrame模式
rows = len(data2)  # 行数
cols = len(data2[0])  # 列数
# print(data2)
print(rows, cols)

for i in range(rows):
    for j in range(cols):
        if (data2[i][j] == "NULL"):
            data2[i][j] = None
        if (data2[i][2] >= 1000):
            data2[i][2] -= 100
            c += 1
print("总共修改数值:%d" % c)

data3 = data2.T  # 行列转换
price = data3[2]  # 价格
amt = data3[3]  # 金额
pricemax = data3[2].max()
pricemin = data3[2].min()
amtmax = data3[3].max()
amtmin = data3[3].min()

# 极差 最大值-最小值
pricediff = pricemax - pricemin
amtdiff = amtmax - amtmin
# 组距 极差/组数
pricedst = pricediff / 7
amtdst = amtdiff / 7
# 根据组距 切分
pricesty = np.arange(pricemin, pricemax, pricedst)
amtsty = np.arange(amtmin, amtmax, amtdst)

# 散点图
mtp.subplot(2, 1, 1)
mtp.plot(price, amt, 'o')
mtp.title("ctp")

# 画价格的直方图
mtp.subplot(2, 2, 3)
mtp.hist(data3[2], pricesty, color='y')
mtp.title("pri")

# 画金额的直方图
mtp.subplot(2, 2, 4)
mtp.hist(data3[3], amtsty, color='b')
mtp.title("amt")
mtp.show()
View Code
  • 结果展示

 

posted @ 2017-03-18 16:57  Kimbo  阅读(1803)  评论(0编辑  收藏  举报