pandas数据清洗的一些操作

import pandas as pd
 
# Show all columns and rows when printing (uncomment the options below to enable)
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
# pd.set_option('max_colwidth',100)
 
 
# Data-cleaning script: load the CSV, report missing-value ratios, remove
# rows where "issue_d" holds the literal text "issue_d", keep records issued
# in 2014 or later, drop unused columns, and write the result back.

# Load the raw file.
df = pd.read_csv("z:/clear1.csv", encoding="utf-8", low_memory=True)
print(df.head())

# Fraction of missing values per column, restricted to columns that have
# at least one missing value, sorted ascending.
nan_mean = df.isna().mean()
nan_mean = nan_mean[nan_mean != 0].sort_values()
print(nan_mean)

# Remove rows whose "issue_d" cell contains the literal text "issue_d"
# (presumably header lines repeated inside the data - confirm against the
# source file). This has to run BEFORE date parsing: pd.to_datetime raises
# on such a value, so filtering afterwards could never take effect.
# na=False treats missing dates as "not a header row" so that `~` works on a
# purely boolean mask; regex=False because the match is a plain substring.
is_header_row = df["issue_d"].str.contains("issue_d", na=False, regex=False)
df = df.loc[~is_header_row]

# Parse the issue date and derive the year. `assign` returns a new frame,
# which avoids chained-assignment warnings on the filtered data.
dt_series = pd.to_datetime(df["issue_d"])
df = df.assign(year=dt_series.dt.year)

# Keep only records issued in 2014 or later (rows with a missing year fail
# the comparison and are dropped as well).
df = df.loc[df["year"] >= 2014]
print(df["year"].value_counts())

# Drop columns that are not needed downstream.
df = df.drop(columns=["total_bal_ex_mort", "tot_coll_amt", "sub_grade"])

# Frequency of each issue date, most common first.
print(df["issue_d"].value_counts(sort=True))
print(df)

# Save the cleaned data.
# NOTE: this writes to the same path that was read, overwriting the input.
df.to_csv("z:/clear1.csv", index=False, encoding="utf-8")
 

 

posted @ 2019-09-12 01:55  Jumpkin1122  阅读(245)  评论(0)  编辑  收藏  举报