code
from openpyxl import Workbook
from openpyxl.utils.dataframe import dataframe_to_rows
import pandas as pd
# Explore air_data.csv: compute per-column summary statistics, derive the
# number of missing values, and export a five-column summary to a workbook.
prefix = "./exp5/"
datafile = 'air_data.csv'
# NOTE(review): openpyxl produces the Office Open XML (.xlsx) format, so a
# file saved under a ".xls" name may be rejected or flagged by spreadsheet
# applications. The name is kept unchanged so anything that expects this
# path keeps working -- consider renaming it to "explore_result.xlsx".
resultfile = 'explore_result.xls'

# Load the CSV into a DataFrame (two-dimensional, labelled axes).
data_table = pd.read_csv(prefix + datafile, encoding='utf-8')

# include='all' makes describe() report on every column, not just numeric
# ones. Its rows are count (non-null values), unique, top (most frequent
# value), freq (frequency of top), mean, std, min, 50% (median), the
# requested 75% percentile, and max. Transposing gives one row per source
# column and one column per statistic.
df_described = data_table.describe(percentiles=[0.75], include='all')
df_described_T = df_described.T
print(df_described_T)

# Missing values per column = total number of rows - non-null count.
df_described_T['null'] = len(data_table) - df_described_T['count']
# numeric_only=True: without it, pandas >= 2.0 raises TypeError as soon as
# the frame holds a non-numeric column; older versions silently skipped such
# columns, so the resulting values are the same. Rows for non-numeric
# columns are left as NaN by index alignment.
df_described_T['standard deviation'] = data_table.std(numeric_only=True)
print(df_described_T)

# Only part of the exploration result is exported: select the wanted
# statistics by passing a list of column names. The explicit copy keeps the
# relabelling below from touching df_described_T.
df_described_5 = df_described_T[['null', 'max', 'min', 'mean', 'std']].copy()
# Chinese headers, in order: null count, maximum, minimum, mean,
# standard deviation.
df_described_5.columns = [u'空值数', u'最大值', u'最小值', u'均值', u'标准差']

# Write the summary (index and header included) row by row into the active
# worksheet of a fresh workbook, then save it next to the input data.
wb = Workbook()
ws = wb.active
for r in dataframe_to_rows(df_described_5, index=True, header=True):
    ws.append(r)
wb.save(prefix + resultfile)
result:
![在这里插入图片描述]()