import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets,preprocessing
from sklearn.model_selection import learning_curve
from sklearn.model_selection import train_test_split,GridSearchCV,cross_val_score
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
from pandas import read_csv
# Load the Smarket data set. `data` holds the same table as a plain NumPy
# array (every row, every column).
data_set = read_csv("Smarket.csv")
data = data_set.values

# Frame-wide reductions below use the DataFrame methods with
# `numeric_only=True` so that a text column in the CSV cannot make them raise;
# on an all-numeric frame the flag changes nothing.
# NOTE(review): presumably the file contains a non-numeric column (the
# standard Smarket data has "Direction") -- confirm against the CSV.

# Mean: per numeric column, and for Lag1 alone.
column_means = data_set.mean(numeric_only=True)
lag1_mean = np.mean(data_set["Lag1"])

# Median of Lag1.
lag1_median = np.median(data_set["Lag1"])

# Variance per numeric column. ddof=0 keeps the population variance that
# np.var computes (the pandas default would be ddof=1).
column_variances = data_set.var(ddof=0, numeric_only=True)

# Standard deviation per numeric column (population form, ddof=0, as np.std).
column_stds = data_set.std(ddof=0, numeric_only=True)

# Range (max - min) of Lag1.
lag1_range = np.ptp(data_set["Lag1"])

# 2x2 covariance matrix of Lag1 and Lag2.
lag1_lag2_cov = np.cov(data_set["Lag1"], data_set["Lag2"])

# 2x2 correlation-coefficient matrix of Lag1 and Lag2.
lag1_lag2_corr = np.corrcoef(data_set["Lag1"], data_set["Lag2"])

# Quartiles of every numeric column.
q1 = data_set.quantile(0.25, numeric_only=True)
q2 = data_set.quantile(0.5, numeric_only=True)
q3 = data_set.quantile(0.75, numeric_only=True)

# Summary statistics (count, mean, std, min, quartiles, max).
summary = data_set.describe()
# Show every column when printing a DataFrame (None removes the limit).
pd.set_option('display.max_columns', None)
# Show every row when printing a DataFrame (None removes the limit).
# The pandas function is `set_option`; `pd.set_options` does not exist and
# raised AttributeError.
pd.set_option('display.max_rows', None)