from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
def datasets_demo(*, test_size=0.2, random_state=22):
    """Load the iris dataset, split it into train/test subsets, and print them.

    Args:
        test_size: Forwarded to ``train_test_split``; the share of samples
            held out for testing. Defaults to 0.2, i.e. 80% train / 20% test
            (scikit-learn itself defaults to 0.25 when this is omitted).
        random_state: Random seed forwarded to ``train_test_split`` so the
            split is reproducible. Defaults to 22.

    Returns:
        The ``(x_train, x_test, y_train, y_test)`` tuple produced by
        ``train_test_split``.
    """
    # load_iris() returns a Bunch, a dict-like object. Besides the feature
    # values (iris.data) and the label array (iris.target) used below, it also
    # carries a description (iris["DESCR"]) and the feature names
    # (iris.feature_names); iris.data.shape gives the sample and feature counts.
    iris = load_iris()

    # Split the feature values and the labels with the same shuffling.
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data,
        iris.target,
        test_size=test_size,
        random_state=random_state,
    )

    # Print each subset together with its shape, separated by a ruler line.
    separator = "=" * 100
    print(x_train, x_train.shape)
    print(separator)
    print(y_train, y_train.shape)
    print(separator)
    print(x_test, x_test.shape)
    print(separator)
    print(y_test, y_test.shape)

    return x_train, x_test, y_train, y_test
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    datasets_demo()