# day01_01 数据集 (Datasets: loading and splitting the iris dataset)

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
def datasets_demo(test_size=0.2, random_state=22):
    """Load the iris dataset, split it into train/test subsets, and print them.

    ``load_iris()`` returns a ``Bunch`` (a dict-like object). Fields that are
    handy to inspect while exploring it:

    * ``iris["DESCR"]``       -- textual description of the dataset
    * ``iris.feature_names``  -- names of the feature columns
    * ``iris.data``           -- feature matrix (``iris.data.shape`` gives its size)
    * ``iris.target``         -- label array

    Args:
        test_size: Proportion of the samples put into the test subset,
            forwarded to ``train_test_split``. Defaults to 0.2 here
            (scikit-learn itself falls back to 0.25 when it is omitted).
        random_state: Seed forwarded to ``train_test_split`` so that the
            shuffle, and therefore the split, is reproducible.

    Returns:
        The tuple ``(x_train, x_test, y_train, y_test)`` exactly as produced
        by ``train_test_split``.
    """
    iris = load_iris()

    # train_test_split returns the pieces in the order
    # (features-train, features-test, labels-train, labels-test).
    x_train, x_test, y_train, y_test = train_test_split(
        iris.data,
        iris.target,
        test_size=test_size,
        random_state=random_state,
    )

    # Print each subset together with its shape, separated by a ruler line.
    separator = "=" * 100
    print(x_train, x_train.shape)
    print(separator)
    print(y_train, y_train.shape)
    print(separator)
    print(x_test, x_test.shape)
    print(separator)
    print(y_test, y_test.shape)

    return x_train, x_test, y_train, y_test
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    datasets_demo()

 

# posted @ 2020-09-11 17:22  My帝王源  阅读(52)  评论(0)    收藏  举报