# Dictionary feature extraction
def dict_dome():
    """Demonstrate feature extraction from a list of dictionaries.

    Fits a ``DictVectorizer`` on three sample city/temperature records,
    then prints the transformed data followed by the feature names the
    vectorizer generated. Returns nothing.
    """
    data = [
        {"city": "北京", "temperature": 100},
        {"city": "上海", "temperature": 100},
        {"city": "深圳", "temperature": 100},
    ]
    # 1. Instantiate the transformer. sparse=False asks for a dense array
    #    instead of a sparse matrix (which records only the non-zero values
    #    by position).
    transfer = DictVectorizer(sparse=False)
    # 2. Fit the vectorizer and transform the sample data in one step.
    data_new = transfer.fit_transform(data)
    print(data_new)
    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2; prefer get_feature_names_out() and fall back only on old releases.
    if hasattr(transfer, "get_feature_names_out"):
        print(transfer.get_feature_names_out())
    else:
        print(transfer.get_feature_names())
# Text feature extraction
def count_dome():
    """Demonstrate token-count feature extraction from short English texts.

    Fits a ``CountVectorizer`` on two sample sentences, then prints the
    resulting count matrix as a dense array followed by the feature names
    the vectorizer generated. Returns nothing.
    """
    data = ["Life is short,i like python", "Life is too long,i dislike python"]
    # 1. Instantiate the transformer; stop_words is the list of words to
    #    ignore (empty here).
    transfer = CountVectorizer(stop_words=[])
    # 2. Fit the vectorizer and transform the sample texts in one step.
    data_new = transfer.fit_transform(data)
    # The result is converted with toarray() so the full matrix is printed.
    print(data_new.toarray())
    # get_feature_names() was deprecated in scikit-learn 1.0 and removed in
    # 1.2; prefer get_feature_names_out() and fall back only on old releases.
    if hasattr(transfer, "get_feature_names_out"):
        print(transfer.get_feature_names_out())
    else:
        print(transfer.get_feature_names())
# Data preprocessing: normalization
def minmax_demo():
    """Normalize the first three columns of ``dating.txt`` and print them.

    The file is read with ``pd.read_csv`` from the current working
    directory, the first three columns are kept, and the result is passed
    through a ``MinMaxScaler`` created with its default settings.
    Returns nothing.
    """
    # 1. Load the data set and keep only the first three columns.
    features = pd.read_csv("dating.txt").iloc[:, :3]
    # 2. Build the scaler and apply it in a single fit/transform pass.
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(features)
    print(scaled)
# Data preprocessing: standardization
def stand_demo():
    """Standardize the first three columns of ``dating.txt`` and print them.

    The file is read with ``pd.read_csv`` from the current working
    directory, the first three columns are kept, and the result is passed
    through a ``StandardScaler`` created with its default settings.
    Returns nothing.
    """
    # 1. Load the data set, then narrow it to the first three columns.
    frame = pd.read_csv("dating.txt")
    selected = frame.iloc[:, :3]
    # 2. Fit the scaler on the selected columns and transform them.
    standardized = StandardScaler().fit_transform(selected)
    print(standardized)