day01_05instacart综合案例

import pandas as pd
from sklearn.decomposition import PCA
def instacart():
    """Reduce Instacart users' per-aisle purchase counts with PCA.

    Reads the four Instacart CSV files from ``./instacart/``, joins them so
    that every ordered-product row carries both its ``user_id`` and its
    ``aisle``, cross-tabulates users against aisles, and applies PCA keeping
    enough components to explain 95% of the variance.

    Returns:
        The array produced by ``PCA.fit_transform`` on the user-by-aisle
        count table (one row per ``user_id``).
    """
    # 1. Load the data.
    aisles = pd.read_csv("./instacart/aisles.csv")
    order_products = pd.read_csv("./instacart/order_products.csv")
    orders = pd.read_csv("./instacart/orders.csv")
    products = pd.read_csv("./instacart/products.csv")

    # 2. Join the tables so user_id and aisle end up in the same frame.
    # 2.1 aisles + products on aisle_id -> each product gains its aisle.
    tab1 = pd.merge(aisles, products, on="aisle_id")
    # 2.2 + order_products on product_id -> each ordered item gains order_id.
    #     (Merging with `products` again here would only duplicate columns
    #     as aisle_id_x / aisle_id_y and never bring in order_id.)
    tab2 = pd.merge(tab1, order_products, on="product_id")
    # 2.3 + orders on order_id -> each ordered item gains user_id.
    tab3 = pd.merge(tab2, orders, on="order_id")

    # 3. Cross-tabulate: rows are user_id, columns are aisle, values are the
    #    number of joined rows for that (user, aisle) pair.
    table = pd.crosstab(tab3["user_id"], tab3["aisle"])

    # 4. Dimensionality reduction with PCA.
    # A float n_components in (0, 1) tells PCA to keep the smallest number of
    # components whose cumulative explained variance reaches that fraction.
    transfer = PCA(n_components=0.95)
    data = transfer.fit_transform(table)

    # Return the result so callers can actually use it (the original
    # computed it and then discarded it).
    return data

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    instacart()

 

posted @ 2020-09-11 17:29  My帝王源  阅读(119)  评论(0)    收藏  举报