import pandas as pd
from sklearn.decomposition import PCA
def instacart(data_dir="./instacart", n_components=0.95):
    """Reduce the Instacart user-by-aisle purchase counts with PCA.

    Loads the four Instacart CSV tables from *data_dir*, joins them so that
    every purchased-product row carries both a ``user_id`` and an ``aisle``
    name, cross-tabulates users against aisles, and applies PCA to the
    resulting count table.

    Args:
        data_dir: Directory containing ``aisles.csv``, ``products.csv``,
            ``order_products.csv`` and ``orders.csv``.
        n_components: Forwarded unchanged to ``PCA``. A float between 0 and 1
            asks scikit-learn to keep enough components to explain that
            fraction of the variance.

    Returns:
        The array returned by ``PCA.fit_transform`` for the user x aisle
        count table (one row per user).
    """
    from pathlib import Path

    base = Path(data_dir)

    # 1. Load the raw tables.
    aisles = pd.read_csv(base / "aisles.csv")
    order_products = pd.read_csv(base / "order_products.csv")
    orders = pd.read_csv(base / "orders.csv")
    products = pd.read_csv(base / "products.csv")

    # 2. Join the tables until user_id and aisle sit in the same frame.
    # 2a. aisles + products: attaches the aisle name to every product.
    tab1 = pd.merge(aisles, products, on="aisle_id")
    # 2b. + order_products: one row per product placed in an order.
    #     (Merging with `products` a second time would only duplicate columns
    #     and suffix `aisle_id`, breaking the following join.)
    tab2 = pd.merge(tab1, order_products, on="product_id")
    # 2c. + orders: brings in user_id, which the crosstab below requires.
    #     NOTE(review): assumes both tables share an `order_id` column, as in
    #     the public Instacart dataset -- confirm against the local CSVs.
    tab3 = pd.merge(tab2, orders, on="order_id")

    # 3. Count how many items each user bought from each aisle.
    table = pd.crosstab(tab3["user_id"], tab3["aisle"])

    # 4. Dimensionality reduction with PCA.
    transfer = PCA(n_components=n_components)
    data = transfer.fit_transform(table)

    # Hand the result back to the caller instead of silently discarding it.
    return data
if __name__ == "__main__":
    # Script entry point: run the full load -> merge -> crosstab -> PCA pipeline.
    instacart()