# Language: Python

import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.manifold import TSNE
import plotly.express as px

# Pipeline: load cluster labels from a CSV, one-hot encode them, project the
# encoded rows to 2-D with t-SNE, and show an interactive scatter plot.

# 1. Load the data into a DataFrame (the CSV is read without a header row).
data = pd.read_csv('ma2.csv', header=None)

# The first three columns are assumed to be the age cluster, the city cluster
# and the satisfaction/experience cluster, in that order.
features_df = data.iloc[:, :3]

# 2. One-hot encode the categorical features.
# The `sparse=False` keyword was deprecated in scikit-learn 1.2 (renamed to
# `sparse_output`) and removed in 1.4, so passing it raises TypeError on
# current releases. Building the encoder with its defaults and densifying the
# result with `.toarray()` yields the same dense matrix on every version.
ohc = OneHotEncoder()
encoded_data = ohc.fit_transform(features_df).toarray()

# 3. Reduce the encoded features to two dimensions with t-SNE.
# A fixed random_state keeps the embedding reproducible between runs.
tsne_model = TSNE(n_components=2, random_state=42)
transformed_data = tsne_model.fit_transform(encoded_data)

# Combine the original cluster labels with the 2-D embedding. Both frames use
# the default 0..n-1 index, so the column assignments line up row by row.
combined_data = pd.DataFrame(transformed_data, columns=['Dim_1', 'Dim_2'])
combined_data['Satisfaction'] = features_df.iloc[:, 2]
combined_data['Age'] = features_df.iloc[:, 0]
combined_data['City'] = features_df.iloc[:, 1]

# 4. Build the interactive scatter plot: points are coloured by the
# satisfaction cluster; the age and city clusters appear in the hover tooltip.
fig = px.scatter(
    combined_data,
    x='Dim_1',
    y='Dim_2',
    color='Satisfaction',
    hover_data=['Age', 'City'],
    title='t-SNE Visualization with Combined Features Colors (Interactive)',
    labels={'Satisfaction': '满意度', 'Age': '年龄聚类', 'City': '城市聚类'},
)

fig.show()

# Source: blog post by 橘子味芬达水, posted 2024-03-19 23:34 (63 views, 0 comments).