分析和爬取天猫,京东数据

1数据如下:

    

2.数据清洗:

#数据清洗,对颜色和尺寸数据进行标准化设置

#天猫数据清洗,颜色
ALTER TABLE t_sales ADD color_std VARCHAR(128) NOT NULL AFTER color;

UPDATE t_sales SET color_std = '黑色' WHERE color LIKE '%黑%';
UPDATE t_sales SET color_std = '灰色' WHERE color LIKE '%灰%';
UPDATE t_sales SET color_std = '红色' WHERE color LIKE '%红%';
UPDATE t_sales SET color_std = '紫色' WHERE color LIKE '%紫%';
UPDATE t_sales SET color_std = '绿色' WHERE color LIKE '%绿%';
UPDATE t_sales SET color_std = '肤色' WHERE color LIKE '%肤%';
UPDATE t_sales SET color_std = '蓝色' WHERE color LIKE '%蓝%';
UPDATE t_sales SET color_std = '青色' WHERE color LIKE '%青%';
UPDATE t_sales SET color_std = '卡其色' WHERE color LIKE '%卡其%';
UPDATE t_sales SET color_std = '玫瑰色' WHERE color LIKE '%玫瑰%';
UPDATE t_sales SET color_std = '银灰' WHERE color LIKE '%银灰%';
UPDATE t_sales SET color_std = '水晶虾' WHERE color LIKE '%水晶虾%';


# 尺寸
ALTER TABLE t_sales ADD size1 VARCHAR(128) NOT NULL AFTER size;
ALTER TABLE t_sales ADD size2 VARCHAR(128) NOT NULL AFTER size1;

UPDATE t_sales SET size1 = 'AA' WHERE size LIKE '%AA%';
UPDATE t_sales SET size1 = 'A' WHERE size LIKE '%A%' and size not like '%AA%';
UPDATE t_sales SET size1 = 'B' WHERE size LIKE '%B%';
UPDATE t_sales SET size1 = 'C' WHERE size LIKE '%C%';
UPDATE t_sales SET size1 = 'D' WHERE size LIKE '%D%';

UPDATE t_sales SET size2 = '70' WHERE size LIKE '%70%';
UPDATE t_sales SET size2 = '75' WHERE size LIKE '%75%';
UPDATE t_sales SET size2 = '80' WHERE size LIKE '%80%';
UPDATE t_sales SET size2 = '85' WHERE size LIKE '%85%';
UPDATE t_sales SET size2 = '90' WHERE size LIKE '%90%';

 清洗完成数据:

      

3.计算比列:

#原生sql语句计算比列
#计算天猫数据
SELECT 'A' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例 FROM t_sales WHERE SIZE1='A'
UNION ALL
SELECT 'B' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例 FROM t_sales WHERE SIZE1='B'
UNION ALL
SELECT 'C' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例 FROM t_sales WHERE SIZE1='C'
UNION ALL
SELECT 'D' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例 FROM t_sales WHERE SIZE1='D';

结果如下:

    

 4.按销量排序:

#按销量排序
SELECT 'A' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例, COUNT(*) AS 销量 FROM t_sales WHERE SIZE1='A'
UNION ALL
SELECT 'B' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例, COUNT(*) AS 销量 FROM t_sales WHERE SIZE1='B'
UNION ALL
SELECT 'C' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例, COUNT(*) AS 销量 FROM t_sales WHERE SIZE1='C'
UNION ALL
SELECT 'D' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例, COUNT(*) AS 销量 FROM t_sales WHERE SIZE1='D'
ORDER BY 销量 DESC;

    

 

posted @ 2024-09-21 14:21  silence_cho  阅读(61)  评论(0)    收藏  举报