分析和爬取天猫,京东数据
1数据如下:

2.数据清洗:
#数据清洗,对颜色和尺寸数据进行标准化设置 #天猫数据清洗,颜色 ALTER TABLE t_sales ADD color_std VARCHAR(128) NOT NULL AFTER color; UPDATE t_sales SET color_std = '黑色' WHERE color LIKE '%黑%'; UPDATE t_sales SET color_std = '灰色' WHERE color LIKE '%灰%'; UPDATE t_sales SET color_std = '红色' WHERE color LIKE '%红%'; UPDATE t_sales SET color_std = '紫色' WHERE color LIKE '%紫%'; UPDATE t_sales SET color_std = '绿色' WHERE color LIKE '%绿%'; UPDATE t_sales SET color_std = '肤色' WHERE color LIKE '%肤%'; UPDATE t_sales SET color_std = '蓝色' WHERE color LIKE '%蓝%'; UPDATE t_sales SET color_std = '青色' WHERE color LIKE '%青%'; UPDATE t_sales SET color_std = '卡其色' WHERE color LIKE '%卡其%'; UPDATE t_sales SET color_std = '玫瑰色' WHERE color LIKE '%玫瑰%'; UPDATE t_sales SET color_std = '银灰' WHERE color LIKE '%银灰%'; UPDATE t_sales SET color_std = '水晶虾' WHERE color LIKE '%水晶虾%'; # 尺寸 ALTER TABLE t_sales ADD size1 VARCHAR(128) NOT NULL AFTER size; ALTER TABLE t_sales ADD size2 VARCHAR(128) NOT NULL AFTER size1; UPDATE t_sales SET size1 = 'AA' WHERE size LIKE '%AA%'; UPDATE t_sales SET size1 = 'A' WHERE size LIKE '%A%' and size not like '%AA%'; UPDATE t_sales SET size1 = 'B' WHERE size LIKE '%B%'; UPDATE t_sales SET size1 = 'C' WHERE size LIKE '%C%'; UPDATE t_sales SET size1 = 'D' WHERE size LIKE '%D%'; UPDATE t_sales SET size2 = '70' WHERE size LIKE '%70%'; UPDATE t_sales SET size2 = '75' WHERE size LIKE '%75%'; UPDATE t_sales SET size2 = '80' WHERE size LIKE '%80%'; UPDATE t_sales SET size2 = '85' WHERE size LIKE '%85%'; UPDATE t_sales SET size2 = '90' WHERE size LIKE '%90%';
清洗完成数据:

3.计算比列:
#原生sql语句计算比列 #计算天猫数据 SELECT 'A' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例 FROM t_sales WHERE SIZE1='A' UNION ALL SELECT 'B' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例 FROM t_sales WHERE SIZE1='B' UNION ALL SELECT 'C' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例 FROM t_sales WHERE SIZE1='C' UNION ALL SELECT 'D' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例 FROM t_sales WHERE SIZE1='D';
结果如下:

4.按销量排序:
#按销量排序 SELECT 'A' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例, COUNT(*) AS 销量 FROM t_sales WHERE SIZE1='A' UNION ALL SELECT 'B' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例, COUNT(*) AS 销量 FROM t_sales WHERE SIZE1='B' UNION ALL SELECT 'C' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例, COUNT(*) AS 销量 FROM t_sales WHERE SIZE1='C' UNION ALL SELECT 'D' AS 分类, (100*COUNT(*))/(SELECT COUNT(*) FROM t_sales WHERE SIZE1 IS NOT NULL) AS 比例, COUNT(*) AS 销量 FROM t_sales WHERE SIZE1='D' ORDER BY 销量 DESC;


浙公网安备 33010602011771号