从产品表中归纳出同类前缀再实验

表结构：

-- Product table: one row per product name variant together with its price.
create table hy_product (
    id    number(9),
    name  nvarchar2(20) not null,
    price integer       not null,
    primary key (id)
);

数据：

-- Sample data: four product families (AA, PDST, ASDF, PDF), three name variants each.
-- id and price are numeric columns, so they are given as numeric literals rather than
-- quoted strings; this avoids relying on implicit string-to-number conversion.
insert into hy_product(id,name,price) values(1,'AA',10);
insert into hy_product(id,name,price) values(2,'AA款',10);
insert into hy_product(id,name,price) values(3,'AA屏风',10);
insert into hy_product(id,name,price) values(4,'PDST',20);
insert into hy_product(id,name,price) values(5,'PDST款',20);
insert into hy_product(id,name,price) values(6,'PDST-TJ',20);
insert into hy_product(id,name,price) values(7,'ASDF',20);
insert into hy_product(id,name,price) values(8,'ASDF款',20);
insert into hy_product(id,name,price) values(9,'ASDF-TJ',20);
insert into hy_product(id,name,price) values(10,'PDF',20);
insert into hy_product(id,name,price) values(11,'PDF款',20);
insert into hy_product(id,name,price) values(12,'PDF-TJ',20);

需求：上表的name字段里，有AA款，AA，AA屏风等产品，实际上它们都是同一产品的不同表述。现在需要把前缀相同的归到同一类，举例来说AA款，AA，AA屏风等应该被归纳到AA类里。

期待值：按照需求，归类完的产品应该是AA,3;PDST,3;ASDF,3;PDF,3;四种。

昨天的文章 https://www.cnblogs.com/xiandedanteng/p/12709690.html 是边讲解边运行，SQL显得有点多，今天只把必要的列出。

创建辅助序列表：

-- Helper table of integers; the prefix query reads each id as a cut length.
create table tb_seq (
    id number(9),
    primary key (id)
);


-- Fill tb_seq with the integers 1..20.
-- 20 is the declared maximum length of hy_product.name (nvarchar2(20)), so every
-- possible prefix length of a name is available to the prefix query.
-- No ORDER BY: the order in which rows are inserted does not affect the stored set.
insert into tb_seq(id)
select level
from dual
connect by level <= 20;

整理数据放到临时表里：

-- Build the candidate-prefix table tb_tmp1(sery, cnt):
--   sery = a leading substring of some product name,
--   cnt  = number of products whose name starts with that substring.
-- Only prefixes longer than one character that are shared by more than one
-- product are kept.
create table tb_tmp1 as
with prefix as (
    -- One row per (product, cut length) with cut length in 1..length(name).
    -- The cut length is bounded in the join itself, so no marker value is needed
    -- for "cut length exceeds the name" and a real prefix can never collide with
    -- such a marker.
    -- to_char converts the NVARCHAR2 substring so that sery is a VARCHAR2 column.
    select to_char(substr(p.name, 1, s.id)) as sery
    from hy_product p
    inner join tb_seq s
        on s.id <= length(p.name)
)
select
    sery,
    count(*) as cnt
from prefix
where length(sery) > 1
group by sery
having count(*) > 1
-- Only influences the order in which rows are loaded; queries on tb_tmp1 that
-- need a particular order must specify their own ORDER BY.
order by count(*) desc, sery;

最后从临时表里查询的SQL：

-- Final classification: keep a candidate prefix only if no longer candidate
-- starts with it, i.e. it is the longest shared prefix of its family.
-- The comparison is a true prefix test (substr at position 1), so a candidate
-- that merely occurs in the middle of another candidate is not discarded, names
-- containing '@' or ',' are handled, and no concatenated string of all
-- candidates (with its length limit) has to be built.
select
    t.sery,
    t.cnt
from tb_tmp1 t
where not exists (
    select 1
    from tb_tmp1 longer
    where length(longer.sery) > length(t.sery)
      and substr(longer.sery, 1, length(t.sery)) = t.sery
)
order by t.cnt desc, t.sery;

查询结果（共四行，行的先后顺序可能不同）：AA,3；PDST,3；ASDF,3；PDF,3

可见结果与预期是完全相符的，昨晚今晨披星戴月写的SQL程序通过了考验。

以上使用的全部SQL：

-- Product table: one row per product name variant together with its price.
create table hy_product (
    id    number(9),
    name  nvarchar2(20) not null,
    price integer       not null,
    primary key (id)
);
    
-- Sample data: four product families (AA, PDST, ASDF, PDF), three name variants each.
-- id and price are numeric columns, so they are given as numeric literals rather than
-- quoted strings; this avoids relying on implicit string-to-number conversion.
insert into hy_product(id,name,price) values(1,'AA',10);
insert into hy_product(id,name,price) values(2,'AA款',10);
insert into hy_product(id,name,price) values(3,'AA屏风',10);
insert into hy_product(id,name,price) values(4,'PDST',20);
insert into hy_product(id,name,price) values(5,'PDST款',20);
insert into hy_product(id,name,price) values(6,'PDST-TJ',20);
insert into hy_product(id,name,price) values(7,'ASDF',20);
insert into hy_product(id,name,price) values(8,'ASDF款',20);
insert into hy_product(id,name,price) values(9,'ASDF-TJ',20);
insert into hy_product(id,name,price) values(10,'PDF',20);
insert into hy_product(id,name,price) values(11,'PDF款',20);
insert into hy_product(id,name,price) values(12,'PDF-TJ',20);

-- Make the sample rows permanent before the helper objects are created.
commit;


-- Helper table of integers; the prefix query reads each id as a cut length.
create table tb_seq (
    id number(9),
    primary key (id)
);


-- Fill tb_seq with the integers 1..20.
-- 20 is the declared maximum length of hy_product.name (nvarchar2(20)), so every
-- possible prefix length of a name is available to the prefix query.
-- No ORDER BY: the order in which rows are inserted does not affect the stored set.
insert into tb_seq(id)
select level
from dual
connect by level <= 20;

-- Build the candidate-prefix table tb_tmp1(sery, cnt):
--   sery = a leading substring of some product name,
--   cnt  = number of products whose name starts with that substring.
-- Only prefixes longer than one character that are shared by more than one
-- product are kept.
create table tb_tmp1 as
with prefix as (
    -- One row per (product, cut length) with cut length in 1..length(name).
    -- The cut length is bounded in the join itself, so no marker value is needed
    -- for "cut length exceeds the name" and a real prefix can never collide with
    -- such a marker.
    -- to_char converts the NVARCHAR2 substring so that sery is a VARCHAR2 column.
    select to_char(substr(p.name, 1, s.id)) as sery
    from hy_product p
    inner join tb_seq s
        on s.id <= length(p.name)
)
select
    sery,
    count(*) as cnt
from prefix
where length(sery) > 1
group by sery
having count(*) > 1
-- Only influences the order in which rows are loaded; queries on tb_tmp1 that
-- need a particular order must specify their own ORDER BY.
order by count(*) desc, sery;

-- Final classification: keep a candidate prefix only if no longer candidate
-- starts with it, i.e. it is the longest shared prefix of its family.
-- The comparison is a true prefix test (substr at position 1), so a candidate
-- that merely occurs in the middle of another candidate is not discarded, names
-- containing '@' or ',' are handled, and no concatenated string of all
-- candidates (with its length limit) has to be built.
select
    t.sery,
    t.cnt
from tb_tmp1 t
where not exists (
    select 1
    from tb_tmp1 longer
    where length(longer.sery) > length(t.sery)
      and substr(longer.sery, 1, length(t.sery)) = t.sery
)
order by t.cnt desc, t.sery;


-- Clean-up: remove the three tables created by this script.
-- Each statement carries its own terminator so the three drops run as separate
-- statements when the script is executed as a whole.
drop table hy_product;
drop table tb_seq;
drop table tb_tmp1;

--2020-04-16--

posted @ 2020-04-16 15:12 逆火狂飙 阅读(185) 评论(1) 收藏 举报

刷新页面返回顶部

从产品表中归纳出同类前缀 再实验

从产品表中归纳出同类前缀再实验