从产品表中归纳出同类前缀 再实验

表结构:

-- Sample product table; the name column holds the labels whose shared
-- prefixes are analysed by the queries in this article.
create table hy_product (
    id    number(9),
    name  nvarchar2(20) not null,
    price integer       not null,
    primary key (id)
);

数据:

-- Sample rows: four product families (AA, PDST, ASDF, PDF) with three name
-- variants each. id and price are numeric columns, so numeric literals are
-- used instead of quoted strings to avoid implicit char-to-number conversion.
insert into hy_product (id, name, price) values (1,  'AA',      10);
insert into hy_product (id, name, price) values (2,  'AA款',    10);
insert into hy_product (id, name, price) values (3,  'AA屏风',  10);
insert into hy_product (id, name, price) values (4,  'PDST',    20);
insert into hy_product (id, name, price) values (5,  'PDST款',  20);
insert into hy_product (id, name, price) values (6,  'PDST-TJ', 20);
insert into hy_product (id, name, price) values (7,  'ASDF',    20);
insert into hy_product (id, name, price) values (8,  'ASDF款',  20);
insert into hy_product (id, name, price) values (9,  'ASDF-TJ', 20);
insert into hy_product (id, name, price) values (10, 'PDF',     20);
insert into hy_product (id, name, price) values (11, 'PDF款',   20);
insert into hy_product (id, name, price) values (12, 'PDF-TJ',  20);

需求:上表的name字段里,有AA款,AA,AA屏风等产品,实际上它们都是同一产品的不同表述。现在需要把前缀相同的归到同一类,举例来说AA款,AA,AA屏风等应该被归纳到AA类里。

期待值:按照需求,归类完的产品应该是AA,3;PDST,3;ASDF,3;PDF,3;四种。

 

昨天的文章 https://www.cnblogs.com/xiandedanteng/p/12709690.html 是边讲解边运行,SQL显得有点多,今天只把必要的列出。

创建辅助序列表:

-- Helper table holding a plain run of integers, one per row.
create table tb_seq (
    id number(9),
    primary key (id)
);


-- Fill tb_seq with the integers 1..20. The upper bound must reach 20, the
-- declared length of hy_product.name (nvarchar2(20)); a smaller bound would
-- silently skip the longer prefixes. No ORDER BY is needed: the rows are a
-- set, and queries that need an order must request it themselves.
insert into tb_seq (id)
select level
from dual
connect by level <= 20;

整理数据放到临时表里:

-- Build the candidate-prefix table: every prefix (2 characters or longer) of
-- every product name, together with the number of products that share it.
-- Only prefixes shared by at least two products are kept.
create table tb_tmp1 as
with prefix as (
    -- One row per (product, prefix length). Joining on the length range
    -- replaces the former cross join + 'extra' sentinel, which would have
    -- discarded a genuine prefix that happened to be spelled 'extra'.
    select to_char(substr(p.name, 1, seq.id)) as sery
    from hy_product p
    inner join tb_seq seq
        on seq.id between 2 and length(p.name)
)
select
    sery,
    count(*) as cnt
from prefix
group by sery
having count(*) > 1
order by cnt desc, sery;

最后从临时表里查询的SQL:

-- Keep only the maximal shared prefixes: a candidate is reported when no
-- longer candidate starts with it (e.g. 'PD' and 'PDS' are dropped in favour
-- of 'PDST'). The comparison is a true prefix test via SUBSTR, so a candidate
-- that merely occurs in the middle of another one is not discarded, data
-- containing '@', '%' or '_' is handled literally, and there is no
-- dependency on the LISTAGG result-length limit.
select
    t.sery,
    t.cnt
from tb_tmp1 t
where not exists (
    select 1
    from tb_tmp1 longer
    where length(longer.sery) > length(t.sery)
      and substr(longer.sery, 1, length(t.sery)) = t.sery
);

 

查询结果(行的顺序可能不同):

SERY  CNT
AA    3
ASDF  3
PDF   3
PDST  3

可见结果与预期是完全相符的,昨晚今晨披星戴月写的SQL程序通过了考验。

 

以上使用的全部SQL:

-- Sample product table; the name column holds the labels whose shared
-- prefixes are analysed by the queries in this script.
create table hy_product (
    id    number(9),
    name  nvarchar2(20) not null,
    price integer       not null,
    primary key (id)
);
    
-- Sample rows: four product families (AA, PDST, ASDF, PDF) with three name
-- variants each. id and price are numeric columns, so numeric literals are
-- used instead of quoted strings to avoid implicit char-to-number conversion.
insert into hy_product (id, name, price) values (1,  'AA',      10);
insert into hy_product (id, name, price) values (2,  'AA款',    10);
insert into hy_product (id, name, price) values (3,  'AA屏风',  10);
insert into hy_product (id, name, price) values (4,  'PDST',    20);
insert into hy_product (id, name, price) values (5,  'PDST款',  20);
insert into hy_product (id, name, price) values (6,  'PDST-TJ', 20);
insert into hy_product (id, name, price) values (7,  'ASDF',    20);
insert into hy_product (id, name, price) values (8,  'ASDF款',  20);
insert into hy_product (id, name, price) values (9,  'ASDF-TJ', 20);
insert into hy_product (id, name, price) values (10, 'PDF',     20);
insert into hy_product (id, name, price) values (11, 'PDF款',   20);
insert into hy_product (id, name, price) values (12, 'PDF-TJ',  20);

-- Make the sample data permanent before the helper tables are built.
commit;


-- Helper table holding a plain run of integers, one per row.
create table tb_seq (
    id number(9),
    primary key (id)
);


-- Fill tb_seq with the integers 1..20. The upper bound must reach 20, the
-- declared length of hy_product.name (nvarchar2(20)); a smaller bound would
-- silently skip the longer prefixes. No ORDER BY is needed: the rows are a
-- set, and queries that need an order must request it themselves.
insert into tb_seq (id)
select level
from dual
connect by level <= 20;

-- Build the candidate-prefix table: every prefix (2 characters or longer) of
-- every product name, together with the number of products that share it.
-- Only prefixes shared by at least two products are kept.
create table tb_tmp1 as
with prefix as (
    -- One row per (product, prefix length). Joining on the length range
    -- replaces the former cross join + 'extra' sentinel, which would have
    -- discarded a genuine prefix that happened to be spelled 'extra'.
    select to_char(substr(p.name, 1, seq.id)) as sery
    from hy_product p
    inner join tb_seq seq
        on seq.id between 2 and length(p.name)
)
select
    sery,
    count(*) as cnt
from prefix
group by sery
having count(*) > 1
order by cnt desc, sery;

-- Keep only the maximal shared prefixes: a candidate is reported when no
-- longer candidate starts with it (e.g. 'PD' and 'PDS' are dropped in favour
-- of 'PDST'). The comparison is a true prefix test via SUBSTR, so a candidate
-- that merely occurs in the middle of another one is not discarded, data
-- containing '@', '%' or '_' is handled literally, and there is no
-- dependency on the LISTAGG result-length limit.
select
    t.sery,
    t.cnt
from tb_tmp1 t
where not exists (
    select 1
    from tb_tmp1 longer
    where length(longer.sery) > length(t.sery)
      and substr(longer.sery, 1, length(t.sery)) = t.sery
);


-- Clean-up: remove the three tables created by this script. Each statement
-- is terminated so the three drops are not read as one invalid statement.
drop table hy_product;
drop table tb_seq;
drop table tb_tmp1;

--2020-04-16--

posted @ 2020-04-16 15:12  逆火狂飙  阅读(184)  评论(1)    收藏  举报
生当作人杰 死亦为鬼雄 至今思项羽 不肯过江东