// 字符串的模糊搜索

/**
 * Array.prototype.findSort(num1): locate the element closest to `num1`.
 *
 * Expects the receiver to be sorted in ascending numeric order.
 *
 * Installed with Object.defineProperty as a non-enumerable property so that it
 * does not appear in `for...in` loops over arrays; it stays writable and
 * configurable, like a normally assigned method.
 *
 * @param {number} num1 - Value to look for.
 * @returns {[number, number]} `[index, num1 - this[index]]` for the closest
 *   element. When two neighbours are equally close the lower index wins.
 *   An empty array yields `[-1, 0]`.
 */
Object.defineProperty(Array.prototype, 'findSort', {
  configurable: true,
  enumerable: false,
  writable: true,
  value: function (num1) {
    const sorted = this;
    const size = sorted.length;
    if (size === 0) {
      return [-1, 0];
    }

    // At or below the first element: the first element is the closest.
    let lowGap = num1 - sorted[0];
    if (lowGap <= 0) {
      return [0, lowGap];
    }
    // At or above the last element: the last element is the closest.
    let highGap = num1 - sorted[size - 1];
    if (highGap >= 0) {
      return [size - 1, highGap];
    }

    // Inside the loop sorted[low] < num1 always holds and lowGap is its gap.
    // `high` starts one past the end with highGap taken from the last element;
    // once it has been moved, highGap is the (negative) gap to sorted[high].
    let low = 0;
    let high = size;
    for (;;) {
      const mid = (low + high + 1) >> 1;
      const gap = num1 - sorted[mid];
      if (gap > 0) {
        if (mid + 1 === high) {
          // num1 sits between sorted[mid] and sorted[high]: pick the nearer one.
          return gap > -highGap ? [high, highGap] : [mid, gap];
        }
        lowGap = gap;
        low = mid;
      } else if (gap < 0) {
        if (low + 1 === mid) {
          // num1 sits between sorted[low] and sorted[mid]: pick the nearer one.
          return -gap >= lowGap ? [low, lowGap] : [mid, gap];
        }
        highGap = gap;
        high = mid;
      } else {
        // Exact hit (or a non-comparable value): report the probe position.
        return [mid, gap];
      }
    }
  },
});
/**
 * Compare `str1` starting at offset `n1` with `str2` starting at offset `n2`.
 *
 * Characters are compared by UTF-16 code unit. When one side runs out first,
 * the side that still has characters left is the greater one.
 *
 * @param {number} n1 - Start offset into `str1`.
 * @param {number} n2 - Start offset into `str2`.
 * @param {string} str1 - First string.
 * @param {string} str2 - Second string.
 * @returns {[number, number]} `[len, dis]`: `len` is the number of leading
 *   characters that are equal, `dis` is 1 / -1 / 0 when the first tail is
 *   greater than / less than / equal to the second.
 */
function compareLen(n1, n2, str1, str2) {
  let matched = 0;
  for (;;) {
    const pos1 = n1 + matched;
    const pos2 = n2 + matched;
    const has1 = pos1 < str1.length;
    const has2 = pos2 < str2.length;

    if (!has1 || !has2) {
      // At least one side is exhausted; whoever still has characters is greater.
      if (has1) {
        return [matched, 1];
      }
      if (has2) {
        return [matched, -1];
      }
      return [matched, 0];
    }

    const code1 = str1.charCodeAt(pos1);
    const code2 = str2.charCodeAt(pos2);
    if (code1 === code2) {
      matched += 1;
      continue;
    }
    if (code1 > code2) {
      return [matched, 1];
    }
    if (code1 < code2) {
      return [matched, -1];
    }
  }
}

/**
 * Binary-search a sorted array for the element that shares the longest equal
 * part with `str`, as judged by `callback`.
 *
 * @param {*} str - The item being searched for; passed through to `callback`.
 * @param {Array} hasSortArr - Array already ordered consistently with `callback`.
 * @param {function(*, *): [number, number]} callback - Called as
 *   `callback(str, element)`; returns `[equalLength, order]` where `order` is
 *   1 / -1 / 0 when `str` sorts after / before / equal to `element`.
 * @returns {[number, number, number]} `[index, equalLength, order]` relative to
 *   `hasSortArr[index]`. When `str` falls between two neighbours the one with
 *   the longer equal part is reported (the probed one on a tie). An empty
 *   array yields `[-1, 0, 1]`.
 */
function findLen(str, hasSortArr, callback) {
  const total = hasSortArr.length;
  if (total === 0) {
    return [-1, 0, 1];
  }

  // Not after the first element: the first element is the answer.
  const [firstLen, firstOrder] = callback(str, hasSortArr[0]);
  if (firstOrder < 1) {
    return [0, firstLen, firstOrder];
  }

  // Not before the last element: the last element is the answer.
  const [lastLen, lastOrder] = callback(str, hasSortArr[total - 1]);
  if (lastOrder > -1) {
    return [total - 1, lastLen, lastOrder];
  }

  // lowLen / highLen remember the equal length at the current lower / upper
  // bound so the nearer neighbour can be chosen without calling back again.
  let low = 0;
  let high = total;
  let lowLen = firstLen;
  let highLen = lastLen;
  for (;;) {
    const mid = (low + high + 1) >> 1;
    const [midLen, midOrder] = callback(str, hasSortArr[mid]);

    if (midOrder === 1) {
      if (mid + 1 === high) {
        return midLen < highLen ? [high, highLen, -1] : [mid, midLen, midOrder];
      }
      lowLen = midLen;
      low = mid;
    } else if (midOrder === -1) {
      if (low + 1 === mid) {
        return midLen < lowLen ? [low, lowLen, 1] : [mid, midLen, midOrder];
      }
      highLen = midLen;
      high = mid;
    } else {
      return [mid, midLen, midOrder];
    }
  }
}

/**
 * Fuzzy substring search over a list of strings.
 *
 * Every character position of every entry is treated as the start of a suffix.
 * The suffixes are kept in sorted order so that, for any offset of a query, the
 * suffix sharing the longest prefix with the query can be found by binary
 * search. Relies on the module-level helpers `findLen` and `compareLen`.
 *
 * Match results are 4-element arrays `[matched, entryLength, entryIndex, start]`:
 * number of entry characters that line up with the query, length of the entry,
 * index of the entry in `data`, and the query offset at which the entry's
 * first character is aligned (negative when the entry would begin before the
 * start of the query).
 */
class SearchClass {
  /**
   * Build the index for `data`.
   *
   * @param {string[]} data - Entries to search in.
   */
  constructor(data) {
    this.data = data;

    // Suffix number k starts at character bitMap1[k] of data[bitMap2[k]].
    const bitMap1 = [];
    const bitMap2 = [];
    data.forEach((str, j) => {
      for (let i = 0; i < str.length; i++) {
        bitMap1.push(i);
        bitMap2.push(j);
      }
    });
    this.bitMap1 = bitMap1;
    this.bitMap2 = bitMap2;
    this.saArr = this.getSa();
  }

  /**
   * Sort all suffix numbers by the text they denote, inserting them one at a
   * time at the position found by binary search.
   *
   * @returns {number[]} Suffix numbers in ascending suffix order.
   */
  getSa() {
    const { data, bitMap1, bitMap2 } = this;
    const compareSuffixes = (a, b) =>
      compareLen(bitMap1[a], bitMap1[b], data[bitMap2[a]], data[bitMap2[b]]);

    const sa = [];
    for (let i = 0; i < bitMap1.length; i++) {
      const [pos, , order] = findLen(i, sa, compareSuffixes);
      // order === 1: suffix i sorts after sa[pos]; otherwise it goes in front of it.
      sa.splice(order === 1 ? pos + 1 : pos, 0, i);
    }
    return sa;
  }

  /**
   * Find the run of sorted suffixes that share the longest possible prefix
   * with `key` read from offset `m`.
   *
   * @param {string} key - Query string.
   * @param {number} [m=0] - Offset into `key` where matching starts.
   * @returns {[number, number, number]} `[from, to, len]`: positions
   *   `from` (inclusive) to `to` (exclusive) of `saArr` all begin with the same
   *   `len` characters as `key` at `m`. `len` is 0 when nothing matches.
   */
  _getRange(key, m = 0) {
    const { data, bitMap1, bitMap2, saArr } = this;

    // First `length` characters of the suffix stored at position `rank` of saArr.
    const suffixPrefix = (rank, length) => {
      const start = bitMap1[saArr[rank]];
      return data[bitMap2[saArr[rank]]].slice(start, start + length);
    };

    const [n, len] = findLen(m, saArr, (keyOffset, suffixId) =>
      compareLen(keyOffset, bitMap1[suffixId], key, data[bitMap2[suffixId]]));

    let n1 = n;
    let n2 = n + 1;
    if (len > 0) {
      // Widen in both directions while neighbours start with the same text.
      const kk = suffixPrefix(n, len);
      while (n1 > 0 && suffixPrefix(n1 - 1, len) === kk) {
        n1--;
      }
      while (n2 < saArr.length && suffixPrefix(n2, len) === kk) {
        n2++;
      }
    }
    return [n1, n2, len];
  }

  /**
   * Score every entry that contains the longest match for `key` at offset `m`.
   *
   * Each candidate entry is laid over the query so that the matched stretch
   * coincides, then the characters to the left and right of the stretch are
   * compared position by position and the equal ones are counted as well.
   *
   * @param {string} key - Query string.
   * @param {number} [m=0] - Offset into `key` where matching starts.
   * @returns {Array<[number, number, number, number]>|undefined} Entries that
   *   line up completely, followed by the partial matches with the best
   *   matched/length ratio; `undefined` when nothing matches at `m`.
   */
  getMaxSameByM(key, m = 0) {
    const { data, bitMap1, bitMap2, saArr } = this;
    const [n1, n2, len] = this._getRange(key, m);
    if (len === 0) {
      return undefined;
    }

    const arr1 = []; // entries whose every character lines up with the query
    const arr2 = []; // partial matches with the best ratio seen so far
    let maxFz = 0; // best ratio so far, kept as numerator ...
    let maxFm = len; // ... and denominator
    let rLen = 0; // longest complete match so far
    for (let i = n1; i < n2; i++) {
      const mid = saArr[i];
      const bt1 = bitMap1[mid];
      const bt2 = bitMap2[mid];
      const str = data[bt2];
      const curFm = str.length;
      let curFz = len;

      // Walk left from the matched stretch to the start of the entry.
      let left = m;
      let left2 = bt1;
      while (left2 > 0) {
        left--;
        left2--;
        if (str[left2] === key[left]) {
          curFz++;
        }
      }

      // Walk right from the matched stretch to the end of the entry.
      let right = m + len;
      let right2 = bt1 + len;
      while (right2 < curFm) {
        if (str[right2] === key[right]) {
          curFz++;
        }
        right++;
        right2++;
      }

      if (curFz === curFm) {
        arr1.push([curFz, curFm, bt2, left]);
        if (curFz > rLen) {
          rLen = curFz;
        }
        // A complete match at least as long as the partial ones replaces them.
        if (arr2.length > 0 && rLen >= arr2[0][0]) {
          arr2.length = 0;
        }
      } else if (curFz > rLen) {
        // Compare curFz/curFm with maxFz/maxFm by cross-multiplying.
        if (curFz * maxFm > maxFz * curFm) {
          maxFz = curFz;
          maxFm = curFm;
          arr2.length = 0;
          arr2.push([curFz, curFm, bt2, left]);
        } else if (curFz * maxFm === maxFz * curFm) {
          arr2.push([curFz, curFm, bt2, left]);
        }
      }
    }
    return arr1.concat(arr2);
  }

  /**
   * Collect the best matches for every offset of `str`, dropping duplicates
   * and resolving matches that overlap in the query.
   *
   * @param {string} str - Query string.
   * @returns {Array<[number, number, number, number]>} Surviving matches in
   *   the order they were accepted.
   */
  getMaxSame(str) {
    const data2 = [];
    const seen = new Set();
    for (let i = 0; i < str.length; i++) {
      const found = this.getMaxSameByM(str, i);
      if (!found) {
        continue;
      }
      for (const item of found) {
        const id = item.join();
        if (seen.has(id)) {
          continue;
        }
        seen.add(id);

        let isMin = false;
        const isRect = item[0] === item[1]; // item is a complete match
        // Iterate backwards so removing an element does not skip the next one.
        for (let j = data2.length - 1; j >= 0; j--) {
          const pos = data2[j];
          // Do the two matches overlap in the query?
          const overlaps = item[3] + item[1] > pos[3] && pos[3] + pos[1] > item[3];
          if (!overlaps) {
            continue;
          }
          if (isRect) {
            // A complete match evicts overlapping entries that are not longer.
            if (pos[1] <= item[1]) {
              data2.splice(j, 1);
            }
          } else if (pos[0] === pos[1]) {
            // A partial match loses to an overlapping complete match that
            // covers at least as many characters.
            if (pos[0] >= item[0]) {
              isMin = true;
              break;
            }
          } else {
            // Both partial: the higher matched/length ratio wins.
            const dis = item[0] * pos[1] - pos[0] * item[1];
            if (dis > 0) {
              data2.splice(j, 1);
            } else if (dis < 0) {
              isMin = true;
              break;
            }
          }
        }
        if (!isMin) {
          data2.push(item);
        }
      }
    }
    return data2;
  }
}
module.exports = SearchClass;

// Demo, executed only when this file is run directly (not when it is loaded
// with require()), so importing the module neither builds an index nor prints.
// Each printed entry is [matched length, entry length, entry index, start offset].
if (require.main === module) {
  const demo = new SearchClass(['123', '23', '2340', '2350']);
  console.time('log');
  console.log(demo.getMaxSame('3400232340'));
  console.timeEnd('log');
}
// posted @ 2022-01-03 21:58  巅峰蜗牛  阅读(223)  评论(0)  编辑  收藏  举报