redis6.0.5之t_string阅读笔记--字符串键2算法LCS

*********************************************************************************************************************
/* STRALGO -- Implement complex algorithms on strings.
 *
 * STRALGO <algorithm> ... arguments ...
 *
 * Command entry point: argv[1] names the algorithm, every following
 * argument is parsed by the selected implementation. */
void stralgoLCS(client *c);     /* This implements the LCS algorithm. */
void stralgoCommand(client *c) {
    /* Select the algorithm. "lcs" (matched case-insensitively) is the only
     * one dispatched here; a missing or unknown name gets a syntax error. */
    if (c->argc > 1 && !strcasecmp(c->argv[1]->ptr,"lcs")) {
        stralgoLCS(c);
    } else {
        addReply(c,shared.syntaxerr);
    }
}
*********************************************************************************************************************
/* STRALGO <algo> [IDX] [MINMATCHLEN <len>] [WITHMATCHLEN]
 *     STRINGS <string> <string> | KEYS <keya> <keyb>
 */
使用格式：
STRALGO <算法名字> [IDX：返回各匹配段的位置索引] [LEN：只返回LCS的长度] [MINMATCHLEN <长度值>：只输出不短于该长度的匹配段] [WITHMATCHLEN：同时返回每个匹配段的长度]
如果是外面输入的字符串，那么采用  STRINGS <string> <string> 这种模式
stralgo lcs idx strings mytencenttest123 mybaidutest123   withmatchlen
如果是库中字符串，那么可以采用如下模式
stralgo lcs idx keys k1 k2   withmatchlen
其中k1="mytencenttest123"  k2="mybaidutest123"

/* Longest Common Subsequence between two strings.
 *
 * Options parsed from argv[2] onwards (all matched case-insensitively):
 *   IDX               reply with the matching ranges plus the LCS length.
 *   LEN               reply with the LCS length only.
 *   WITHMATCHLEN      with IDX, append the length of every range.
 *   MINMATCHLEN <n>   with IDX, only emit ranges at least <n> bytes long
 *                     (negative values are treated as 0).
 *   STRINGS <a> <b>   compare the two arguments themselves.
 *   KEYS <ka> <kb>    compare the values stored at the two keys; a missing
 *                     key is treated as the empty string.
 *
 * Exactly one of STRINGS / KEYS must be given, and LEN cannot be combined
 * with IDX. Without IDX and LEN the reply is the LCS string itself.
 *
 * Every exit path goes through the "cleanup" label so that the references
 * taken on the KEYS values and the temporary buffers are always released. */
void stralgoLCS(client *c) {
    uint32_t i, j;
    long long minmatchlen = 0;
    sds a = NULL, b = NULL;
    int getlen = 0, getidx = 0, withmatchlen = 0;
    robj *obja = NULL, *objb = NULL;
    uint32_t *lcs = NULL;     /* DP table, released at cleanup. */
    sds result = NULL;        /* Resulting LCS string, released at cleanup. */

    /* Options start at the third argument. */
    for (j = 2; j < (uint32_t)c->argc; j++) {
        char *opt = c->argv[j]->ptr;
        /* Number of arguments still available after the current one. */
        int moreargs = (c->argc-1) - j;

        if (!strcasecmp(opt,"IDX")) {
            getidx = 1;
        } else if (!strcasecmp(opt,"LEN")) {
            getlen = 1;
        } else if (!strcasecmp(opt,"WITHMATCHLEN")) {
            withmatchlen = 1;
        } else if (!strcasecmp(opt,"MINMATCHLEN") && moreargs) {
            if (getLongLongFromObjectOrReply(c,c->argv[j+1],&minmatchlen,NULL)
                != C_OK) goto cleanup;
            if (minmatchlen < 0) minmatchlen = 0;
            j++;
        } else if (!strcasecmp(opt,"STRINGS") && moreargs > 1) {
            if (a != NULL) {
                /* The inputs were already provided by an earlier option. */
                addReplyError(c,"Either use STRINGS or KEYS");
                goto cleanup;
            }
            a = c->argv[j+1]->ptr;
            b = c->argv[j+2]->ptr;
            j += 2;
        } else if (!strcasecmp(opt,"KEYS") && moreargs > 1) {
            if (a != NULL) {
                addReplyError(c,"Either use STRINGS or KEYS");
                goto cleanup;
            }
            obja = lookupKeyRead(c->db,c->argv[j+1]);
            objb = lookupKeyRead(c->db,c->argv[j+2]);
            /* NOTE(review): the looked-up values are handed to
             * getDecodedObject() without checking their type first --
             * confirm that non-string values are rejected somewhere. */
            /* Existing values are decoded, missing keys become an empty
             * string. Both objects are released at cleanup. */
            obja = obja ? getDecodedObject(obja) : createStringObject("",0);
            objb = objb ? getDecodedObject(objb) : createStringObject("",0);
            a = obja->ptr;
            b = objb->ptr;
            j += 2;
        } else {
            addReply(c,shared.syntaxerr);
            goto cleanup;
        }
    }

    /* Complain if the user passed ambiguous parameters. */
    if (a == NULL) {
        addReplyError(c,"Please specify two strings: "
                        "STRINGS or KEYS options are mandatory");
        goto cleanup;
    } else if (getlen && getidx) {
        addReplyError(c,
            "If you want both the length and indexes, please "
            "just use IDX.");
        goto cleanup;
    }

    /* Lengths and table cells are stored as uint32_t: refuse inputs whose
     * length (plus the extra row/column) does not fit. */
    if (sdslen(a) >= UINT32_MAX-1 || sdslen(b) >= UINT32_MAX-1) {
        addReplyError(c,"String too long for LCS");
        goto cleanup;
    }

    /* Compute the LCS using the vanilla dynamic programming technique of
     * building a table of LCS(x,y) substrings. */
    uint32_t alen = sdslen(a);
    uint32_t blen = sdslen(b);

    /* Setup an uint32_t array to store at LCS[i,j] the length of the
     * LCS A0..i-1, B0..j-1. Note that we have a linear array here, so
     * we index it as LCS[j+(blen+1)*i]. One extra row and one extra column
     * hold the "empty prefix" cases.
     *
     * The size and the index are computed as size_t: doing the product in
     * 32 bit arithmetic wraps around for large inputs, which would yield an
     * undersized table and out of bounds accesses. */
    size_t rows = (size_t)alen+1;
    size_t cols = (size_t)blen+1;
    if (cols > SIZE_MAX / sizeof(uint32_t) / rows) {
        addReplyError(c,
            "Insufficient memory, transient memory for LCS is too large");
        goto cleanup;
    }
    lcs = zmalloc(rows*cols*sizeof(uint32_t));
    #define LCS(A,B) lcs[(size_t)(B)+((size_t)(A)*cols)]

    /* Start building the LCS table. */
    for (i = 0; i <= alen; i++) {
        for (j = 0; j <= blen; j++) {
            if (i == 0 || j == 0) {
                /* If one substring has length of zero, the
                 * LCS length is zero. */
                LCS(i,j) = 0;
            } else if (a[i-1] == b[j-1]) {
                /* The len LCS (and the LCS itself) of two
                 * sequences with the same final character, is the
                 * LCS of the two sequences without the last char
                 * plus that last char. */
                LCS(i,j) = LCS(i-1,j-1)+1;
            } else {
                /* If the last character is different, take the longest
                 * between the LCS of the first string and the second
                 * minus the last char, and the reverse. */
                uint32_t lcs1 = LCS(i-1,j);
                uint32_t lcs2 = LCS(i,j-1);
                LCS(i,j) = lcs1 > lcs2 ? lcs1 : lcs2;
            }
        }
    }

    /* Store the actual LCS string in "result" if needed. We create
     * it backward, but the length is already known, we store it into idx. */
    uint32_t idx = LCS(alen,blen);
    void *arraylenptr = NULL; /* Deferred length of the array for IDX. */
    uint32_t arange_start = alen, /* alen signals that values are not set. */
             arange_end = 0,
             brange_start = 0,
             brange_end = 0;

    /* Do we need to compute the actual LCS string? Allocate it in that case. */
    int computelcs = getidx || !getlen;
    if (computelcs) result = sdsnewlen(SDS_NOINIT,idx);

    /* Start with a deferred array if we have to emit the ranges. */
    uint32_t arraylen = 0;  /* Number of ranges emitted in the array. */
    if (getidx) {
        addReplyMapLen(c,2);
        addReplyBulkCString(c,"matches");
        arraylenptr = addReplyDeferredLen(c);
    }

    /* Walk the table backward starting from the last cell. */
    i = alen, j = blen;
    while (computelcs && i > 0 && j > 0) {
        int emit_range = 0;
        if (a[i-1] == b[j-1]) {
            /* If there is a match, store the character and reduce
             * the indexes to look for a new match. */
            result[idx-1] = a[i-1];

            /* Track the current range, identified by its first and last
             * position in each of the two strings. */
            if (arange_start == alen) {
                /* No range is open: start a new one at this match. */
                arange_start = i-1;
                arange_end = i-1;
                brange_start = j-1;
                brange_end = j-1;
            } else {
                /* Let's see if we can extend the range backward since
                 * it is contiguous. */
                if (arange_start == i && brange_start == j) {
                    arange_start--;
                    brange_start--;
                } else {
                    emit_range = 1;
                }
            }
            /* Emit the range if we matched with the first byte of
             * one of the two strings. We'll exit the loop ASAP. */
            if (arange_start == 0 || brange_start == 0) emit_range = 1;
            idx--; i--; j--;
        } else {
            /* Otherwise reduce i and j depending on the largest
             * LCS between, to understand what direction we need to go. */
            uint32_t lcs1 = LCS(i-1,j);
            uint32_t lcs2 = LCS(i,j-1);
            if (lcs1 > lcs2)
                i--;
            else
                j--;
            /* A mismatch closes the range that was open, if any. */
            if (arange_start != alen) emit_range = 1;
        }

        /* Emit the current range if needed. match_len is only meaningful
         * (and only used) when a range is actually being emitted. */
        uint32_t match_len = arange_end - arange_start + 1;
        if (emit_range) {
            if (minmatchlen == 0 || match_len >= minmatchlen) {
                if (arraylenptr) {
                    addReplyArrayLen(c,2+withmatchlen);
                    addReplyArrayLen(c,2);
                    addReplyLongLong(c,arange_start);
                    addReplyLongLong(c,arange_end);
                    addReplyArrayLen(c,2);
                    addReplyLongLong(c,brange_start);
                    addReplyLongLong(c,brange_end);
                    if (withmatchlen) addReplyLongLong(c,match_len);
                    arraylen++;
                }
            }
            arange_start = alen; /* Restart at the next match. */
        }
    }

    /* Reply depending on the given options. */
    if (arraylenptr) {
        /* IDX: close the "matches" array and add the total LCS length. */
        addReplyBulkCString(c,"len");
        addReplyLongLong(c,LCS(alen,blen));
        setDeferredArrayLen(c,arraylenptr,arraylen);
    } else if (getlen) {
        addReplyLongLong(c,LCS(alen,blen));
    } else {
        addReplyBulkSds(c,result);
        result = NULL; /* Handed over to the reply: must not be freed here. */
    }

cleanup:
    /* Release the key references and the temporary buffers. */
    if (obja) decrRefCount(obja);
    if (objb) decrRefCount(objb);
    sdsfree(result);
    if (lcs) zfree(lcs);
    return;
}

************************************************************************************************************
posted on 2021-07-06 17:01 子虚乌有阅读(98) 评论(0) 收藏举报