redis之sds(simple dynamic string)阅读笔记7-sds之字符串常用函数1

redis之sds(simple dynamic string)阅读笔记7-sds之字符串常用函数1
**********************************************************************
函数sdstrim  去除字符串左右两边连续的在特定字符集中的字符
/* Remove the part of the string from left and from right composed just of
 * contiguous characters found in 'cset', that is a null terminated C string.
去除字符串左右两边连续的、出现在字符集'cset'中的字符；'cset'是一个以'\0'结尾的C字符串
 * After the call, the modified sds string is no longer valid and all the
 * references must be substituted with the new pointer returned by the call.
 *
 * Example:
 *
 * s = sdsnew("AA...AA.a.aa.aHelloWorld     :::");
 * s = sdstrim(s,"Aa. :");
 * printf("%s\n", s);
 *
 * Output will be just "HelloWorld".
 */
以上的例子太完美，不是很好，举例如下
sds mys = sdsnew("AA...AeA.a.aa.aHelloWorld     :nihao::");
mys = sdstrim(mys,"Aa. :");
printf("%s\n",mys); //结果为 eA.a.aa.aHelloWorld     :nihao
AA...A e(这个e是左起第一个不在字符集"Aa. :"中的字符)A.a.aa.aHelloWorld     :nihao(这个o是右起第一个不在字符集"Aa. :"中的字符) ::
所以被去除的是左边连续的 AA...A 和右边连续的 ::（e和o本身会保留），结果为 eA.a.aa.aHelloWorld     :nihao

/* Trim 's' in place: drop the leading and trailing runs of bytes that occur
 * in the NUL-terminated set 'cset', then return 's'.
 *
 * Note: strchr() also matches the terminating NUL of 'cset', so a '\0' byte
 * sitting at either edge of the string is treated as trimmable. */
sds sdstrim(sds s, const char *cset) {
    char *last = s + sdslen(s) - 1;  /* last byte of the current content */
    char *left = s;
    char *right = last;
    size_t keep;

    /* Skip leading bytes that belong to the set. */
    while (left <= last && strchr(cset, *left) != NULL) {
        left++;
    }
    /* Skip trailing bytes that belong to the set, never crossing 'left'. */
    while (right > left && strchr(cset, *right) != NULL) {
        right--;
    }

    /* 'left' past 'right' means every byte was trimmed away. */
    if (left > right) {
        keep = 0;
    } else {
        keep = (size_t)(right - left) + 1;
    }

    /* The kept range lives inside the same buffer it is moved to, so the
     * regions may overlap: memmove, not memcpy. */
    if (left != s) {
        memmove(s, left, keep);
    }
    s[keep] = '\0';
    sdssetlen(s, keep);
    return s;
}
**********************************************************************
函数sdsrange 获取字符串一个连续的子集
/* Turn the string into a smaller (or equal) string containing only the
 * substring specified by the 'start' and 'end' indexes.
通过开始和结束两个位置截取原字符串的一个子集，这个子集最大是原字符串。
 * start and end can be negative, where -1 means the last character of the
 * string, -2 the penultimate character, and so forth.
开始和结束位置可以是负数，-1代表字符串最后一个位置，-2是倒数第二个位置，以此类推
 * The interval is inclusive, so the start and end characters will be part
 * of the resulting string.
这个区间是闭区间，所以开始和结束两个位置是包括在这个字符串子集中的
 * The string is modified in-place.
字符串直接在原串上修改
 * Example:
 *
 * s = sdsnew("Hello World");
 * sdsrange(s,1,-1); => "ello World"
 */
Hello World
0123456789 (最后一个字符d的下标是10，也就是-1)
所以1的位置是e，-1的位置是d，结果就是从e到d的字符串，即"ello World"

/* Reduce 's' in place to the inclusive range [start, end].
 *
 * Negative indexes count from the end of the string (-1 is the last byte).
 * Indexes that fall outside the string are clamped; a range that ends up
 * empty leaves 's' as the empty string. */
void sdsrange(sds s, ssize_t start, ssize_t end) {
    size_t len = sdslen(s);
    size_t newlen;

    /* Nothing to cut from an empty string. */
    if (len == 0) return;

    /* Translate negative indexes into offsets from the front, flooring at 0. */
    if (start < 0) {
        start = (ssize_t)(len + (size_t)start);
        if (start < 0) start = 0;
    }
    if (end < 0) {
        end = (ssize_t)(len + (size_t)end);
        if (end < 0) end = 0;
    }

    if (start > end || start >= (ssize_t)len) {
        /* Inverted range, or a range starting past the last byte: empty. */
        newlen = 0;
    } else {
        /* Clip the end to the last valid byte before measuring the range. */
        if (end >= (ssize_t)len) end = (ssize_t)(len - 1);
        newlen = (size_t)(end - start) + 1;
    }

    /* Shift the kept bytes to the front only when they do not already start
     * there; source and destination share the buffer, hence memmove. */
    if (start != 0 && newlen != 0) {
        memmove(s, s + start, newlen);
    }
    s[newlen] = '\0';
    sdssetlen(s, newlen);
}
**********************************************************************
大小写转化函数
/* Apply tolower() to every byte of the sds string 's', in place.
 *
 * Each byte is converted to unsigned char before the call: handing a
 * negative plain-char value (bytes >= 0x80 where char is signed) to a
 * <ctype.h> function is undefined behavior. */
void sdstolower(sds s) {
    size_t len = sdslen(s), j;
    for (j = 0; j < len; j++) s[j] = (char)tolower((unsigned char)s[j]);
}

/* Apply toupper() to every byte of the sds string 's', in place.
 *
 * Each byte is converted to unsigned char before the call: handing a
 * negative plain-char value (bytes >= 0x80 where char is signed) to a
 * <ctype.h> function is undefined behavior. */
void sdstoupper(sds s) {
    size_t len = sdslen(s), j;
    for (j = 0; j < len; j++) s[j] = (char)toupper((unsigned char)s[j]);
}
**********************************************************************
字符串比较函数
/* Compare two sds strings s1 and s2 with memcmp().
使用函数memcmp比较sds字符串s1和s2
 * Return value:
 *
 *     positive if s1 > s2.  返回值为正 表示 s1> s2
 *     negative if s1 < s2.  返回值为负 表示 s1< s2
 *     0 if s1 and s2 are exactly the same binary string. 返回值为0表示字符串相等
 *
 * If two strings share exactly the same prefix, but one of the two has
 * additional characters, the longer string is considered to be greater than
 * the smaller one. */
如果两个字符串拥有完全相同的前缀，但其中一个字符串更长，那么更长的这个字符串被认为大于较短的那个
/* Order two sds strings byte-wise with memcmp() over their common length.
 * When the common prefix is identical the longer string sorts higher:
 * returns 1 / -1 in that case, 0 when both length and content match, and
 * otherwise whatever memcmp() reported. */
int sdscmp(const sds s1, const sds s2) {
    const size_t len1 = sdslen(s1);
    const size_t len2 = sdslen(s2);
    const size_t common = (len1 < len2) ? len1 : len2;
    const int order = memcmp(s1, s2, common);

    /* A difference inside the shared prefix decides the result outright. */
    if (order != 0) return order;

    /* Same prefix: fall back to comparing lengths. */
    if (len1 > len2) return 1;
    if (len1 < len2) return -1;
    return 0;
}
posted on 2020-07-30 17:35 子虚乌有阅读(292) 评论(0) 收藏举报