Redis源码解读 -简单动态字符串 SDS
简单动态字符串(Simple Dynamic Strings, SDS)是Redis的基本数据结构之一,用于存储字符串和整型数据。SDS兼容C语言标准字符串处理函数,且在此基础上保证了二进制安全。
SDS如何实现二进制安全,同时兼容C语言标准字符串处理?先来看下它的定义:
typedef char *sds; struct __attribute__ ((__packed__)) sdshdr5 { unsigned char flags; /* 3 lsb of type, and 5 msb of string length */ char buf[]; }; struct __attribute__ ((__packed__)) sdshdr8 { uint8_t len; /* used */ uint8_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[]; }; struct __attribute__ ((__packed__)) sdshdr16 { uint16_t len; /* used */ uint16_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[]; }; struct __attribute__ ((__packed__)) sdshdr32 { uint32_t len; /* used */ uint32_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[]; }; struct __attribute__ ((__packed__)) sdshdr64 { uint64_t len; /* used */ uint64_t alloc; /* excluding the header and null terminator */ unsigned char flags; /* 3 lsb of type, 5 unused bits */ char buf[]; };
sds是char类型的指针,以'\0'为字符串结束标志,而后面的一系列sdshdr结构体则是数据存储的真实格式;为什么要这样做?
__attribute__ ((__packed__)) 关键字的作用是取消字节对齐,使得结构体后面的 char buf[] (实际存储字符串内容的数组)成为了一个柔性数组成员,sds就是指向这个数组的首地址。
这样一来,C语言中就可以直接对sds使用标准字符串函数处理。
那么它如何实现二进制安全?
结构体中的 unsigned char flags 是固定长度,因此只需要对sds进行-1操作,也就是 buf-1 就可以很方便的得到flags的内容:
#define SDS_TYPE_5 0 #define SDS_TYPE_8 1 #define SDS_TYPE_16 2 #define SDS_TYPE_32 3 #define SDS_TYPE_64 4 #define SDS_TYPE_MASK 7 #define SDS_TYPE_BITS 3 #define SDS_HDR_VAR(T,s) struct sdshdr##T *sh = (void*)((s)-(sizeof(struct sdshdr##T))); #define SDS_HDR(T,s) ((struct sdshdr##T *)((s)-(sizeof(struct sdshdr##T)))) #define SDS_TYPE_5_LEN(f) ((f)>>SDS_TYPE_BITS) static inline size_t sdslen(const sds s) { unsigned char flags = s[-1]; switch(flags&SDS_TYPE_MASK) { case SDS_TYPE_5: return SDS_TYPE_5_LEN(flags); case SDS_TYPE_8: return SDS_HDR(8,s)->len; case SDS_TYPE_16: return SDS_HDR(16,s)->len; case SDS_TYPE_32: return SDS_HDR(32,s)->len; case SDS_TYPE_64: return SDS_HDR(64,s)->len; } return 0; }
以上代码可以看出,flags的低三位固定存储sds类型,sdshdr5的flags高5位还存储了字符串长度
对于其他结构体,其头部还存储字符串长度len和申请的内存大小alloc,这两个成员的类型可以通过flags的低三位判断。
由于结构体中存储了字符串真实长度len,因此可以实现二进制安全。
附:sds的创建:
/* Create a new sds string starting from a null terminated C string. */ sds sdsnew(const char *init) { size_t initlen = (init == NULL) ? 0 : strlen(init); return sdsnewlen(init, initlen); } sds sdsnewlen(const void *init, size_t initlen) { void *sh; sds s; char type = sdsReqType(initlen); /* Empty strings are usually created in order to append. Use type 8 * since type 5 is not good at this. */ if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8; int hdrlen = sdsHdrSize(type); unsigned char *fp; /* flags pointer. */ sh = s_malloc(hdrlen+initlen+1); if (init==SDS_NOINIT) init = NULL; else if (!init) memset(sh, 0, hdrlen+initlen+1); if (sh == NULL) return NULL; s = (char*)sh+hdrlen; fp = ((unsigned char*)s)-1; switch(type) { case SDS_TYPE_5: { *fp = type | (initlen << SDS_TYPE_BITS); break; } case SDS_TYPE_8: { SDS_HDR_VAR(8,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; } case SDS_TYPE_16: { SDS_HDR_VAR(16,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; } case SDS_TYPE_32: { SDS_HDR_VAR(32,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; } case SDS_TYPE_64: { SDS_HDR_VAR(64,s); sh->len = initlen; sh->alloc = initlen; *fp = type; break; } } if (initlen && init) memcpy(s, init, initlen); s[initlen] = '\0'; return s; }
附:sds扩容:
sds sdsMakeRoomFor(sds s, size_t addlen) { void *sh, *newsh; size_t avail = sdsavail(s); size_t len, newlen; char type, oldtype = s[-1] & SDS_TYPE_MASK; int hdrlen; /* Return ASAP if there is enough space left. */ if (avail >= addlen) return s; len = sdslen(s); sh = (char*)s-sdsHdrSize(oldtype); newlen = (len+addlen); if (newlen < SDS_MAX_PREALLOC) newlen *= 2; else newlen += SDS_MAX_PREALLOC; type = sdsReqType(newlen); /* Don't use type 5: the user is appending to the string and type 5 is * not able to remember empty space, so sdsMakeRoomFor() must be called * at every appending operation. */ if (type == SDS_TYPE_5) type = SDS_TYPE_8; hdrlen = sdsHdrSize(type); if (oldtype==type) { newsh = s_realloc(sh, hdrlen+newlen+1); if (newsh == NULL) return NULL; s = (char*)newsh+hdrlen; } else { /* Since the header size changes, need to move the string forward, * and can't use realloc */ newsh = s_malloc(hdrlen+newlen+1); if (newsh == NULL) return NULL; memcpy((char*)newsh+hdrlen, s, len+1); s_free(sh); s = (char*)newsh+hdrlen; s[-1] = type; sdssetlen(s, len); } sdssetalloc(s, newlen); return s; }
附:sds缩容:redis会对有大量闲置空间的sds进行缩容处理:
sds sdsRemoveFreeSpace(sds s) { void *sh, *newsh; char type, oldtype = s[-1] & SDS_TYPE_MASK; int hdrlen, oldhdrlen = sdsHdrSize(oldtype); size_t len = sdslen(s); sh = (char*)s-oldhdrlen; /* Check what would be the minimum SDS header that is just good enough to * fit this string. */ type = sdsReqType(len); hdrlen = sdsHdrSize(type); /* If the type is the same, or at least a large enough type is still * required, we just realloc(), letting the allocator to do the copy * only if really needed. Otherwise if the change is huge, we manually * reallocate the string to use the different header type. */ if (oldtype==type || type > SDS_TYPE_8) { newsh = s_realloc(sh, oldhdrlen+len+1); if (newsh == NULL) return NULL; s = (char*)newsh+oldhdrlen; } else { newsh = s_malloc(hdrlen+len+1); if (newsh == NULL) return NULL; memcpy((char*)newsh+hdrlen, s, len+1); s_free(sh); s = (char*)newsh+hdrlen; s[-1] = type; sdssetlen(s, len); } sdssetalloc(s, len); return s; }
本文来自博客园,作者:野猪被骑,转载请注明原文链接:https://www.cnblogs.com/lihuifeng/p/16412782.html

浙公网安备 33010602011771号