map结构初探
map结构初探
maps是erlang新出的一种数据结构,传说用来替代record。这里主要说下maps的具体实现,并分析各种操作的时间复杂度(c层面)。并对优缺点做一个总结。
环境:版本为R17
map结构
typedef struct map_s {
Eterm thing_word;
Uint size;
Eterm keys; /* tuple */
} map_t;
/* map node
*
* -----------
* Eterm THING
* Uint size
* Eterm Keys -> {K1, K2, K3, ..., Kn} where n = size
* ----
* Eterm V1
* ...
* Eterm Vn, where n = size
* -----------
*/
这个结构定义在erl_map.h中,这里的注释已经对map的结构说的很清楚了。
所以在map的结构大概是这样。
在erl_map.c的注释中,可以看到此模块主要实现了下面的一系列maps模块的函数
- maps:find/2
- maps:from_list/1
- maps:get/2
- maps:is_key/2
- maps:keys/1
- maps:merge/2
- maps:new/0
- maps:put/3
- maps:remove/2
- maps:to_list/1
- maps:update/3
- maps:values/1
下面挑一些能够看清map结构的函数,做一个简单的分析。
maps:find/2
时间复杂度:O(N), 由下面代码可以看到在map中查找实际需要遍历整个keys,所以时间复杂度是O(N)
BIF_RETTYPE maps_find_2(BIF_ALIST_2) {
if (is_map(BIF_ARG_2)) {
Eterm *hp, value,res;
if (erts_maps_find(BIF_ARG_1, BIF_ARG_2, &value)) {
hp = HAlloc(BIF_P, 3);
res = make_tuple(hp);
*hp++ = make_arityval(2);
*hp++ = am_ok;
*hp++ = value;
BIF_RET(res);
}
BIF_RET(am_error);
}
BIF_ERROR(BIF_P, BADARG);
}
int erts_maps_find(Eterm key, Eterm map, Eterm *value) {
Eterm *ks,*vs;
map_t *mp;
Uint n,i;
mp = (map_t*)map_val(map);
n = map_get_size(mp);
ks = map_get_keys(mp);
vs = map_get_values(mp);
//遍历所有的key,找到相等的key,O(N)
for( i = 0; i < n; i++) {
if (EQ(ks[i], key)) {
*value = vs[i];
return 1;
}
}
return 0;
}
maps:from_list/1
可以看到,from_list的操作是有点费时的,最坏情况下能到O(N^2)。
BIF_RETTYPE maps_from_list_1(BIF_ALIST_1) {
Eterm *kv, item = BIF_ARG_1;
Eterm *hp, *thp,*vs, *ks, keys, res;
map_t *mp;
Uint size = 0, unused_size = 0;
Sint c = 0;
Sint idx = 0;
if (is_list(item) || is_nil(item)) {
/* Calculate size and check validity */
//检查所有的list是否符合规范,这里遍历一遍,并得到元素个数。
while(is_list(item)) {
res = CAR(list_val(item));
if (is_not_tuple(res))
goto error;
kv = tuple_val(res);
if (*kv != make_arityval(2))
goto error;
size++;
item = CDR(list_val(item));
}
if (is_not_nil(item))
goto error;
//分配用于存放maps,3是map_t结构大小,1是tuple占用一个size,2倍大小的size是key跟value,由于这里是从头开始构造map,所以这里固定了keys的位置。
hp = HAlloc(BIF_P, 3 + 1 + (2 * size));
thp = hp;
keys = make_tuple(hp);//构造一个tuple
*hp++ = make_arityval(size);//填充tuple的大小
ks = hp;//keys的位置
hp += size;
mp = (map_t*)hp;//map结构指针
res = make_map(mp);
hp += MAP_HEADER_SIZE;
vs = hp;//values指针
//填充map头
mp->thing_word = MAP_HEADER;
mp->size = size; /* set later, might shrink*/
mp->keys = keys;
if (size == 0)
BIF_RET(res);
item = BIF_ARG_1;
//填充第一个元素的值
/* first entry */
kv = tuple_val(CAR(list_val(item)));
ks[0] = kv[1];
vs[0] = kv[2];
size = 1;
item = CDR(list_val(item));
/* insert sort key/value pairs */
while(is_list(item)) {
kv = tuple_val(CAR(list_val(item)));
/* compare ks backwards
* idx represent word index to be written (hole position).
* We cannot copy the elements when searching since we might
* have an equal key. So we search for just the index first =(
*
* It is perhaps faster to move the values in the first pass.
* Check for uniqueness during insert phase and then have a
* second phace compacting the map if duplicates are found
* during insert. .. or do someother sort .. shell-sort perhaps.
*/
idx = size;
//相当于使用了插入排序,保持keys有序
while(idx > 0 && (c = CMP_TERM(kv[1],ks[idx-1])) < 0) { idx--; }
if (c == 0) {
/* last compare was equal,
* i.e. we have to release memory
* and overwrite that key/value
*/
//如果key存在,直接改写数据就可以了
ks[idx-1] = kv[1];
vs[idx-1] = kv[2];
unused_size++;
} else {
//不存在,由于要保持有序,需要搬运后面的数据。
Uint i = size;
while(i > idx) {
ks[i] = ks[i-1];
vs[i] = vs[i-1];
i--;
}
ks[idx] = kv[1];
vs[idx] = kv[2];
size++;
}
item = CDR(list_val(item));
}
if (unused_size) {
/* the key tuple is embedded in the heap
* write a bignum to clear it.
*/
/* release values as normal since they are on the top of the heap */
ks[size] = make_pos_bignum_header(unused_size - 1);
HRelease(BIF_P, vs + size + unused_size, vs + size);
}
*thp = make_arityval(size);
mp->size = size;
BIF_RET(res);
}
error:
BIF_ERROR(BIF_P, BADARG);
}
maps:get/2
由于erts_maps_get的时间效率是O(N),所以get的时间效率也是O(N)
BIF_RETTYPE maps_get_2(BIF_ALIST_2) {
if (is_map(BIF_ARG_2)) {
Eterm *hp;
Eterm value, error;
char *s_error;
if (erts_maps_get(BIF_ARG_1, BIF_ARG_2, &value)) {
BIF_RET(value);
}
s_error = "bad_key";
error = am_atom_put(s_error, sys_strlen(s_error));
hp = HAlloc(BIF_P, 3);
BIF_P->fvalue = TUPLE2(hp, error, BIF_ARG_1);
BIF_ERROR(BIF_P, EXC_ERROR_2);
}
BIF_ERROR(BIF_P, BADARG);
}
int erts_maps_get(Eterm key, Eterm map, Eterm *value) {
Eterm *ks,*vs;
map_t *mp;
Uint n,i;
mp = (map_t*)map_val(map);
n = map_get_size(mp);
if (n == 0)
return 0;
ks = map_get_keys(mp);
vs = map_get_values(mp);
if (is_immed(key)) {
for( i = 0; i < n; i++) {
if (ks[i] == key) {
*value = vs[i];
return 1;
}
}
}
for( i = 0; i < n; i++) {
if (EQ(ks[i], key)) {
*value = vs[i];
return 1;
}
}
return 0;
}
maps:is_key/2
由代码可以看到还是需要遍历所有的key找到是否相等,时间效率O(N)
BIF_RETTYPE maps_is_key_2(BIF_ALIST_2) {
if (is_map(BIF_ARG_2)) {
Eterm *ks, key;
map_t *mp;
Uint n,i;
mp = (map_t*)map_val(BIF_ARG_2);
key = BIF_ARG_1;
n = map_get_size(mp);
ks = map_get_keys(mp);
if (n == 0)
BIF_RET(am_false);
if (is_immed(key)) {
for( i = 0; i < n; i++) {
if (ks[i] == key) {
BIF_RET(am_true);
}
}
}
for( i = 0; i < n; i++) {
if (EQ(ks[i], key)) {
BIF_RET(am_true);
}
}
BIF_RET(am_false);
}
BIF_ERROR(BIF_P, BADARG);
}
maps:keys/1
虽然得到keys只要O(1),但是构造list需要O(N)
BIF_RETTYPE maps_keys_1(BIF_ALIST_1) {
if (is_map(BIF_ARG_1)) {
Eterm *hp, *ks, res = NIL;
map_t *mp;
Uint n;
mp = (map_t*)map_val(BIF_ARG_1);
n = map_get_size(mp);
if (n == 0)
BIF_RET(res);
hp = HAlloc(BIF_P, (2 * n));
ks = map_get_keys(mp);
while(n--) {
res = CONS(hp, ks[n], res); hp += 2;
}
BIF_RET(res);
}
BIF_ERROR(BIF_P, BADARG);
}
maps:new/0
BIF_RETTYPE maps_new_0(BIF_ALIST_0) {
Eterm* hp;
Eterm tup;
map_t *mp;
//分配空间多分配1,是因为keys的结构是个tuple需要存放size
hp = HAlloc(BIF_P, (MAP_HEADER_SIZE + 1));
tup = make_tuple(hp);
*hp++ = make_arityval(0);
mp = (map_t*)hp;
mp->thing_word = MAP_HEADER;
mp->size = 0;
mp->keys = tup;
BIF_RET(make_map(mp));
}
maps:put/3
函数先假设已经存在这个key,找到key对于的value,然后修改它。如果key不存在这个结构中,则按序复制相应的key,value对。
BIF_RETTYPE maps_put_3(BIF_ALIST_3) {
if (is_map(BIF_ARG_3)) {
BIF_RET(erts_maps_put(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3));
}
BIF_ERROR(BIF_P, BADARG);
}
Eterm erts_maps_put(Process *p, Eterm key, Eterm value, Eterm map) {
Sint n,i;
Sint c = 0;
Eterm* hp, *shp;
Eterm *ks,*vs, res, tup;
map_t *mp = (map_t*)map_val(map);
n = map_get_size(mp);
if (n == 0) {
hp = HAlloc(p, MAP_HEADER_SIZE + 1 + 2);
tup = make_tuple(hp);
*hp++ = make_arityval(1);
*hp++ = key;
res = make_map(hp);
*hp++ = MAP_HEADER;
*hp++ = 1;
*hp++ = tup;
*hp++ = value;
return res;
}
ks = map_get_keys(mp);
vs = map_get_values(mp);
/* only allocate for values,
* assume key-tuple will be intact
*/
hp = HAlloc(p, MAP_HEADER_SIZE + n);
shp = hp; /* save hp, used if optimistic update fails */
res = make_map(hp);
*hp++ = MAP_HEADER;
*hp++ = n;
*hp++ = mp->keys;
//对构造的新map进行值复制
if (is_immed(key)) {
for( i = 0; i < n; i ++) {
if (ks[i] == key) {
*hp++ = value;
vs++;
c = 1;
} else {
*hp++ = *vs++;
}
}
} else {
for( i = 0; i < n; i ++) {
if (EQ(ks[i], key)) {
*hp++ = value;
vs++;
c = 1;
} else {
*hp++ = *vs++;
}
}
}
//如果发现了key,直接返回
if (c)
return res;
/* need to make a new tuple,
* use old hp since it needs to be recreated anyway.
*/
tup = make_tuple(shp);
*shp++ = make_arityval(n+1);
hp = HAlloc(p, 3 + n + 1);
res = make_map(hp);
*hp++ = MAP_HEADER;
*hp++ = n + 1;
*hp++ = tup;
ks = map_get_keys(mp);
vs = map_get_values(mp);
ASSERT(n >= 0);
/* copy map in order */
while (n && ((c = CMP_TERM(*ks, key)) < 0)) {
*shp++ = *ks++;
*hp++ = *vs++;
n--;
}
*shp++ = key;
*hp++ = value;
ASSERT(n >= 0);
while(n--) {
*shp++ = *ks++;
*hp++ = *vs++;
}
/* we have one word remaining
* this will work out fine once we get the size word
* in the header.
*/
*shp = make_pos_bignum_header(0);
return res;
}
总结
总的来说,在erlang的map结构实现的很简单,给我的感觉是很随意。各种操作都比较耗时。