[简易] 链表相关

前言

数数开发者的军备，算下来说多也行、说不多同样没问题。习于搭砖块不是坏事，但流于搭砖块则未必是好事——当我们见到不明原理而产生的偏见（“这里不加锁可以吗，我不敢保证啊”、“你写成这样，易读性是有问题的”）影响开发的时候，恐怕难免不甘。改变现状不容易，更多的时候也许我们会顺着形势反思自己是不是做得有点过了头，不过真的是这样吗？在说滥的基本功面前，滥俗的或许是我们自己？

在这系列科普里，我会试图展示我所见的基础问题。它们不太现代，也许未必适合急着考试求职的伙计；所以读下去之前不妨想想：如果读者真的不喜欢在基本而常见的问题上纠缠，或者认为自己没必要为既定现存的人类智慧添一笔拙劣涂鸦，关掉页面可能是更好的选择。

文中的代码会使用GCC的C扩展，关注移植性的读者请小心。

Side A

假定我们面前有这样一个从天而降的问题：设计一种链表结构，至少支持

添加节点
删除节点

也可能还有其他的需求，不过暂时不讨论。教科书或许会鼓励作出这样的实现：

// list_impl1.h

#pragma once

#include    <stddef.h>
#include    <stdint.h>

// One node of the doubly linked list.
struct linked_list_node {
    // Caller-supplied payload; the node only stores the pointer.
    void *entry;
    // Links to the adjacent nodes, indexed by LIST_NODE_NEXT / LIST_NODE_PREV.
    struct linked_list_node *neighbors[2];
};

// Indices into linked_list_node::neighbors selecting the link to follow.
#define LIST_NODE_NEXT  0
#define LIST_NODE_PREV  1

// The list object itself: bookkeeping plus an embedded sentinel node.
struct linked_list {
    // Number of nodes currently linked in, not counting the sentinel.
    uint32_t count;
    // Optional callback for releasing entries; may be NULL.
    void (*destructor)(void *);
    // Sentinel node; its neighbors are the first and last real nodes.
    struct linked_list_node head;
};

链表节点类型struct linked_list_node提供看似通用的void *数据域。当然这里只是简化处理，或许读者会希望提供一个联合数据域，方便取出常见的基本类型（这样的需求可以通过_Generic实现出来，写一些宏足矣）。另一方面，我们为作为“表”出现的链表准备专门的结构，看起来会稍微舒服点。

重审我们的需求。当说起“添加节点”的时候，其实并不知道应该在什么位置上添加；同样地考虑下来，“删除节点”也不知道从哪里找到节点。所幸我们的描述足够狭窄——注意，提出的需求针对“节点”而非“元素”，故而现在可以专注解决节点的问题。在写函数之前，首先提供一份头文件，简单地涵盖可能用到的标准C头文件：

// stdc_common.h

#pragma once

#include    <stdlib.h>
#include    <string.h>
#include    <stdint.h>

下面给出添加和删除操作。由于对键比较方法缺乏认识，首先假定我们找到了插入位置（在此节点前后）和删除位置（删除此节点）：

// list_impl1.c

#include    "stdc_common.h"
#include    "list_impl1.h"

// Link accessors. Each expands to an lvalue, so they can be both read and
// assigned: direction_of picks neighbors[direction_], while next_of / prev_of
// fix the direction to LIST_NODE_NEXT / LIST_NODE_PREV.
#define direction_of(node_, direction_) ((node_)->neighbors[(direction_)])
#define next_of(node_) direction_of((node_), LIST_NODE_NEXT)
#define prev_of(node_) direction_of((node_), LIST_NODE_PREV)


// Links newnode into the list directly after target and bumps the count.
// Neither pointer is checked; target must already be part of the list.
static
void do_insert_after(struct linked_list *list, struct linked_list_node *target, struct linked_list_node *newnode) {
    struct linked_list_node *successor = next_of(target);

    // Wire the new node first, then redirect its two neighbours to it.
    prev_of(newnode) = target;
    next_of(newnode) = successor;
    prev_of(successor) = newnode;
    next_of(target) = newnode;

    list->count++;
}

// Links newnode into the list directly before target and bumps the count.
// Neither pointer is checked; target must already be part of the list.
static
void do_insert_before(struct linked_list *list, struct linked_list_node *target, struct linked_list_node *newnode) {
    struct linked_list_node *predecessor = prev_of(target);

    // Wire the new node first, then redirect its two neighbours to it.
    next_of(newnode) = target;
    prev_of(newnode) = predecessor;
    next_of(predecessor) = newnode;
    prev_of(target) = newnode;

    list->count++;
}

// Unlinks target from the list, decrements the count and returns target with
// both of its links reset to NULL. The node itself is not freed here.
static
struct linked_list_node *do_remove_at(struct linked_list *list, struct linked_list_node *target) {
    struct linked_list_node *before = prev_of(target);
    struct linked_list_node *after = next_of(target);

    // Let the two neighbours point past the removed node.
    next_of(before) = after;
    prev_of(after) = before;
    list->count--;

    // Clear the stale links so the detached node no longer refers to the list.
    next_of(target) = NULL;
    prev_of(target) = NULL;
    return target;
}

通俗的双向链表加链/断链过程。为对C语言不太习惯的读者加一注：static修饰使函数具有内部链接（internal linkage），仅在本编译单元内可见，换句话说这些函数不会被导出（不妨考虑下原因）。

现在我们有了真正添加和移除节点的操作，它们比涉及键比较的操作更为基本，因此应当用于搭建上层的插入和删除操作——但是现在麻烦也来了：插入和删除的位置如何确定？很容易想到两种通用的做法，也即指定第k位置插入和指定在某键后插入。虽然都很好实现，但用起来很别扭：没人一上来就知道k该是多少，而指定的键也可能重复，这使得链表的插入行为不确定；同时，插入和删除应当是对称的——同样，没人知道该怎么删除。

这迫使我们停下来考虑一开始（或是教科书）的构想是否出现了问题。元素难以区别的链表，可能更多地用来堆积元素，同时在遍历过程中提供检索操作；另外的时候，也可能像数组一样用下标取得元素。
某些读者或许已经明白我们在谈论什么了，不过这里不必挑明。先从比较容易的事情做起，写一个按下标取得元素的简单函数：

// list_impl1.c

// Returns the node at zero-based position index, or NULL when index is out of
// range (which includes every index on an empty list).
//
// The walk starts at the sentinel and goes in whichever direction is shorter.
static
struct linked_list_node *do_get_node_at(struct linked_list *list, uint32_t index) {
    if (index >= list->count)
        return NULL;

    uint8_t direction = (index < list->count / 2) ? LIST_NODE_NEXT : LIST_NODE_PREV;
    // Distance from the sentinel: element 0 is one step forward, so the forward
    // walk needs index + 1 steps; the last element is one step backward, so the
    // backward walk needs count - index steps. Both are at least 1 here.
    uint32_t steps_total = (direction == LIST_NODE_NEXT) ? (index + 1) : (list->count - index);
    struct linked_list_node *target = &(list->head);

    while (steps_total--)
        target = direction_of(target, direction);

    return target;
}

// Returns the entry pointer stored in the node that do_get_node_at finds for
// index, or NULL when no node is found.
void *list_get_value_at(struct linked_list *list, uint32_t index) {
    struct linked_list_node *node = do_get_node_at(list, index);
    if (node == NULL)
        return NULL;
    return node->entry;
}

它试图从更短的一侧查找下标。现在也许我们冷静一些了，可以放弃过分灵活但不实用的用法。假定这列表处于暂时未变动的状态（这或许暗示它已经生成完成，近乎不可变了）。如果真的是这样，或许重要的是列表的元素被加入的客观顺序，而非其他什么混乱的顺序。在这样的假定下，我们添加两种操作：头插入和尾插入。

// list_impl1.c

// Allocates a node holding value. Returns NULL if malloc fails.
// The neighbor links are left unset; the insert helpers fill them in.
static
struct linked_list_node *create_node(void *value) {
    struct linked_list_node *node = malloc(sizeof *node);
    if (node != NULL)
        node->entry = value;
    return node;
}

// Releases the memory of a node. The entry it points to is not touched.
static
void destroy_node(struct linked_list_node *node) {
    free(node);
}

// Adds value at the tail of the list.
// Returns 1 on success, 0 if the node could not be allocated.
int list_append_value(struct linked_list *list, void *value) {
    struct linked_list_node *fresh = create_node(value);
    if (fresh != NULL) {
        // Inserting before the sentinel places the node last.
        do_insert_before(list, &list->head, fresh);
        return 1;
    }
    return 0;
}

// Adds value at the front of the list.
// Returns 1 on success, 0 if the node could not be allocated.
int list_prepend_value(struct linked_list *list, void *value) {
    struct linked_list_node *fresh = create_node(value);
    if (fresh != NULL) {
        // Inserting after the sentinel places the node first.
        do_insert_after(list, &list->head, fresh);
        return 1;
    }
    return 0;
}

还差一点。现在链表可以按下标取得元素，也可以在头尾两侧添加元素，不过遍历是做不到的。不妨考虑遍历的一般过程——用户需要一个循环变量遍取列表中的每一个元素，针对循环变量编写自定义的操作。再抽象一步的话，这里可以完成一次解耦：一个迭代器实例向用户承诺遍取列表中的每一个元素，并承诺向用户返回当前取得的元素。不过迭代器之所以值得称为迭代器，在于它应当通用地实现前述行为；现在我们没有太多精力处理这样的通用性，所以只做出根本不像样子的迭代游标。

// list_impl1.h

// Forward cursor over a linked_list. Field order matters: the iterator is
// built elsewhere with a positional initializer.
struct linked_list_iterator {
    // How many more steps may be taken before the traversal is finished.
    uint32_t n_steps_left;
    // The list being traversed.
    struct linked_list *parent;
    // Node the cursor currently sits on.
    struct linked_list_node *current;
    // Node to move to on the next step after a deletion; NULL otherwise.
    struct linked_list_node *next;
};

这里我们假定使用先检测是否可遍历、再进行遍历操作的迭代风格，则迭代器需要的操作是“检测是否可以继续迭代”、“迭代步进”、“取当前元素值”和“删除当前元素”。注意到这里并没有定义任何添加操作，因为在迭代过程中添加会影响迭代语义（若是前插，则新元素不会被遍历到；若只提供后插，语义就更不对称，倒不如全放弃）。

// Returns 1 while the iterator still has steps left, 0 once it is exhausted.
int iterator_has_next(const struct linked_list_iterator *iterator) {
    return iterator->n_steps_left != 0;
}

// Advances the iterator by one node and returns it.
// No bounds check is made: the caller is expected to have consulted
// iterator_has_next() first.
static
struct linked_list_iterator *do_step_next(struct linked_list_iterator *iterator) {
    iterator->n_steps_left--;

    if (iterator->next != NULL) {
        // A deletion saved the successor here; consume it.
        iterator->current = iterator->next;
        iterator->next = NULL;
    } else {
        iterator->current = next_of(iterator->current);
    }

    return iterator;
}

// Returns the entry pointer of the node the iterator currently sits on.
// The current node is dereferenced without any check.
void *iterator_get_value(struct linked_list_iterator *iterator) {
    return iterator->current->entry;
}

// Steps the iterator forward, then returns the entry of the node reached.
void *iterator_next(struct linked_list_iterator *iterator) {
    do_step_next(iterator);
    return iterator_get_value(iterator);
}

// Removes the iterator's current node from its list and frees the node.
// The entry stored in the node is not released.
void iterator_delete(struct linked_list_iterator *iterator) {
    struct linked_list_node *doomed = iterator->current;

    // Remember the successor before unlinking, so the next step can resume
    // from it instead of following links of the freed node.
    iterator->next = next_of(doomed);

    destroy_node(do_remove_at(iterator->parent, doomed));
}

struct linked_list_iterator list_get_iterator(struct linked_list *list) {
    return (struct linked_list_iterator) { list->count, list, &(list->head), NULL };
}

注意一点，迭代器步进的正确性必须由迭代可行性检查保证——所以我们没做更多的检查。
从链表获取迭代器的操作返回了一个结构而非指针，立刻能想到的原因有二：第一，迭代器结构足够小（数个指针长）、生存期不长（往往在本地用完即扔）；第二，迭代器只用来保存恒定索引，它本身并不需要被链表引用。在这种时候，一个单纯的文字量比开动态内存明显有优势。
另外，链表本身的销毁涉及到链表中所有元素的销毁，此操作不应因为申请动态内存失败而失败——所以获得链表迭代器的操作是必须成功的。强迫用户反复调用销毁函数直到销毁成功，或是自己封装一次此类过程，都显得高度不可取。

当然，这里的迭代器实现十分粗糙，几乎是没动大脑就呕出来填实接口的粗暴实现，不值得读者效仿。

有关链表节点的操作告一段落，现在我们可以处理链表本身了：

// list_impl1.c

// Allocates an empty list and records destructor in it.
// Returns NULL if the allocation fails.
struct linked_list *linked_list_new(void (*destructor)(void *)) {
    struct linked_list *list = malloc(sizeof *list);
    if (list != NULL) {
        struct linked_list_node *sentinel = &list->head;

        list->count = 0;
        list->destructor = destructor;
        // An empty list is a ring consisting of the sentinel alone.
        prev_of(sentinel) = sentinel;
        next_of(sentinel) = sentinel;
    }
    return list;
}

void linked_list_delete(struct linked_list *list, void (*destructor)(void *)) {
    struct linked_list_iterator iterator = list_get_iterator(list);
    while (iterator_has_next(&iterator)) {
        void *entry = iterator_next(&iterator);
        if (destructor)
            destructor(entry);
        iterator_delete(&iterator);
    }
    free(list);
}

// Returns the number of nodes recorded in the list's count field.
uint32_t linked_list_get_size(const struct linked_list *list) {
    return list->count;
}

导出必要的API：

// list_impl1.h

// Element access and insertion.
// list_get_value_at returns the entry of the node looked up by index, or NULL
// if no node is found. The two insert functions return 1 on success and 0
// when the node could not be allocated.
void *list_get_value_at(struct linked_list *list, uint32_t index);
int list_append_value(struct linked_list *list, void *value);
int list_prepend_value(struct linked_list *list, void *value);

// Iteration. Check iterator_has_next() before each iterator_next(); stepping
// itself performs no bounds check. iterator_delete() unlinks and frees the
// current node but leaves its entry alone. list_get_iterator() returns the
// iterator by value.
int iterator_has_next(const struct linked_list_iterator *iterator);
void *iterator_get_value(struct linked_list_iterator *iterator);
void *iterator_next(struct linked_list_iterator *iterator);
void iterator_delete(struct linked_list_iterator *iterator);
struct linked_list_iterator list_get_iterator(struct linked_list *list);

// List lifetime and size. linked_list_new() returns NULL when allocation
// fails; linked_list_delete() frees every node and then the list itself.
struct linked_list *linked_list_new(void (*destructor)(void *));
void linked_list_delete(struct linked_list *list);
uint32_t linked_list_get_size(const struct linked_list *list);

现在可以试试了。

#include    <stdio.h>
#include    <stdlib.h>
#include    <time.h>
#include    "list_impl1.h"


// Demo payloads: four fixed strings, each stored in a 64-byte slot.
char messages[][64] = {
    "The first element.",
    "The second one.",
    "...And the third one.",
    "Last but not least, the fourth comes forth."
};

// Prints message on its own line; a NULL message is replaced by a fixed
// diagnostic text.
static
void dump_with_check(const char *message) {
    if (message == NULL)
        message = "[FATAL] null element.";
    printf("%s\n", message);
}

// Demo for the list_impl1 API: builds a four-element list, reads a few
// elements by index, walks the list once, then walks it again while deleting
// one randomly chosen element, and finally destroys the list.
// Returns EXIT_FAILURE if the list or one of its nodes cannot be allocated.
int main(void) {
    struct linked_list *list = linked_list_new(NULL);
    if (list == NULL) {
        fprintf(stderr, "[FATAL] failed to allocate the list.\n");
        return EXIT_FAILURE;
    }

    for (uint32_t index = 0; index < 4; index++) {
        if (!list_append_value(list, messages[index])) {
            fprintf(stderr, "[FATAL] failed to append element %u.\n", (unsigned) index);
            linked_list_delete(list);
            return EXIT_FAILURE;
        }
    }

    dump_with_check(list_get_value_at(list, 1));
    dump_with_check(list_get_value_at(list, 3));
    // Index 4 is one past the last element, so the lookup yields NULL and the
    // fallback text is printed.
    dump_with_check(list_get_value_at(list, 4));
    printf("\n");

    struct linked_list_iterator iterator;

    // First pass: read-only traversal.
    iterator = list_get_iterator(list);
    while (iterator_has_next(&iterator)) {
        dump_with_check(iterator_next(&iterator));
    }
    printf("\n");

    // Second pass: delete one randomly chosen element, print the others.
    iterator = list_get_iterator(list);
    srand((unsigned) time(NULL));
    int index_to_delete = rand() % 4, counter = 0;
    while (iterator_has_next(&iterator)) {
        void *value = iterator_next(&iterator);
        if (counter++ == index_to_delete) {
            iterator_delete(&iterator);
        } else {
            dump_with_check(value);
        }
    }

    linked_list_delete(list);
    return 0;
}

过程可能稍显混乱。稍微总结一下：首先我们定义最底层的操作，这些操作和表所需要满足的业务可能并无多少关系，专注于维护表自身的秩序性；此后我们试图声明某种隐喻——或者一处故事，这些故事讲述了结构面对客户程序时展现出的功能。

Side B

前一部分中，我们花了太多无谓的努力制造问题。假如现在问题得以简化一点——去掉对下标的要求，只提供迭代语义和头尾插入，则可能给出何种实现？如果我们不满足于链表节点空间利用率的恶劣程度，应当提供什么形式的数据结构定义？

下面的回答来自某内核，并非我的原创。首先我们定义数据结构：

// util_list.h

#pragma once

#include    <stddef.h>


// Bare link pair with no payload; meant to be embedded in the structure that
// should become linkable.
struct link_index {
    struct link_index *prev;
    struct link_index *next;
};

它没有数据域，只用于维持链表的结构。这比先前所实现的链表更为激进——只有两个指针，难道不会引起更多的空间浪费问题吗？实际上，我们压根不会用malloc()去单独申请这种节点。考虑下面的定义：

// Example payload type: the data fields come first, followed by the embedded
// link that lets the structure sit on a list.
struct demo_structure {
    int x;
    int y;
    struct link_index linkable;
};

我们要求数据域包含链表节点，而链表操作只对链表节点对象暴露。在已知数据域的情况下，得到链表节点无非只是一次成员访问的事；但如果只知道一个合法的链表节点指针，如何获得对应的数据域？

// offset_of(type, member): byte offset of member inside type, computed by
// taking a member address through a null pointer. The standard offsetof()
// from <stddef.h> is the safe alternative.
#define    offset_of(type, member) ((size_t) &((type *) 0)->member)
// container_of(ptr, type, member): given ptr, a pointer to the member field
// of some object of the given type, yields a pointer to that enclosing object.
// The temporary __mptr makes the compiler check that ptr has the member's
// pointer type. Relies on GCC statement expressions; __typeof__ is spelled
// with underscores because the bare typeof keyword is not recognised in
// GCC's ISO modes such as -std=c11.
#define    container_of(ptr, type, member) ({ \
     const __typeof__(((type *) 0)->member) *__mptr = (ptr); \
     (type *) ((char *) __mptr - offset_of(type, member)); })

宏offset_of危险地算出对象中指定成员的偏移（如果希望行为真正安全，建议读者使用offsetof()宏，它的定义由stddef.h提供）；container_of则要求给出指针ptr，安全地赋给__mptr（这个名字也比较危险，不推荐仿照），它指向所给定的链表节点，所以用链表节点成员在数据域中的偏移向前修正指针所对应的地址，就能得到指向数据域的指针。以上面的结构举例，就是：

// Round trip: from the address of the embedded link back to its container.
struct demo_structure demo;
struct demo_structure *demo_ptr = container_of(&(demo.linkable), struct demo_structure, linkable);
assert(demo_ptr == &demo);      // holds: the recovered pointer is the original object

理解了取得数据域的方式，剩下的工作则和往常一样，致力于构造有双指针域的链表。我们要求链表必须有头节点，而空表的头节点形成一元素长的环：

// util_list.h

// Turns h into an empty list head: both of its links point back at itself.
static inline
void list_init(struct link_index *h) {
    h->prev = h->next = h;
}

// Returns 1 if both links of head h point at h itself, 0 otherwise.
static inline
int list_is_empty(struct link_index *h) {
    if (h->next != h)
        return 0;
    return h->prev == h;
}

同样，我们需要实现定位插入操作；不过这次的定位插入是精准的——指定前驱和后继，在前驱和后继之间加入元素。前驱和后继不必相异，因为我们只需要修改它们的各一个指针域；若前驱和后继相同，则这个节点（比如说空表的头节点）的两个指针域分别被正确地修改了。

// util_list.h

// Links e between predecessor p and successor n.
// Only n->prev and p->next are modified on the neighbours, so p and n may be
// the same node (the head of an empty list, for instance).
static inline
void list_add__(struct link_index *e, struct link_index *p, struct link_index *n) {
    n->prev = e;
    e->next = n;
    e->prev = p;
    p->next = e;
}

在定位插入的基础上，实现头插和尾插：

// util_list.h

// Inserts e right after head h, making it the first element.
static inline
void list_add_head(struct link_index *e, struct link_index *h) {
    struct link_index *first = h->next;
    list_add__(e, h, first);
}

// Inserts e right before head h, making it the last element.
static inline
void list_add_tail(struct link_index *e, struct link_index *h) {
    struct link_index *last = h->prev;
    list_add__(e, last, h);
}

定位删除同样是按前驱和后继定位的，我们简单地略掉前驱和后继之间的元素。显然，若我们知道该删哪个节点，则很容易定位它的前驱和后继，利用定位删除把它从链表上卸掉。仍然需要提醒读者，由于链表节点被数据域包含、不独立创建，它没有权利要求任何构造和析构操作。它仅仅存在着。

// util_list.h

// Joins p and n directly, dropping whatever used to sit between them.
// The two stores touch different fields, so their order does not matter.
static inline
void list_del__(struct link_index *p, struct link_index *n) {
    p->next = n;
    n->prev = p;
}

按照上面的思路，我们实现迭代器风格的删除：

// util_list.h

// Unlinks e from its list and sets both of its links to NULL.
static inline
void list_del(struct link_index *e) {
    struct link_index *before = e->prev;
    struct link_index *after = e->next;

    list_del__(before, after);
    e->prev = NULL;
    e->next = NULL;
}

迭代删除操作是最推荐用户使用的删除方式，它也会为被卸载节点的前驱和后继域下毒，使它们明显地不合法（置为NULL）。在NULL毒的约定下，我们补充判定节点卸载相关的操作。它们不太实用。

// Returns 1 if at least one of e's links is NULL, 0 if both are set.
static inline
int list_node_isolated(struct link_index *e) {
    return !(e->next != NULL && e->prev != NULL);
}

// Sets both links of e to NULL without touching its neighbours.
static inline
void list_node_isolate(struct link_index *e) {
    e->next = NULL;
    e->prev = NULL;
}

最后是迭代。本节中的链表不使用任何额外的迭代器结构，也不需要特别地获取迭代器——用一份链表节点指针充当迭代器已经足够，因为迭代删除正是基于链表节点的。另一方面，我们在迭代删除操作上并没有任何特殊的处理，所以需要提供两种迭代形式——可变和不可变的foreach。

// util_list.h

// list_foreach(head): loop header visiting every node after head until head
// is reached again. Declares the loop variable `iterator`. The body must not
// unlink the current node, because the step expression reads iterator->next.
#define     list_foreach(head) \
    for (struct link_index *iterator = (head)->next; iterator != (head); iterator = iterator->next)

// list_foreach_remove(head): same traversal, but safe against unlinking the
// current node. `iterator_aux` is moved to the following node inside the loop
// condition, before the body runs, so the body never needs iterator->next.
#define     list_foreach_remove(head) \
    for (struct link_index *iterator_aux = (head)->next, *iterator = iterator_aux; \
        ((iterator = iterator_aux) != (head)) && (iterator_aux = iterator_aux->next);)

// Helpers for use inside either loop body; all of them refer to the loop
// variable `iterator` declared by the macros above.
#define     current_iterator                        iterator
#define     detach_current_iterator                 list_del(iterator)
#define     current_object_of_type(type, member) \
    container_of(iterator, type, member)

list_foreach比较好理解，只是单纯的for循环变化。list_foreach_remove则使用两组迭代器确保删除的正确性：iterator_aux完成遍历，而iterator则反映出当前的元素；若遍历没有回到头节点，则在每趟循环开始的时候，iterator_aux总比iterator快一步。请注意，list_foreach_remove使用的for循环没有步进表达式——因为步进发生在一趟循环结束时，那时再更新iterator_aux的话，会因为链表结构发生变化而惨遭失败，所以步进表达式（两组）和终止条件写在了一起。

一份简单的演示程序如下：

#include    <stdio.h>
#include    <stdint.h>
#include    "util_list.h"


// Demo record: an id plus the embedded link that places it on a list.
struct test_schema {
    int id;
    struct link_index linkable;
};

// Four records with ids 1..4; their links are left zero-initialized here.
struct test_schema test_data[] = {
    { .id = 1 },
    { .id = 2 },
    { .id = 3 },
    { .id = 4 }
};

// Prints the id of every test_schema record on the list, one per line,
// leaving the list unchanged.
void print_list(struct link_index *list) {
    list_foreach(list) {
        const struct test_schema *record = current_object_of_type(struct test_schema, linkable);
        printf("%d\n", record->id);
    }
}

// Detaches every record from the list, printing each id after unlinking it.
void print_list_remove(struct link_index *list) {
    list_foreach_remove(list) {
        // Unlinking only resets the link fields, so the record itself can
        // still be reached through the iterator afterwards.
        detach_current_iterator;
        const struct test_schema *record = current_object_of_type(struct test_schema, linkable);
        printf("%d\n", record->id);
    }
}

// Demo for util_list.h: appends all test records to a list, prints them,
// prints them again while detaching each one, then reports whether the list
// ended up empty.
int main(void) {
    struct link_index list;
    list_init(&list);

    const uint32_t n_records = sizeof(test_data) / sizeof(test_data[0]);
    for (uint32_t index = 0; index < n_records; index++)
        list_add_tail(&(test_data[index].linkable), &list);

    print_list(&list);
    print_list_remove(&list);

    const char *infix = list_is_empty(&list) ? " " : " not ";
    printf("list is%sempty.\n", infix);
    return 0;
}

posted @ 2016-09-27 00:13 Alpacius 阅读(151) 评论(0) 收藏举报

刷新页面返回顶部

Alpacatraz

[简易] 链表相关

公告