slab分配器（续）

用slab分配内存的不同方法的共同的入口是__cache_alloc（）：

先用should_failslab(cachep, flags)判断是否要让本次分配直接失败（它是故障注入failslab的检查钩子，返回真时直接return NULL；带__GFP_NOFAIL标志的分配不会被注入失败）；
调用__do_cache_alloc(cachep, flags, &this_cpu)来完成分配；
调用cache_alloc_debugcheck_after(cachep, flags, objp, caller)进行分配后检查；
prefetchw(objp)把对象所在的缓存行按“即将写入”的方式预取进来，以加快随后对该对象的写访问；
如果flags带有__GFP_ZERO并且objp不为空，再用memset把对象清零；
return objp。

程序的代码如下：

/*
 * __cache_alloc - common entry point for allocating one object from a cache.
 * @cachep: cache to allocate from
 * @flags:  gfp flags for this allocation
 * @caller: passed through to cache_alloc_debugcheck_after()
 *
 * Returns the object pointer, or NULL when should_failslab() rejects the
 * request or when the underlying allocation yields no object.
 */
static __always_inline void *
__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
{
        void *objp;
        int this_cpu;
        unsigned long save_flags;

        /* Bail out before touching any state if the request must fail. */
        if (should_failslab(cachep, flags))
                return NULL;

        cache_alloc_debugcheck_before(cachep, flags);

        /*
         * The allocation itself runs between slab_irq_save() and
         * slab_irq_restore(); this_cpu is handed down by pointer.
         * NOTE(review): presumably slab_irq_save() also fills in this_cpu
         * -- confirm against the macro definition.
         */
        slab_irq_save(save_flags, this_cpu);
        objp = __do_cache_alloc(cachep, flags, &this_cpu);
        slab_irq_restore(save_flags, this_cpu);

        /* The debug hook may hand back a different pointer, so reassign. */
        objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
        prefetchw(objp);

        /* Zeroing applies only when an object exists and __GFP_ZERO was set. */
        if (likely(!objp || !(flags & __GFP_ZERO)))
                return objp;

        memset(objp, 0, obj_size(cachep));
        return objp;
}

可以看出这个分配的过程最重要的就是__do_cache_alloc(cachep, flags, &this_cpu)函数了：

如果当前进程（current->flags）设置了PF_SPREAD_SLAB或PF_MEMPOLICY，就先用alternate_node_alloc(cache, flags, this_cpu)尝试从别的NODE上分配，成功则直接返回；
用____cache_alloc(cache, flags, this_cpu)来从CPU的缓冲中取对象，如果取不到就重新填充缓存；
如果上步没有分配成功，就调用____cache_alloc_node(cache, flags, cpu_to_node(*this_cpu), this_cpu)来进行非缓存的那种分配，这个会直接用到kmem_list3里的三个链表（slabs_partial、slabs_full、slabs_free）；
return objp。

下面是代码，两个主要的函数会在下面详细介绍：

/*
 * __do_cache_alloc - try each allocation path in turn until one yields an
 * object.
 * @cache:    cache to allocate from
 * @flags:    gfp flags for this allocation
 * @this_cpu: pointer to the current CPU number, passed down to every path
 *
 * Order of attempts:
 *   1. alternate_node_alloc(), only when the current task has
 *      PF_SPREAD_SLAB or PF_MEMPOLICY set;
 *   2. ____cache_alloc();
 *   3. ____cache_alloc_node() on cpu_to_node(*this_cpu).
 *
 * Returns the first non-NULL object, or NULL if all attempted paths fail.
 */
static __always_inline void *
__do_cache_alloc(struct kmem_cache *cache, gfp_t flags, int *this_cpu)
{
        void *objp = NULL;

        if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY)))
                objp = alternate_node_alloc(cache, flags, this_cpu);

        if (!objp)
                objp = ____cache_alloc(cache, flags, this_cpu);

        /* *this_cpu is read only after the previous attempt has returned. */
        if (!objp)
                objp = ____cache_alloc_node(cache, flags, cpu_to_node(*this_cpu), this_cpu);

        return objp;
}

____cache_alloc(cache, flags, this_cpu);用来从CPU的缓存中取object，这个过程还是比较简单的，就是从对应的array_cache中取object：

调用cpu_cache_get(cachep, *this_cpu)取得对应的array_cache;
如果还有剩余的，objp = ac->entry[--ac->avail];
否则调用cache_alloc_refill(cachep, flags, this_cpu)重新填充；
return objp。

代码如下：

/*
 * ____cache_alloc - take one object from the array_cache selected by
 * *this_cpu.
 * @cachep:   cache to allocate from
 * @flags:    gfp flags, used only if the array_cache has to be refilled
 * @this_cpu: pointer to the current CPU number
 *
 * Returns the object, or whatever cache_alloc_refill() returns when the
 * array_cache is empty.
 */
static inline void *
____cache_alloc(struct kmem_cache *cachep, gfp_t flags, int *this_cpu)
{
        struct array_cache *ac;
        void *objp;

        check_irq_off();
        ac = cpu_cache_get(cachep, *this_cpu);

        /* Miss: nothing cached, so the refill path supplies the object. */
        if (unlikely(!ac->avail)) {
                STATS_INC_ALLOCMISS(cachep);
                return cache_alloc_refill(cachep, flags, this_cpu);
        }

        /* Hit: pop the most recently stored entry and mark the cache as used. */
        STATS_INC_ALLOCHIT(cachep);
        ac->touched = 1;
        objp = ac->entry[--ac->avail];
        return objp;
}

上面的代码中最关键的就是cache_alloc_refill(cachep, flags, this_cpu)填充过程，下面就来具体看一下执行过程：

per-cpu的array_cache要求在关闭中断的情况下访问，这里先用check_irq_off()检查中断确实已经关闭；
用cpu_cache_get(cachep, *this_cpu)取得与CPU相关的array_cache；
用node = numa_node_id()取得现在的node；
cachep->nodelists[cpu_to_node(*this_cpu)]取得kmem_list3；
尝试用transfer_objects()从共享的缓存（l3->shared）上移过来一些对象，成功的话就不用再去扫描slab链表了；
循环来填充batchcount个对象；

依次尝试从slabs_partial、slabs_free中查看是否有object，如果都没有的话就得调用cache_grow（）函数了，这个在下面再详细地讨论；
调用ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, cpu_to_node(*this_cpu))从对应的slab中取出来填充缓存；
检查该slab是否应该换到别的链表上：如果已经没有空闲对象（slabp->free == BUFCTL_END）就放到slabs_full，否则放到slabs_partial。

如果填充之后缓存里仍然没有对象，就调用cache_grow（）：cache_grow（）失败且缓存仍为空时返回NULL；cache_grow（）成功但缓存仍为空时，跳回retry重新填充。
return ac->entry[--ac->avail]来返回一个对象。

过程还是比较简单的，具体的代码如下：

/*
 * cache_alloc_refill - refill the array_cache selected by *this_cpu and
 * return one object from it.
 * @cachep:   cache being allocated from
 * @flags:    gfp flags; passed to cache_grow() with GFP_THISNODE added
 * @this_cpu: pointer to the current CPU number
 *
 * Sources are tried in this order: the node's shared array (l3->shared),
 * slabs on slabs_partial, slabs on slabs_free, and finally cache_grow().
 *
 * Returns an object pointer, or NULL when cache_grow() fails and the
 * array_cache is still empty.
 */
static void * cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, int *this_cpu)
{
        int batchcount;
        struct kmem_list3 *l3;
        struct array_cache *ac;
        int node;

retry:
        check_irq_off();
        /* NOTE(review): 'node' is assigned here but never read in this function. */
        node = numa_node_id();
        ac = cpu_cache_get(cachep, *this_cpu);
        batchcount = ac->batchcount;
        /* An array_cache that has not been touched gets a capped refill. */
        if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
                batchcount = BATCHREFILL_LIMIT;
        }
        l3 = cachep->nodelists[cpu_to_node(*this_cpu)];

        /* Refill is only valid on an empty array_cache with an existing node list. */
        BUG_ON(ac->avail > 0 || !l3);
        spin_lock(&l3->list_lock);

        /*
         * Try the shared array first; a non-zero result skips the slab scan
         * and also skips the free_objects adjustment at must_grow.
         * NOTE(review): presumably transfer_objects() returns the number of
         * objects moved -- confirm against its definition.
         */
        if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
                goto alloc_done;

        while (batchcount > 0) {
                struct list_head *entry;
                struct slab *slabp;
                /* Prefer a partially used slab; fall back to a free one. */
                entry = l3->slabs_partial.next;
                if (entry == &l3->slabs_partial) {
                        l3->free_touched = 1;
                        entry = l3->slabs_free.next;
                        /* Both lists are empty: nothing more to take here. */
                        if (entry == &l3->slabs_free)
                                goto must_grow;
                }
                slabp = list_entry(entry, struct slab, list);
                check_slabp(cachep, slabp);
                check_spinlock_acquired_node(cachep, cpu_to_node(*this_cpu));
                /*
                 * A slab taken from partial/free must still have room.
                 * NOTE(review): if 'inuse' is an unsigned type the '< 0' half
                 * can never be true -- confirm against struct slab.
                 */
                BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num);
                /*
                 * batchcount-- is evaluated only while the slab still has
                 * room (short-circuit), so it is not decremented when the
                 * slab fills up first. When it is evaluated at 0 it drops
                 * to -1, which also ends the outer loop.
                 */
                while (slabp->inuse < cachep->num && batchcount--) {
                        STATS_INC_ALLOCED(cachep);
                        STATS_INC_ACTIVE(cachep);
                        STATS_SET_HIGH(cachep);
                        ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,  cpu_to_node(*this_cpu));
                }
                check_slabp(cachep, slabp);
 
                /* Re-file the slab: full if no free object is left, else partial. */
                list_del(&slabp->list);
                if (slabp->free == BUFCTL_END)
                        list_add(&slabp->list, &l3->slabs_full);
                else
                        list_add(&slabp->list, &l3->slabs_partial);
         }
must_grow:
        /* Account for the objects that were moved out of slabs into ac. */
        l3->free_objects -= ac->avail;
alloc_done:
        spin_unlock(&l3->list_lock);
        if (unlikely(!ac->avail)) {
                int x;
                x = cache_grow(cachep, flags | GFP_THISNODE, cpu_to_node(*this_cpu), NULL, this_cpu);
                /*
                 * Look the array_cache up again after cache_grow().
                 * NOTE(review): presumably because cache_grow() may change
                 * *this_cpu or let the array_cache be refilled -- confirm.
                 */
                ac = cpu_cache_get(cachep, *this_cpu);
                if (!x && ac->avail == 0)       /* no objects in sight? abort */
                        return NULL;
                if (!ac->avail)         /* objects refilled by interrupt? */
                        goto retry;
        }
        ac->touched = 1;
        return ac->entry[--ac->avail];
}

在slabs_partial、slabs_free中没有需要的slab的时候就需要调用cache_grow（）函数来补充了：

先检查flags并取得nodeid对应的kmem_list3，然后在list_lock的保护下取出l3->colour_next（取完后加一，到达cachep->colour就回绕到0），再乘以cachep->colour_off得到color offset；
kmem_flagcheck(cachep, flags)检查标志；
通过kmem_getpages(cachep, local_flags, nodeid)调用伙伴系统分配需要的页；
用alloc_slabmgmt(cachep, objp, offset, local_flags & ~GFP_CONSTRAINT_MASK, nodeid)分配slab需要的内存并设置；
调用slab_map_pages(cachep, slabp, objp)把pages映射到给定的cache和slab；
cache_init_objs(cachep, slabp)初始化objects；
用list_add_tail(&slabp->list, &(l3->slabs_free))把刚申请的slab加到slabs_free中；
如果成功return 1，否则return 0。

整体的过程还是很清楚的，不过细节还是要再仔细看看，下面是具体的代码：

/*
 * cache_grow - add one new slab to the cache on the given node.
 * @cachep:   cache to grow
 * @flags:    gfp flags; must not contain any GFP_SLAB_BUG_MASK bit
 * @nodeid:   index into cachep->nodelists selecting the node list to grow
 * @objp:     pages to build the slab on, or NULL to get them from
 *            kmem_getpages()
 * @this_cpu: pointer to the current CPU number, passed to the
 *            slab_irq_*() macros
 *
 * Returns 1 when a slab was added to slabs_free, 0 on failure.
 */
static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid, void *objp, int *this_cpu)
{
        struct slab *slabp;
        size_t offset;
        gfp_t local_flags;
        struct kmem_list3 *l3;
        BUG_ON(flags & GFP_SLAB_BUG_MASK);
        /* Keep only the constraint and reclaim bits for the page allocation. */
        local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
        check_irq_off();
        l3 = cachep->nodelists[nodeid];
        /* Take the next colour index under the list lock, wrapping at cachep->colour. */
        spin_lock(&l3->list_lock);
        offset = l3->colour_next;
        l3->colour_next++;
        if (l3->colour_next >= cachep->colour)
                l3->colour_next = 0;
        spin_unlock(&l3->list_lock);
        /* Turn the colour index into an offset in units of colour_off. */
        offset *= cachep->colour_off;
        /*
         * NOTE(review): presumably the _nort/_rt variants re-enable
         * interrupts on non-RT and RT builds respectively -- confirm against
         * the macro definitions. Every exit path below undoes this with the
         * matching slab_irq_disable_*() calls.
         */
        if (local_flags & __GFP_WAIT)
                slab_irq_enable_nort(*this_cpu);
        slab_irq_enable_rt(*this_cpu);
        kmem_flagcheck(cachep, flags);
        /* Allocate pages only when the caller did not supply them. */
        if (!objp)
                objp = kmem_getpages(cachep, local_flags, nodeid);
        if (!objp)
                goto failed;
        /* The slab management allocation runs without the constraint bits. */
        slabp = alloc_slabmgmt(cachep, objp, offset, local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
        if (!slabp)
                goto opps1;
        slab_map_pages(cachep, slabp, objp);
        cache_init_objs(cachep, slabp);
        slab_irq_disable_rt(*this_cpu);
        if (local_flags & __GFP_WAIT)
                slab_irq_disable_nort(*this_cpu);
        check_irq_off();
        /* Publish the new slab at the tail of slabs_free and count its objects. */
        spin_lock(&l3->list_lock);
        list_add_tail(&slabp->list, &(l3->slabs_free));
        STATS_INC_GROWN(cachep);
        l3->free_objects += cachep->num;
        spin_unlock(&l3->list_lock);
        return 1;
opps1:
        /*
         * NOTE(review): this frees objp whether it came from kmem_getpages()
         * above or was supplied by the caller -- confirm that callers expect
         * to lose ownership on failure.
         */
        kmem_freepages(cachep, objp);
failed:
        slab_irq_disable_rt(*this_cpu);
        if (local_flags & __GFP_WAIT)
                slab_irq_disable_nort(*this_cpu);
        return 0;
}

这个如果画一个图出来就很清楚了。

------------------------

个人理解，欢迎拍砖。

posted @ 2011-08-12 13:30 GG大婶阅读(1641) 评论(0) 收藏举报

天驰

要成为绝世高手，并非一朝一夕，除非是天生武学奇才。但是这种人…万中无一。------好明显我就是这种人

slab分配器（续）

公告