The slab allocator (continued)

The common entry point for the various slab allocation paths is __cache_alloc() (a sketch of the exported wrappers follows the code below):

  1. First call should_failslab() to decide up front whether the allocation may proceed at all (fault injection; flags such as __GFP_NOFAIL exempt an allocation);
  2. Call __do_cache_alloc(cachep, flags, &this_cpu) to do the actual allocation;
  3. Call cache_alloc_debugcheck_after(cachep, flags, objp, caller) for the post-allocation debug checks;
  4. prefetchw(objp) prefetches the object's cache line for writing, so the caller's first store into the object is cheaper;
  5. return objp.
The code is as follows:
static __always_inline void *
__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
{
        unsigned long save_flags;
        int this_cpu;
        void *objp;

        if (should_failslab(cachep, flags))
                return NULL;

        cache_alloc_debugcheck_before(cachep, flags);
        slab_irq_save(save_flags, this_cpu);
        objp = __do_cache_alloc(cachep, flags, &this_cpu);
        slab_irq_restore(save_flags, this_cpu);
        objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
        prefetchw(objp);

        if (unlikely((flags & __GFP_ZERO) && objp))
                memset(objp, 0, obj_size(cachep));

        return objp;
}
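
For context, the exported entry points such as kmem_cache_alloc() are only thin wrappers around __cache_alloc(). The sketch below follows the 2.6-era mm/slab.c; the exact signatures vary between kernel versions:

/* Sketch of an exported wrapper (2.6-era mm/slab.c style; details vary
 * by kernel version): it funnels straight into __cache_alloc(), passing
 * the caller's return address for the debug checks. */
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
        return __cache_alloc(cachep, flags, __builtin_return_address(0));
}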

Clearly the most important piece of this path is __do_cache_alloc(cachep, flags, &this_cpu):

  1. If PF_SPREAD_SLAB or PF_MEMPOLICY is set in current->flags, the task's memory policy may require allocating from another NUMA node, so try alternate_node_alloc() first;
  2. Use ____cache_alloc(cache, flags, this_cpu) to take an object from the per-CPU cache, refilling that cache if it is empty;
  3. If the previous step did not produce an object, call ____cache_alloc_node(cache, flags, cpu_to_node(*this_cpu), this_cpu) for the non-cached path, which works directly on the three slab lists (sketched after the code);
  4. return objp.
Here is the code; the two main functions are examined in detail below:
static __always_inline void *
__do_cache_alloc(struct kmem_cache *cache, gfp_t flags, int *this_cpu)
{
        void *objp;

        if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
                objp = alternate_node_alloc(cache, flags, this_cpu);
                if (objp)
                        goto out;
        }

        objp = ____cache_alloc(cache, flags, this_cpu);
        if (!objp)
                objp = ____cache_alloc_node(cache, flags,
                                            cpu_to_node(*this_cpu), this_cpu);
out:
        return objp;
}
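
The "three lists" mentioned above live in the per-node kmem_list3 structure. The following sketch is abridged from the 2.6-era mm/slab.c; the field names are real, but several members are omitted and the layout varies by version:

/* Abridged sketch of the per-node bookkeeping (2.6-era mm/slab.c).
 * Every node of every cache keeps three slab lists plus an optional
 * shared object cache. */
struct kmem_list3 {
        struct list_head slabs_partial; /* slabs with some free objects */
        struct list_head slabs_full;    /* slabs with no free object */
        struct list_head slabs_free;    /* slabs with only free objects */
        unsigned long free_objects;     /* free objects on this node */
        unsigned int free_limit;
        unsigned int colour_next;       /* cache colouring cursor */
        spinlock_t list_lock;           /* protects the three lists */
        struct array_cache *shared;     /* shared per-node object cache */
        struct array_cache **alien;     /* objects freed from other nodes */
        /* ... reap bookkeeping omitted ... */
};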

____cache_alloc(cache, flags, this_cpu) takes an object from the per-CPU cache. The process is fairly simple: pop an object off the corresponding array_cache (its layout is sketched after the code):

  1. Call cpu_cache_get(cachep, *this_cpu) to get this CPU's array_cache;
  2. If cached objects remain, pop one: objp = ac->entry[--ac->avail];
  3. Otherwise call cache_alloc_refill(cachep, flags, this_cpu) to refill the cache;
  4. return objp.
The code:
static inline void *
____cache_alloc(struct kmem_cache *cachep, gfp_t flags, int *this_cpu)
{
        void *objp;
        struct array_cache *ac;

        check_irq_off();
        ac = cpu_cache_get(cachep, *this_cpu);
        if (likely(ac->avail)) {
                STATS_INC_ALLOCHIT(cachep);
                ac->touched = 1;
                objp = ac->entry[--ac->avail];
        } else {
                STATS_INC_ALLOCMISS(cachep);
                objp = cache_alloc_refill(cachep, flags, this_cpu);
        }
        return objp;
}
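
The array_cache that ____cache_alloc() pops from is nothing more than a per-CPU LIFO stack of object pointers. An abridged sketch, again from the 2.6-era mm/slab.c (layout varies by version):

/* Abridged sketch of the per-CPU object cache (2.6-era mm/slab.c).
 * entry[] is used as a LIFO stack: allocation pops entry[--avail],
 * freeing pushes entry[avail++]. */
struct array_cache {
        unsigned int avail;      /* number of objects currently cached */
        unsigned int limit;      /* maximum number of cached objects */
        unsigned int batchcount; /* objects moved per refill or drain */
        unsigned int touched;    /* recently used; slows down reaping */
        spinlock_t lock;
        void *entry[];           /* the cached object pointers */
};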

The most critical piece of the code above is the refill path, cache_alloc_refill(cachep, flags, this_cpu). Its execution, step by step:

  1. The per-CPU array is protected by running with interrupts off, and the node lists by l3->list_lock; check_irq_off() verifies that interrupts really are disabled;
  2. cpu_cache_get(cachep, *this_cpu) fetches this CPU's array_cache;
  3. node = numa_node_id() gives the current node;
  4. cachep->nodelists[cpu_to_node(*this_cpu)] gives the node's kmem_list3;
  5. Try to transfer some objects over from the node's shared array cache first;
  6. Loop to fill in batchcount objects:
    1. Look for a slab with free objects, first on slabs_partial and then on slabs_free; if neither has one, cache_grow() must be called, which is discussed in detail below;
    2. ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, cpu_to_node(*this_cpu)) pulls objects out of that slab into the per-CPU cache (a sketch of slab_get_obj() follows the code);
    3. Check whether the slab should now be moved to a different list (slabs_full if exhausted, slabs_partial otherwise).
  7. If the array is still empty, cache_grow() is called; if even that fails and no objects have appeared, return NULL, and if it succeeded but the fresh objects were consumed in the meantime (e.g. by an interrupt), retry from the top.
  8. return ac->entry[--ac->avail] hands one object back.
The process is fairly straightforward; here is the code:
static void *
cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, int *this_cpu)
{
        int batchcount;
        struct kmem_list3 *l3;
        struct array_cache *ac;
        int node;

retry:
        check_irq_off();
        node = numa_node_id();
        ac = cpu_cache_get(cachep, *this_cpu);
        batchcount = ac->batchcount;
        if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
                /* the cache was idle recently: a partial refill is enough */
                batchcount = BATCHREFILL_LIMIT;
        }
        l3 = cachep->nodelists[cpu_to_node(*this_cpu)];

        BUG_ON(ac->avail > 0 || !l3);
        spin_lock(&l3->list_lock);

        /* see if we can refill from the node's shared array first */
        if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
                goto alloc_done;

        while (batchcount > 0) {
                struct list_head *entry;
                struct slab *slabp;

                /* prefer partial slabs, fall back to free slabs */
                entry = l3->slabs_partial.next;
                if (entry == &l3->slabs_partial) {
                        l3->free_touched = 1;
                        entry = l3->slabs_free.next;
                        if (entry == &l3->slabs_free)
                                goto must_grow;
                }

                slabp = list_entry(entry, struct slab, list);
                check_slabp(cachep, slabp);
                check_spinlock_acquired_node(cachep, cpu_to_node(*this_cpu));
                BUG_ON(slabp->inuse < 0 || slabp->inuse >= cachep->num);

                while (slabp->inuse < cachep->num && batchcount--) {
                        STATS_INC_ALLOCED(cachep);
                        STATS_INC_ACTIVE(cachep);
                        STATS_SET_HIGH(cachep);

                        ac->entry[ac->avail++] =
                                slab_get_obj(cachep, slabp,
                                             cpu_to_node(*this_cpu));
                }
                check_slabp(cachep, slabp);

                /* move slabp to the correct list */
                list_del(&slabp->list);
                if (slabp->free == BUFCTL_END)
                        list_add(&slabp->list, &l3->slabs_full);
                else
                        list_add(&slabp->list, &l3->slabs_partial);
        }

must_grow:
        l3->free_objects -= ac->avail;
alloc_done:
        spin_unlock(&l3->list_lock);

        if (unlikely(!ac->avail)) {
                int x;

                x = cache_grow(cachep, flags | GFP_THISNODE,
                               cpu_to_node(*this_cpu), NULL, this_cpu);

                /* cache_grow can reenable interrupts, then ac could change */
                ac = cpu_cache_get(cachep, *this_cpu);
                if (!x && ac->avail == 0) /* no objects in sight? abort */
                        return NULL;

                if (!ac->avail) /* objects refilled by interrupt? */
                        goto retry;
        }
        ac->touched = 1;
        return ac->entry[--ac->avail];
}
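
For completeness, here is what slab_get_obj() and the slab descriptor it operates on look like. This sketch is based on the 2.6-era mm/slab.c with the debug-only bookkeeping stripped out; treat the details as version-dependent:

/* Abridged sketch of the slab descriptor (2.6-era mm/slab.c). */
struct slab {
        struct list_head list;   /* linkage on one of the three lists */
        unsigned long colouroff; /* colour offset applied to s_mem */
        void *s_mem;             /* address of the first object */
        unsigned int inuse;      /* objects currently allocated */
        kmem_bufctl_t free;      /* index of the first free object */
        unsigned short nodeid;
};

/* Take one object out of a slab: slab_bufctl(slabp) is a per-object
 * index array that chains the free objects together, and slabp->free
 * is the head of that chain (BUFCTL_END when the slab is full). */
static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
                          int nodeid)
{
        void *objp = index_to_obj(cachep, slabp, slabp->free);

        slabp->inuse++;
        slabp->free = slab_bufctl(slabp)[slabp->free]; /* next free index */
        return objp;
}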

When neither slabs_partial nor slabs_free has a usable slab, cache_grow() is called to create a new one:

  1. A long prologue mirrors the functions above; then the colour offset for the new slab is computed (a worked example of the colouring follows the code);
  2. kmem_flagcheck(cachep, flags) sanity-checks the flags;
  3. kmem_getpages(cachep, local_flags, nodeid) asks the buddy system for the pages the slab needs;
  4. alloc_slabmgmt(cachep, objp, offset, local_flags & ~GFP_CONSTRAINT_MASK, nodeid) allocates and sets up the slab management structure;
  5. slab_map_pages(cachep, slabp, objp) maps the pages to the given cache and slab;
  6. cache_init_objs(cachep, slabp) initializes the objects;
  7. list_add_tail(&slabp->list, &(l3->slabs_free)) adds the brand-new slab to slabs_free;
  8. return 1 on success, 0 otherwise.
The overall flow is clear, but the details repay a careful look. Here is the code:
static int
cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid,
           void *objp, int *this_cpu)
{
        struct slab *slabp;
        size_t offset;
        gfp_t local_flags;
        struct kmem_list3 *l3;

        BUG_ON(flags & GFP_SLAB_BUG_MASK);
        local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);

        check_irq_off();
        l3 = cachep->nodelists[nodeid];
        spin_lock(&l3->list_lock);

        /* pick a colour for this slab and advance the per-node cycle */
        offset = l3->colour_next;
        l3->colour_next++;
        if (l3->colour_next >= cachep->colour)
                l3->colour_next = 0;
        spin_unlock(&l3->list_lock);

        offset *= cachep->colour_off;

        if (local_flags & __GFP_WAIT)
                slab_irq_enable_nort(*this_cpu);
        slab_irq_enable_rt(*this_cpu);

        kmem_flagcheck(cachep, flags);

        /* get pages from the buddy allocator, unless the caller passed some */
        if (!objp)
                objp = kmem_getpages(cachep, local_flags, nodeid);
        if (!objp)
                goto failed;

        /* get the slab management structure */
        slabp = alloc_slabmgmt(cachep, objp, offset,
                               local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
        if (!slabp)
                goto opps1;

        slab_map_pages(cachep, slabp, objp);
        cache_init_objs(cachep, slabp);

        slab_irq_disable_rt(*this_cpu);
        if (local_flags & __GFP_WAIT)
                slab_irq_disable_nort(*this_cpu);
        check_irq_off();
        spin_lock(&l3->list_lock);

        /* make the slab visible: all of its objects are still free */
        list_add_tail(&slabp->list, &(l3->slabs_free));
        STATS_INC_GROWN(cachep);
        l3->free_objects += cachep->num;
        spin_unlock(&l3->list_lock);
        return 1;

opps1:
        kmem_freepages(cachep, objp);
failed:
        slab_irq_disable_rt(*this_cpu);
        if (local_flags & __GFP_WAIT)
                slab_irq_disable_nort(*this_cpu);
        return 0;
}
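
The colouring logic at the top of cache_grow() simply cycles colour_next through 0 .. cachep->colour - 1 and scales the result by colour_off, so consecutive slabs place their first object at staggered offsets and do not all compete for the same cache lines. A minimal standalone illustration (the concrete values of colour and colour_off are made up here; in the kernel they come from the kmem_cache):

/* Standalone illustration of the colour cycle in cache_grow(). The
 * values below are invented for the example; compile with any C
 * compiler and it prints offsets 0, 64, 128, 192, 0, 64. */
#include <stdio.h>

int main(void)
{
        unsigned int colour = 4;      /* e.g. cachep->colour */
        unsigned int colour_off = 64; /* e.g. one cache line */
        unsigned int colour_next = 0; /* per-node l3->colour_next */
        unsigned int i, offset;

        for (i = 0; i < 6; i++) {
                offset = colour_next++;
                if (colour_next >= colour)
                        colour_next = 0;
                offset *= colour_off;
                printf("slab %u: first object at byte offset %u\n",
                       i, offset);
        }
        return 0;
}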

A diagram of all this would make it much clearer.

------------------------

This is just my own understanding; corrections and criticism are welcome.
