glibc 2.31 malloc and free Source Code Analysis (continuously updated)

Preface

This article analyzes the malloc and free code paths using the book 《glibc 内存管理 ptmalloc 源代码分析》 (Glibc Memory Management: ptmalloc Source Code Analysis) as a reference. It does not cover the basics of memory allocation and management, so it is aimed at readers who already have some background. The glibc version covered by the book is quite old and differs from glibc 2.31 in many places, so I annotate the source according to my own understanding; if you spot mistakes, corrections are welcome.
The malloc path is analyzed starting from the __libc_malloc function, and the free path starting from the __libc_free function. The source of every function and structure encountered along the way is given later in the article and analyzed there; just use Ctrl+F to find it.
The two functions _int_malloc() and _int_free() are the core of the allocator and receive the most detailed treatment.

Functions

__libc_malloc (size_t bytes)

void * __libc_malloc (size_t bytes)
{
  /* The mstate type is a pointer to struct malloc_state */
  mstate ar_ptr;
  void *victim;

  _Static_assert (PTRDIFF_MAX <= SIZE_MAX / 2,
                  "PTRDIFF_MAX is not more than half of SIZE_MAX");
  
  /* If __malloc_hook is set, call the hook function instead */
  void *(*hook) (size_t, const void *)
    = atomic_forced_read (__malloc_hook);
  if (__builtin_expect (hook != NULL, 0))
    return (*hook)(bytes, RETURN_ADDRESS (0));

/* The tcache path  */
#if USE_TCACHE
  /* int_free also calls request2size, be careful to not pad twice.  */
  size_t tbytes;

  /* Validate the request size: on 64-bit systems, bytes must not exceed  */
  /* 0x7fffffffffffffff; on 32-bit systems it must not exceed 0x7fffffff. */ 
  /* checked_request2size also calls request2size to compute the chunk    */
  /* size needed for bytes of user data: if the request is smaller than   */
  /* the minimum chunk, the minimum chunk size is used; otherwise the     */
  /* smallest size satisfying the alignment requirement is used. The      */
  /* resulting size is returned in tbytes. */
  if (!checked_request2size (bytes, &tbytes))
    {
      __set_errno (ENOMEM);
      return NULL;
    }

  /* Compute the tcache bin index corresponding to tbytes */
  size_t tc_idx = csize2tidx (tbytes);
  
  /* If the tcache has not been created yet, tcache_init() initializes it */
  MAYBE_INIT_TCACHE ();

  DIAG_PUSH_NEEDS_COMMENT;
  
  /* mp_ is the global malloc_par structure */
  /* Check that the index is within the range of tcache bins, */
  /* that the tcache exists, and that the tcache bin at       */
  /* tc_idx holds at least one free tcache chunk              */
  if (tc_idx < mp_.tcache_bins
      && tcache
      && tcache->counts[tc_idx] > 0)
    {
      return tcache_get(tc_idx);  /* take a chunk of the matching size */
    }
  DIAG_POP_NEEDS_COMMENT;
#endif
  
  /* Single-threaded case */
  if (SINGLE_THREAD_P)
    {
      victim = _int_malloc (&main_arena, bytes);  /* allocate via _int_malloc */
      /* The assert allows exactly three outcomes: victim is NULL (the */
      /* allocation failed), the chunk was allocated by mmap, or the   */
      /* chunk belongs to the main arena                               */
      assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
	      &main_arena == arena_for_chunk (mem2chunk (victim)));  
      return victim;  /* return the successfully allocated memory pointer */
    }

  /* Multi-threaded case */
  arena_get (ar_ptr, bytes); /* acquire an arena */

  victim = _int_malloc (ar_ptr, bytes);  /* as above */
  /* Retry with another arena only if we were able to find a usable arena
     before.  */
  /* If we obtained an arena but the allocation failed, there can be     */
  /* several causes, e.g. the mmap region is exhausted; different causes */
  /* call for different remedies, such as switching to another arena,    */
  /* so the arena lookup and the allocation are retried once to give     */
  /* the allocation a chance to succeed */
  if (!victim && ar_ptr != NULL)
    {
      LIBC_PROBE (memory_malloc_retry, 1, bytes);
      ar_ptr = arena_get_retry (ar_ptr, bytes);
      victim = _int_malloc (ar_ptr, bytes);
    }
  
  /* Release the arena's mutex */
  if (ar_ptr != NULL)
    __libc_lock_unlock (ar_ptr->mutex);

  assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
          ar_ptr == arena_for_chunk (mem2chunk (victim)));
  return victim; 
}
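
To make the size computation above concrete, here is a small standalone sketch that recomputes what checked_request2size and csize2tidx produce. The constants are my assumptions for a typical 64-bit build (SIZE_SZ = 8, MALLOC_ALIGNMENT = 16, MINSIZE = 32); the two macros themselves are copied from glibc 2.31 malloc.c:

#include <stdio.h>
#include <stddef.h>

#define SIZE_SZ            8                  /* sizeof (INTERNAL_SIZE_T) on 64-bit */
#define MALLOC_ALIGNMENT   16
#define MALLOC_ALIGN_MASK  (MALLOC_ALIGNMENT - 1)
#define MINSIZE            32

/* request2size as defined in glibc 2.31 malloc.c */
#define request2size(req)                                       \
  (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE) ?            \
   MINSIZE :                                                    \
   ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)

/* csize2tidx as defined in glibc 2.31 malloc.c */
#define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)

int main (void)
{
  size_t reqs[] = { 0, 24, 25, 100, 1032 };
  for (size_t i = 0; i < sizeof (reqs) / sizeof (reqs[0]); i++)
    {
      size_t tbytes = request2size (reqs[i]);
      /* e.g. malloc(24) -> chunk 0x20 -> tc_idx 0, malloc(25) -> chunk 0x30 -> tc_idx 1,
         malloc(1032) -> chunk 0x410 -> tc_idx 63, the last tcache bin */
      printf ("malloc(%4zu) -> chunk size %#6zx -> tc_idx %2zu\n",
              reqs[i], tbytes, csize2tidx (tbytes));
    }
  return 0;
}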

tcache_init (void)

static void tcache_init(void)
{
  mstate ar_ptr;
  void *victim = 0;

  /* Size of the tcache_perthread_struct structure */
  const size_t bytes = sizeof (tcache_perthread_struct);  

  /* If the tcache has been shut down, return immediately */
  if (tcache_shutting_down)
    return;

  /* Taken as a whole, this block allocates the memory that backs the    */
  /* tcache. It involves arena selection and multi-thread lock handling, */
  /* mirroring the logic of __libc_malloc itself */
  arena_get (ar_ptr, bytes);
  victim = _int_malloc (ar_ptr, bytes);
  if (!victim && ar_ptr != NULL)
    {
      ar_ptr = arena_get_retry (ar_ptr, bytes);
      victim = _int_malloc (ar_ptr, bytes);
    }


  if (ar_ptr != NULL)
    __libc_lock_unlock (ar_ptr->mutex);

  /* In a low memory situation, we may not be able to allocate memory
     - in which case, we just keep trying later.  However, we
     typically do this very early, so either there is sufficient
     memory, or there isn't enough memory to do non-trivial
     allocations anyway.  */
  /* Once the tcache is allocated, zero its memory */
  if (victim)
    {
      tcache = (tcache_perthread_struct *) victim;
      memset (tcache, 0, sizeof (tcache_perthread_struct));
    }

}
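
For completeness: the tcache and tcache_shutting_down variables used above are thread-local; glibc 2.31 declares them as

static __thread bool tcache_shutting_down = false;
static __thread tcache_perthread_struct *tcache = NULL;

which is why every thread gets its own tcache and why tcache_init() runs once per thread.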

tcache_thread_shutdown (void)

static void tcache_thread_shutdown (void)  
{
  int i;
  tcache_perthread_struct *tcache_tmp = tcache;

  /* Return immediately if the tcache does not exist */
  if (!tcache)
    return;

  /* Disable the tcache and prevent it from being reinitialized.  */
  /* Disable the tcache and prevent it from being reinitialized */
  tcache = NULL;
  tcache_shutting_down = true;  /* tcache_shutting_down defaults to false */

  /* Free all of the entries and the tcache itself back to the arena
     heap for coalescing.  */
  /* Free all cached chunks back to the arena so they can be coalesced. */
  /* The outer for loop walks the array of tcache bins; each index      */
  /* corresponds to a different chunk size */
  for (i = 0; i < TCACHE_MAX_BINS; ++i)
    {
      /* The inner while loop walks one bin's singly linked list, i.e. the cached chunks of one size */
      while (tcache_tmp->entries[i])
	{
          /* free each entry on the list in turn */
	  tcache_entry *e = tcache_tmp->entries[i];
	  tcache_tmp->entries[i] = e->next;
	  __libc_free (e);
	}
    }
  /* Finally free the structure that manages the tcache itself */
  __libc_free (tcache_tmp);
}

tcache_get (size_t tc_idx)

static __always_inline void *tcache_get (size_t tc_idx)
{
  tcache_entry *e = tcache->entries[tc_idx];  /* take the first chunk out of the bin at tc_idx */
  tcache->entries[tc_idx] = e->next;  /* its successor becomes the new head of the bin */
  --(tcache->counts[tc_idx]);  /* one fewer chunk in this bin */
  e->key = NULL;
  return (void *) e;
}
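
For reference, here are the structures behind entries and counts, and tcache_get()'s counterpart tcache_put(), as they appear in glibc 2.31. The key field cleared above is the same field tcache_put() sets and _int_free() later checks to detect double frees:

typedef struct tcache_entry
{
  struct tcache_entry *next;
  /* This field exists to detect double frees.  */
  struct tcache_perthread_struct *key;
} tcache_entry;

typedef struct tcache_perthread_struct
{
  uint16_t counts[TCACHE_MAX_BINS];       /* TCACHE_MAX_BINS is 64 */
  tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

/* Caller must ensure that we know tc_idx is valid and there's room
   for more chunks.  */
static __always_inline void
tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);

  /* Mark this chunk as "in the tcache" so the test in _int_free will
     detect a double free.  */
  e->key = tcache;

  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}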

_int_malloc (mstate av, size_t bytes)

static void *
_int_malloc (mstate av, size_t bytes)
{
  INTERNAL_SIZE_T nb;               /* normalized request size */
  unsigned int idx;                 /* associated bin index */
  mbinptr bin;                      /* associated bin */  /* both mbinptr and mchunkptr are pointers to struct malloc_chunk */

  mchunkptr victim;                 /* inspected/selected chunk */
  INTERNAL_SIZE_T size;             /* its size */
  int victim_index;                 /* its bin index */

  mchunkptr remainder;              /* remainder from a split */
  unsigned long remainder_size;     /* its size */

  unsigned int block;               /* bit map traverser */
  unsigned int bit;                 /* bit map traverser */
  unsigned int map;                 /* current word of binmap */

  mchunkptr fwd;                    /* misc temp for linking */
  mchunkptr bck;                    /* misc temp for linking */

#if USE_TCACHE
  size_t tcache_unsorted_count;	    /* count of unsorted chunks processed */
#endif

  /*
     Convert request size to internal form by adding SIZE_SZ bytes
     overhead plus possibly more to obtain necessary alignment and/or
     to obtain a size of at least MINSIZE, the smallest allocatable
     size. Also, checked_request2size returns false for request sizes
     that are so large that they wrap around zero when padded and
     aligned.
   */

  if (!checked_request2size (bytes, &nb))
    {
      __set_errno (ENOMEM);
      return NULL;
    }

  /* There are no usable arenas.  Fall back to sysmalloc to get a chunk from
     mmap.  */
  /* If there is no usable arena, call sysmalloc to get a chunk from mmap */
  if (__glibc_unlikely (av == NULL))
    {
      void *p = sysmalloc (nb, av);
      if (p != NULL)
	alloc_perturb (p, bytes);
      return p;
    }

  /*
     If the size qualifies as a fastbin, first check corresponding bin.
     This code is safe to execute even if av is not yet initialized, so we
     can try it without checking, which saves some time on this fast path.
   */
    /* Allocating a chunk from the fast bins is quite simple: compute the   */
    /* fast bin index from the requested chunk size, use that index to get  */  
    /* the head of the bin's free chunk list, and make the head's successor */
    /* the new head of the list. If the tcache mechanism is in use and the  */
    /* fast bin still holds chunks, the remaining chunks of the same size   */
    /* are moved into the matching tcache bin. To speed up fast bin         */
    /* allocation, chunks sitting in fast bins keep their inuse flag set,   */
    /* so they are never coalesced with adjacent free chunks; allocation    */
    /* just takes the first chunk and calls chunk2mem() to return the       */
    /* memory block the user asked for */

#define REMOVE_FB(fb, victim, pp)			\
  do							\
    {							\
      victim = pp;					\
      if (victim == NULL)				\
	break;						\
    }							\
  while ((pp = catomic_compare_and_exchange_val_acq (fb, victim->fd, victim)) \
	 != victim);					\

  /* If the requested chunk size is no larger than the largest */
  /* fast bin chunk, first try allocating from the fast bins */
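  /* For reference, fastbin_index() below is defined in glibc 2.31 as        */
  /*   #define fastbin_index(sz)                                             */
  /*     ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)             */
  /* so on 64-bit, chunk sizes 0x20..0x80 map to fast bin indexes 0..6,      */
  /* and get_max_fast() defaults to DEFAULT_MXFAST = 64 * SIZE_SZ / 4 = 0x80 */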
  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
    {
      idx = fastbin_index (nb);  /* index of the fast bin matching the request size */
      mfastbinptr *fb = &fastbin (av, idx);  /* the fast bin slot; *fb is its first chunk */
      mchunkptr pp;  
      victim = *fb;  

      if (victim != NULL)
	{
	  if (SINGLE_THREAD_P)
            /* single-threaded: no atomic operations needed */
	    *fb = victim->fd;  /* the second chunk becomes the new list head */
	  else
            /* multi-threaded: take the head with an atomic CAS loop */
	    REMOVE_FB (fb, pp, victim);  /* we do not dwell on the atomic fast path */
	  if (__glibc_likely (victim != NULL))
	    {
	      size_t victim_idx = fastbin_index (chunksize (victim));  /* fast bin index recomputed from the taken chunk's size */
              
              /* Security check: the index derived from the size of the */ 
              /* chunk actually taken must equal the index derived from */
              /* the requested allocation size                          */
	      if (__builtin_expect (victim_idx != idx, 0))  
		malloc_printerr ("malloc(): memory corruption (fast)");
	      check_remalloced_chunk (av, victim, nb);
#if USE_TCACHE
              /* The tcache path */
	      /* While we're here, if we see other chunks of the same size,
		 stash them in the tcache.  */
	      size_t tc_idx = csize2tidx (nb);  /* tcache bin index matching the request size */
	      if (tcache && tc_idx < mp_.tcache_bins)  /* tcache exists and the index is in range */
		{
		  mchunkptr tc_victim;

		  /* While bin not empty and tcache not full, copy chunks.  */
		  while (tcache->counts[tc_idx] < mp_.tcache_count
			 && (tc_victim = *fb) != NULL)  /* while the fast bin still holds chunks and the tcache bin is not full */
		    {
		      if (SINGLE_THREAD_P)  /* single-threaded path */
			*fb = tc_victim->fd;
		      else
			{
			  REMOVE_FB (fb, pp, tc_victim);  /* atomic path */
			  if (__glibc_unlikely (tc_victim == NULL))
			    break;
			}
		      tcache_put (tc_victim, tc_idx);  /* move the fast bin chunk into the tcache bin */
		    }
		}
#endif
	      void *p = chunk2mem (victim);  /* convert the chunk pointer to a user pointer */
	      alloc_perturb (p, bytes);  /* optionally initialize the memory contents */
	      return p;
	    }
	}
    }

  /*
     If a small request, check regular bin.  Since these "smallbins"
     hold one size each, no searching within bins is necessary.
     (For a large request, we need to wait until unsorted chunks are
     processed to find best fit. But for small ones, fits are exact
     anyway, so we can check now, which is faster.)
   */

  /* If the requested chunk is of small bin size, first compute its index */
  /* into the small bins array, then fetch the head of that bin's circular */
  /* doubly linked list of free chunks and take the last chunk as victim.  */
  /* If victim equals the list head, the list is empty and nothing can be  */
  /* allocated from this small bin; that case is left for later steps. If  */
  /* victim differs from the head, unlink victim from the circular list    */
  /* and set its inuse flag, which lives in the lowest bit of the size     */
  /* field of the next adjacent chunk. Then, if the current arena is not   */
  /* the main arena, set the NON_MAIN_ARENA bit in victim's size field.    */
  /* If the tcache mechanism is enabled, other chunks in this small bin    */
  /* of the same size as victim are also moved into the tcache. Finally    */
  /* chunk2mem() converts victim into the usable memory pointer that is    */
  /* returned to the application. This completes allocation from the small */
  /* bins; note that when the matching small bin holds no free chunk, no   */
  /* chunk is obtained here and the later steps must handle the request */
  if (in_smallbin_range (nb))
    {
      idx = smallbin_index (nb);  /* index of the matching small bin */
      bin = bin_at (av, idx);  /* the bin header, treated as a chunk pointer */

      if ((victim = last (bin)) != bin)  /* if the bin's list is not empty */
        {
          bck = victim->bk;  
          
          /* error out unless the fd pointer of the chunk behind victim points back at victim */
	  if (__glibc_unlikely (bck->fd != victim))  
	    malloc_printerr ("malloc(): smallbin double linked list corrupted");
          set_inuse_bit_at_offset (victim, nb);  /* set victim's inuse flag */

          /* unlink victim from the doubly linked list */
          bin->bk = bck;
          bck->fd = bin;

          if (av != &main_arena)
	    set_non_main_arena (victim);  /* set the NON_MAIN_ARENA flag */
          check_malloced_chunk (av, victim, nb);
#if USE_TCACHE
	  /* While we're here, if we see other chunks of the same size,
	     stash them in the tcache.  */
	  size_t tc_idx = csize2tidx (nb);  /* compute the tcache index */
	  if (tcache && tc_idx < mp_.tcache_bins)  /* tcache exists and the index is in range */
	    {
	      mchunkptr tc_victim;

	      /* While bin not empty and tcache not full, copy chunks over.  */
	      while (tcache->counts[tc_idx] < mp_.tcache_count
		     && (tc_victim = last (bin)) != bin)  /* while the tcache is not full and the small bin still holds chunks */
		{
		  if (tc_victim != 0)
		    /* unlink the chunk from the small bin, set its flags,
		       and put it into the tcache */
		    {
		      bck = tc_victim->bk;
		      set_inuse_bit_at_offset (tc_victim, nb);
		      if (av != &main_arena)
			set_non_main_arena (tc_victim);
		      bin->bk = bck;
		      bck->fd = bin;

		      tcache_put (tc_victim, tc_idx);
	            }
		}
	    }
#endif
          void *p = chunk2mem (victim);  /* convert the chunk pointer to a memory pointer */  
          alloc_perturb (p, bytes);
          return p;  /* return the memory pointer */
        }
    }
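  /* For reference, the small bin helpers used above are defined in glibc 2.31 as */
  /*   #define smallbin_index(sz)                                                 */
  /*     ((SMALLBIN_WIDTH == 16 ? (((unsigned) (sz)) >> 4)                        */
  /*                            : (((unsigned) (sz)) >> 3))                       */
  /*      + SMALLBIN_CORRECTION)                                                  */
  /*   #define last(b) ((b)->bk)    -- last() yields the oldest chunk in the bin  */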

  /*
     If this is a large request, consolidate fastbins before continuing.
     While it might look excessive to kill all fastbins before
     even seeing if there is space available, this avoids
     fragmentation problems normally associated with fastbins.
     Also, in practice, programs tend to have runs of either small or
     large requests, but less often mixtures, so consolidation is not
     invoked all that often in most programs. And the programs that
     it is called frequently in otherwise tend to fragment.
   */

  else  /* the requested chunk is of large bin size */
    {
      idx = largebin_index (nb);
      if (atomic_load_relaxed (&av->have_fastchunks)) /* do this arena's fast bins hold any chunks? */
        malloc_consolidate (av);  /* call malloc_consolidate() to merge the fast bin chunks */
    }

  /*
     Process recently freed or remaindered chunks, taking one only if
     it is exact fit, or, if this a small request, the chunk is remainder from
     the most recent non-exact fit.  Place other traversed chunks in
     bins.  Note that this step is the only place in any routine where
     chunks are placed in bins.

     The outer loop here is needed because we might not realize until
     near the end of malloc that we should have consolidated, so must
     do so and retry. This happens at most once, and only when we would
     otherwise need to expand memory to service a "small" request.
   */

#if USE_TCACHE  /* the tcache path */
  INTERNAL_SIZE_T tcache_nb = 0;
  size_t tc_idx = csize2tidx (nb);  /* compute the index */
  if (tcache && tc_idx < mp_.tcache_bins)  /* tcache exists and the index is in range */
    tcache_nb = nb;
  int return_cached = 0;

  tcache_unsorted_count = 0;
#endif

  for (;; )
    {
      int iters = 0;

      /* Walk the unsorted bin's circular doubly linked list backwards; the loop ends when only the head node remains */
      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
        {
          bck = victim->bk;
          size = chunksize (victim);  /* victim's size */
          mchunkptr next = chunk_at_offset (victim, size);  /* pointer to the chunk that follows victim in memory */

          /* Sanity-check the chunk under inspection: its size must be */
          /* greater than 2 * SIZE_SZ and must not exceed the total    */
          /* amount of memory this arena has allocated                 */
          if (__glibc_unlikely (size <= 2 * SIZE_SZ)
              || __glibc_unlikely (size > av->system_mem))
            malloc_printerr ("malloc(): invalid size (unsorted)");
          
          /* apply the same checks to the next chunk */
          if (__glibc_unlikely (chunksize_nomask (next) < 2 * SIZE_SZ)
              || __glibc_unlikely (chunksize_nomask (next) > av->system_mem))
            malloc_printerr ("malloc(): invalid next size (unsorted)");

          /* error out if next's prev_size, which records the previous chunk's size, does not match size */
          if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
            malloc_printerr ("malloc(): mismatching next->prev_size (unsorted)");

          /* error out if the fd pointer of the chunk victim->bk points at */
          /* is not victim, or victim's fd does not point at the unsorted  */
          /* bin's head node */
          if (__glibc_unlikely (bck->fd != victim)
              || __glibc_unlikely (victim->fd != unsorted_chunks (av)))
            malloc_printerr ("malloc(): unsorted double linked list corrupted");
          
          /* error out if next's prev_inuse bit is set, i.e. it */
          /* claims the previous chunk (victim) is still in use */
          if (__glibc_unlikely (prev_inuse (next)))
            malloc_printerr ("malloc(): invalid next->prev_inuse (unsorted)");

          /*
             If a small request, try to use last remainder if it is the
             only chunk in unsorted bin.  This helps promote locality for
             runs of consecutive small requests. This is the only
             exception to best-fit, and applies only when there is
             no exact fit for a small chunk.
           */

          /* If we need a small bin chunk, no match was found in the small    */
          /* bins earlier, the unsorted bin contains exactly one chunk, that  */
          /* chunk is the last remainder chunk, and it is larger than the     */
          /* requested size plus MINSIZE, then the needed small bin chunk can */
          /* be split off it. This is the only case in which a small bin      */
          /* chunk is allocated from the unsorted bin, an optimization that   */
          /* improves CPU cache locality for runs of consecutive small requests */
          if (in_smallbin_range (nb) &&
              bck == unsorted_chunks (av) &&
              victim == av->last_remainder &&
              (unsigned long) (size) > (unsigned long) (nb + MINSIZE))
            {
              /* split and reattach remainder */
              /* 从该 chunk 中切分出所需大小的 chunk,计算切分后剩下 chunk 的大小,*/
              /* 将剩下的 chunk 加入 unsorted bin 的链表中,并将剩下的 chunk 作为 */
              /* 分配区的 last remainder chunk,若剩下的 chunk 属于 large bin chunk,*/
              /* 将该 chunk 的 fd_nextsize 和 bk_nextsize 设置为 NULL,因为这个 chunk */
              /* 仅仅存在于 unsorted bin 中,并且 unsorted bin 中有且仅有这一个 chunk */
              remainder_size = size - nb;
              remainder = chunk_at_offset (victim, nb);
              unsorted_chunks (av)->bk = unsorted_chunks (av)->fd = remainder;
              av->last_remainder = remainder;
              remainder->bk = remainder->fd = unsorted_chunks (av);
              if (!in_smallbin_range (remainder_size))
                {
                  remainder->fd_nextsize = NULL;
                  remainder->bk_nextsize = NULL;
                }

              /* Set the metadata of the allocated chunk and of the last     */
              /* remainder chunk, i.e. their sizes and status bits. The last */
              /* remainder chunk also needs the set_foot macro, because only */
              /* a free chunk has a valid foot (the prev_size of the next    */
              /* chunk); for an inuse chunk the foot is invalid, being part  */
              /* of the memory block handed to the application. With the     */
              /* metadata in place, chunk2mem() yields the usable memory     */
              /* pointer, which is returned to the application */
              set_head (victim, nb | PREV_INUSE |
                        (av != &main_arena ? NON_MAIN_ARENA : 0));
              set_head (remainder, remainder_size | PREV_INUSE);
              set_foot (remainder, remainder_size);

              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }

          /* remove from unsorted list */
          if (__glibc_unlikely (bck->fd != victim))  /* check bck->fd == victim once more */
            malloc_printerr ("malloc(): corrupted unsorted chunks 3");

          /* remove the last chunk from the circular doubly linked list */
          unsorted_chunks (av)->bk = bck;
          bck->fd = unsorted_chunks (av);

          /* Take now instead of binning if exact fit */
          /* If the chunk under inspection is exactly the requested size,  */
          /* return it. First set its inuse flag, which lives in the size  */
          /* field of the adjacent next chunk; if the current arena is not */
          /* the main arena, also set the NON_MAIN_ARENA flag. With tcache */
          /* enabled the chunk is first stashed in the tcache and handled  */
          /* later; otherwise chunk2mem() yields the usable memory pointer, */
          /* which is returned to the application */
          if (size == nb)  /* exact fit */
            {
              /* set the status bits */
              set_inuse_bit_at_offset (victim, size);
              if (av != &main_arena)
		set_non_main_arena (victim);
#if USE_TCACHE  /* the tcache path */
	      /* Fill cache first, return to user only if cache fills.
		 We may return one of these chunks later.  */
	      if (tcache_nb
		  && tcache->counts[tc_idx] < mp_.tcache_count)  /* the tcache bin is not full yet */
		{
		  tcache_put (victim, tc_idx);  /* stash victim in the tcache */
		  return_cached = 1;
		  continue;
		}
	      else
		{
#endif
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;  /* return the memory pointer */
#if USE_TCACHE
		}
#endif
            }

          /* place chunk in bin */
          /* 如果当前 chunk 属于 small bins,获得当前 chunk 所属 small bin 的 index,*/
          /* 并将该 small bin 的链表表头赋值给 bck,第一个 chunk 赋值给 fwd,之后会把当前的 */
          /* chunk 会插入到 bck 和 fwd 之间,作为 small bin 链表的第一个 chunk */
          if (in_smallbin_range (size))
            {
              victim_index = smallbin_index (size);
              bck = bin_at (av, victim_index);
              fwd = bck->fd;
            }
          else

          /* If the current chunk is of large bin size, compute the index  */
          /* of its large bin, point bck at that bin's list head and fwd   */
          /* at the first chunk; the current chunk will then be inserted   */
          /* between bck and fwd as the first chunk of the large bin's list */
            {
              victim_index = largebin_index (size);
              bck = bin_at (av, victim_index);
              fwd = bck->fd;

              /* maintain large bins in sorted order */
              /* If fwd differs from bck, the large bin holds free chunks.   */
              /* Free chunks in a large bin are kept sorted by size, so the  */
              /* chunk just taken from the unsorted bin must be inserted at  */
              /* the right position. Setting the inuse (PREV_INUSE) bit of   */
              /* size, which amounts to adding 1, speeds up the size         */
              /* comparisons used to find the insertion point. There is also */
              /* an assertion that the last chunk on the large bin's list    */
              /* does not have the non-main-arena bit set in its size field: */
              /* every chunk in a large bin is free, so that bit must be clear */
              if (fwd != bck)
                {
                  /* Or with inuse bit to speed comparisons */
                  size |= PREV_INUSE;
                  /* if smaller than smallest, bypass loop below */
                  assert (chunk_main_arena (bck->bk));

                  /* If the current chunk is smaller than the last (smallest) */
                  /* chunk of the large bin, insert it at the tail of the     */
                  /* list. Large bin chunks are thus sorted from largest to   */
                  /* smallest, and each chunk sits on two circular doubly     */
                  /* linked lists: one containing every chunk in the bin, and */
                  /* a "chunk size" (nextsize) list that links the first      */
                  /* chunk of each distinct size in size order. The nextsize  */
                  /* list lets a traversal hop over a run of equal-sized      */
                  /* chunks straight to the next different size, which speeds */
                  /* up searches within the large bin's list */
                  if ((unsigned long) (size)
		      < (unsigned long) chunksize_nomask (bck->bk))
                    {
                      fwd = bck;
                      bck = bck->bk;

                      victim->fd_nextsize = fwd->fd;
                      victim->bk_nextsize = fwd->fd->bk_nextsize;
                      fwd->fd->bk_nextsize = victim->bk_nextsize->fd_nextsize = victim;
                    }
                  else
                    {
                      assert (chunk_main_arena (fwd));
                      
                     /* Walk the nextsize list forwards, stopping at the  */
                     /* first chunk whose size is less than or equal to   */
                     /* the current chunk's size */
                      while ((unsigned long) size < chunksize_nomask (fwd))
                        {
                          fwd = fwd->fd_nextsize;
			  assert (chunk_main_arena (fwd));
                        }
                      
                      /* If a chunk of exactly the same size already exists  */
                      /* in the large bin, that size is already represented  */
                      /* on the nextsize list by the chunk fwd points at; to */
                      /* leave the nextsize list untouched, the current      */
                      /* chunk can only be inserted right after fwd */
                      if ((unsigned long) size
			  == (unsigned long) chunksize_nomask (fwd))
                        /* Always insert in the second position.  */
                        fwd = fwd->fd;

                      /* If the nextsize list does not yet contain a chunk   */
                      /* of this size, i.e. the current chunk is larger than */
                      /* fwd, the current chunk joins the nextsize list as   */
                      /* the representative of its size; the nextsize list   */
                      /* is likewise sorted from largest to smallest */
                      else
                        {
                          victim->fd_nextsize = fwd;
                          victim->bk_nextsize = fwd->bk_nextsize;
                          if (__glibc_unlikely (fwd->bk_nextsize->fd_nextsize != fwd))
                            malloc_printerr ("malloc(): largebin double linked list corrupted (nextsize)");
                          fwd->bk_nextsize = victim;
                          victim->bk_nextsize->fd_nextsize = victim;
                        }
                      bck = fwd->bk;
                      if (bck->fd != fwd)
                        malloc_printerr ("malloc(): largebin double linked list corrupted (bk)");
                    }
                }
          
              /* If the large bin holds no chunks at all, the current chunk simply becomes the sole element of the nextsize list */
              else
                victim->fd_nextsize = victim->bk_nextsize = victim;
            }

          mark_bin (av, victim_index);
          victim->bk = bck;
          victim->fd = fwd;
          fwd->bk = victim;
          bck->fd = victim;

#if USE_TCACHE
      /* If we've processed as many chunks as we're allowed while
	 filling the cache, return one of the cached ones.  */
      ++tcache_unsorted_count;
      if (return_cached
	  && mp_.tcache_unsorted_limit > 0
	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
	{
	  return tcache_get (tc_idx);
	}
#endif

#define MAX_ITERS       10000
          if (++iters >= MAX_ITERS)
            break;
        }

#if USE_TCACHE
      /* If all the small chunks we found ended up cached, return one now.  */
      if (return_cached)
	{
	  return tcache_get (tc_idx);
	}
#endif

      /*
         If a large request, scan through the chunks of current bin in
         sorted order to find smallest that fits.  Use the skip list for this.
       */

      if (!in_smallbin_range (nb))
        {
          bin = bin_at (av, idx);

          /* skip scan if empty or largest chunk is too small */
          if ((victim = first (bin)) != bin
	      && (unsigned long) chunksize_nomask (victim)
	        >= (unsigned long) (nb))
            {
              victim = victim->bk_nextsize;
              while (((unsigned long) (size = chunksize (victim)) <
                      (unsigned long) (nb)))
                victim = victim->bk_nextsize;

              /* Avoid removing the first entry for a size so that the skip
                 list does not have to be rerouted.  */
              if (victim != last (bin)
		  && chunksize_nomask (victim)
		    == chunksize_nomask (victim->fd))
                victim = victim->fd;

              remainder_size = size - nb;
              unlink_chunk (av, victim);

              /* Exhaust */
              if (remainder_size < MINSIZE)
                {
                  set_inuse_bit_at_offset (victim, size);
                  if (av != &main_arena)
		    set_non_main_arena (victim);
                }
              /* Split */
              else
                {
                  remainder = chunk_at_offset (victim, nb);
                  /* We cannot assume the unsorted list is empty and therefore
                     have to perform a complete insert here.  */
                  bck = unsorted_chunks (av);
                  fwd = bck->fd;
		  if (__glibc_unlikely (fwd->bk != bck))
		    malloc_printerr ("malloc(): corrupted unsorted chunks");
                  remainder->bk = bck;
                  remainder->fd = fwd;
                  bck->fd = remainder;
                  fwd->bk = remainder;
                  if (!in_smallbin_range (remainder_size))
                    {
                      remainder->fd_nextsize = NULL;
                      remainder->bk_nextsize = NULL;
                    }
                  set_head (victim, nb | PREV_INUSE |
                            (av != &main_arena ? NON_MAIN_ARENA : 0));
                  set_head (remainder, remainder_size | PREV_INUSE);
                  set_foot (remainder, remainder_size);
                }
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }
        }

      /*
         Search for a chunk by scanning bins, starting with next largest
         bin. This search is strictly by best-fit; i.e., the smallest
         (with ties going to approximately the least recently used) chunk
         that fits is selected.

         The bitmap avoids needing to check that most blocks are nonempty.
         The particular case of skipping all bins during warm-up phases
         when no chunks have been returned yet is faster than it might look.
       */

      ++idx;
      bin = bin_at (av, idx);
      block = idx2block (idx);
      map = av->binmap[block];
      bit = idx2bit (idx);

      for (;; )
        {
          /* Skip rest of block if there are no more set bits in this block.  */
          if (bit > map || bit == 0)
            {
              do
                {
                  if (++block >= BINMAPSIZE) /* out of bins */
                    goto use_top;
                }
              while ((map = av->binmap[block]) == 0);

              bin = bin_at (av, (block << BINMAPSHIFT));
              bit = 1;
            }

          /* Advance to bin with set bit. There must be one. */
          while ((bit & map) == 0)
            {
              bin = next_bin (bin);
              bit <<= 1;
              assert (bit != 0);
            }

          /* Inspect the bin. It is likely to be non-empty */
          victim = last (bin);

          /*  If a false alarm (empty bin), clear the bit. */
          if (victim == bin)
            {
              av->binmap[block] = map &= ~bit; /* Write through */
              bin = next_bin (bin);
              bit <<= 1;
            }

          else
            {
              size = chunksize (victim);

              /*  We know the first chunk in this bin is big enough to use. */
              assert ((unsigned long) (size) >= (unsigned long) (nb));

              remainder_size = size - nb;

              /* unlink */
              unlink_chunk (av, victim);

              /* Exhaust */
              if (remainder_size < MINSIZE)
                {
                  set_inuse_bit_at_offset (victim, size);
                  if (av != &main_arena)
		    set_non_main_arena (victim);
                }

              /* Split */
              else
                {
                  remainder = chunk_at_offset (victim, nb);

                  /* We cannot assume the unsorted list is empty and therefore
                     have to perform a complete insert here.  */
                  bck = unsorted_chunks (av);
                  fwd = bck->fd;
		  if (__glibc_unlikely (fwd->bk != bck))
		    malloc_printerr ("malloc(): corrupted unsorted chunks 2");
                  remainder->bk = bck;
                  remainder->fd = fwd;
                  bck->fd = remainder;
                  fwd->bk = remainder;

                  /* advertise as last remainder */
                  if (in_smallbin_range (nb))
                    av->last_remainder = remainder;
                  if (!in_smallbin_range (remainder_size))
                    {
                      remainder->fd_nextsize = NULL;
                      remainder->bk_nextsize = NULL;
                    }
                  set_head (victim, nb | PREV_INUSE |
                            (av != &main_arena ? NON_MAIN_ARENA : 0));
                  set_head (remainder, remainder_size | PREV_INUSE);
                  set_foot (remainder, remainder_size);
                }
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
            }
        }

    use_top:
      /*
         If large enough, split off the chunk bordering the end of memory
         (held in av->top). Note that this is in accord with the best-fit
         search rule.  In effect, av->top is treated as larger (and thus
         less well fitting) than any other available chunk since it can
         be extended to be as large as necessary (up to system
         limitations).

         We require that av->top always exists (i.e., has size >=
         MINSIZE) after initialization, so if it would otherwise be
         exhausted by current request, it is replenished. (The main
         reason for ensuring it exists is that we may need MINSIZE space
         to put in fenceposts in sysmalloc.)
       */

      victim = av->top;
      size = chunksize (victim);

      if (__glibc_unlikely (size > av->system_mem))
        malloc_printerr ("malloc(): corrupted top size");

      if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
        {
          remainder_size = size - nb;
          remainder = chunk_at_offset (victim, nb);
          av->top = remainder;
          set_head (victim, nb | PREV_INUSE |
                    (av != &main_arena ? NON_MAIN_ARENA : 0));
          set_head (remainder, remainder_size | PREV_INUSE);

          check_malloced_chunk (av, victim, nb);
          void *p = chunk2mem (victim);
          alloc_perturb (p, bytes);
          return p;
        }

      /* When we are using atomic ops to free fast chunks we can get
         here for all block sizes.  */
      else if (atomic_load_relaxed (&av->have_fastchunks))
        {
          malloc_consolidate (av);
          /* restore original bin index */
          if (in_smallbin_range (nb))
            idx = smallbin_index (nb);
          else
            idx = largebin_index (nb);
        }

      /*
         Otherwise, relay to handle system-dependent cases
       */
      else
        {
          void *p = sysmalloc (nb, av);
          if (p != NULL)
            alloc_perturb (p, bytes);
          return p;
        }
    }
}
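
A quick user-level experiment makes the tcache-before-fastbin order of _int_malloc visible. The sketch below relies on the default tunables assumed throughout this article (mp_.tcache_count = 7, so the eighth freed chunk of a given size goes to the fast bin) and pokes at allocator internals, so treat it as a demo rather than portable code:

#include <stdio.h>
#include <stdlib.h>

int main (void)
{
  void *p[9];

  /* nine 0x40-byte requests; each is served by a 0x50-byte chunk */
  for (int i = 0; i < 9; i++)
    p[i] = malloc (0x40);

  /* the first seven frees fill the 0x50 tcache bin (tcache_count = 7);
     the eighth free goes to the fast bin instead */
  for (int i = 0; i < 8; i++)
    free (p[i]);

  /* tcache_get() pops in LIFO order, so the next malloc of the same
     size should return p[6], the last chunk put into the tcache */
  void *q = malloc (0x40);
  printf ("p[6] = %p, q = %p (%s)\n", p[6], q,
          q == p[6] ? "served from tcache" : "served elsewhere");
  return 0;
}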

sysmalloc (INTERNAL_SIZE_T nb, mstate av)

static void *sysmalloc (INTERNAL_SIZE_T nb, mstate av)
{
  mchunkptr old_top;              /* incoming value of av->top */
  INTERNAL_SIZE_T old_size;       /* its size */
  char *old_end;                  /* its end address */

  long size;                      /* arg to first MORECORE or mmap call */
  char *brk;                      /* return value from MORECORE */

  long correction;                /* arg to 2nd MORECORE call */
  char *snd_brk;                  /* 2nd return val */

  INTERNAL_SIZE_T front_misalign; /* unusable bytes at front of new space */
  INTERNAL_SIZE_T end_misalign;   /* partial page left at end of new space */
  char *aligned_brk;              /* aligned offset into brk */

  mchunkptr p;                    /* the allocated/returned chunk */
  mchunkptr remainder;            /* remainder from allocation */
  unsigned long remainder_size;   /* its size */


  size_t pagesize = GLRO (dl_pagesize);
  bool tried_mmap = false;


  /*
     If have mmap, and the request size meets the mmap threshold, and
     the system supports mmap, and there are few enough currently
     allocated mmapped regions, try to directly map this request
     rather than expanding top.
   */

  /* If there is no arena, or the requested chunk size is at least the  */
  /* mmap threshold (128K by default) and the process still has fewer   */
  /* mmapped blocks than the configured maximum, use the mmap() system  */
  /* call to request memory directly from the operating system */
  if (av == NULL
      || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
    {
      char *mm;           /* return value from mmap call*/

    try_mmap:
      /*
         Round up size to nearest page.  For mmapped chunks, the overhead
         is one SIZE_SZ unit larger than for normal chunks, because there
         is no following chunk whose prev_size field could be used.

         See the front_misalign handling below, for glibc there is no
         need for further alignments unless we have have high alignment.
       */
      /* compute the smallest allocation size that satisfies page alignment */
      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
        size = ALIGN_UP (nb + SIZE_SZ, pagesize);
      else
        size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
      tried_mmap = true;

      /* Don't try if size wraps around 0 */
      /* If the recomputed size is not larger than nb, the size wrapped  */
      /* around zero, so do not allocate; otherwise call mmap() to       */
      /* allocate memory of the required size */
      if ((unsigned long) (size) > (unsigned long) (nb))
        {
          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));

          /* If mmap() succeeds, cast the returned memory pointer to a    */
          /* chunk pointer, set the chunk's size to size, and set its     */
          /* IS_MMAPPED flag to record that this chunk was mapped         */
          /* directly from the system by mmap() */
          if (mm != MAP_FAILED)
            {
              /*
                 The offset to the start of the mmapped region is stored
                 in the prev_size field of the chunk. This allows us to adjust
                 returned start address to meet alignment requirements here
                 and in memalign(), and still be able to compute proper
                 address argument for later munmap in free() and realloc().
               */

              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
                {
                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
                  /* assert that the address is properly aligned */
                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
                  front_misalign = 0;
                }
              else
                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
              
              /* If MALLOC_ALIGNMENT is not 2 * SIZE_SZ, the mapped memory */
              /* may be misaligned; realign it to MALLOC_ALIGNMENT */
              if (front_misalign > 0)
                {
                  correction = MALLOC_ALIGNMENT - front_misalign;
                  p = (mchunkptr) (mm + correction);
		  set_prev_size (p, correction);
                  set_head (p, (size - correction) | IS_MMAPPED);
                }
              else
                {
                  p = (mchunkptr) mm;
		  set_prev_size (p, 0);
                  set_head (p, size | IS_MMAPPED);
                }

              /* update statistics */
              /* Update the statistics: increment the process's count of   */
              /* mmapped blocks and, via atomic_max, raise the high-water  */
              /* mark mp_.max_n_mmaps if the new count exceeds it. Then    */
              /* add size to the total of mmap-allocated memory and        */
              /* likewise raise mp_.max_mmapped_mem if the new total       */
              /* exceeds the recorded maximum */
              int new = atomic_exchange_and_add (&mp_.n_mmaps, 1) + 1;
              atomic_max (&mp_.max_n_mmaps, new);

              unsigned long sum;
              sum = atomic_exchange_and_add (&mp_.mmapped_mem, size) + size;
              atomic_max (&mp_.max_mmapped_mem, sum);

              check_chunk (av, p);

              /* return the user pointer into the allocated chunk; the application stores its data starting at that pointer */
              return chunk2mem (p);
            }
        }
    }

  /* There are no usable arenas and mmap also failed.  */
  if (av == NULL)
    return 0;

  /* Record incoming configuration of top */
  /* Save the current top chunk's pointer, size and end address in temporaries */
  old_top = av->top;
  old_size = chunksize (old_top);
  old_end = (char *) (chunk_at_offset (old_top, old_size));

  brk = snd_brk = (char *) (MORECORE_FAILURE);

  /*
     If not the first time through, we require old_size to be
     at least MINSIZE and to have prev_inuse set.
   */
  /* Check the top chunk's validity. If this is the first call, the top   */
  /* chunk may be uninitialized and old_size may be 0. Once initialized,  */
  /* the top chunk must be at least MINSIZE, because it contains the      */
  /* fencepost and the fencepost needs MINSIZE bytes. The top chunk must  */
  /* flag its previous chunk as inuse (this is the convention), and its   */
  /* end address must be page aligned. Also, the top chunk minus the      */
  /* fencepost must be smaller than the requested chunk; otherwise        */
  /* _int_malloc() would already have satisfied the request from the top chunk */
  assert ((old_top == initial_top (av) && old_size == 0) ||
          ((unsigned long) (old_size) >= MINSIZE &&
           prev_inuse (old_top) &&
           ((unsigned long) old_end & (pagesize - 1)) == 0));

  /* Precondition: not enough current space to satisfy nb request */
  assert ((unsigned long) (old_size) < (unsigned long) (nb + MINSIZE));

  /* The current arena is not the main arena */
  if (av != &main_arena)
    {
      /* heap_info is struct _heap_info */
      heap_info *old_heap, *heap;
      size_t old_heap_size;

      /* First try to extend the current heap. */
      old_heap = heap_for_ptr (old_top);
      old_heap_size = old_heap->size;

      /* Derive the current sub_heap's heap_info instance from the top     */
      /* chunk pointer. Since the top chunk's remaining space cannot       */
      /* satisfy the request (already asserted above), try to grow the     */
      /* sub_heap's readable and writable region; on success, update the   */
      /* memory allocation statistics and the new top chunk's size */
      if ((long) (MINSIZE + nb - old_size) > 0
          && grow_heap (old_heap, MINSIZE + nb - old_size) == 0)
        {
          av->system_mem += old_heap->size - old_heap_size;
          set_head (old_top, (((char *) old_heap + old_heap->size) - (char *) old_top)
                    | PREV_INUSE);
        }
      
      /* Otherwise call new_heap() to create a new sub_heap. The sub_heap  */
      /* must hold at least a chunk of size nb, a fencepost of size        */
      /* MINSIZE and a heap_info instance of size sizeof(*heap), so the    */
      /* size passed to new_heap() is nb + (MINSIZE + sizeof(*heap)) */
      else if ((heap = new_heap (nb + (MINSIZE + sizeof (*heap)), mp_.top_pad)))
        {
          /* Store the arena pointer in the new sub_heap, link the       */
          /* sub_heap into the arena's sub_heap list, update the arena's */
          /* allocation statistics, make the new sub_heap's single free  */
          /* chunk the arena's top chunk, and set the top chunk's state  */   
          /* Use a newly allocated heap.  */
          heap->ar_ptr = av;
          heap->prev = old_heap;
          av->system_mem += heap->size;
          /* Set up the new top.  */
          top (av) = chunk_at_offset (heap, sizeof (*heap));
          set_head (top (av), (heap->size - sizeof (*heap)) | PREV_INUSE);

          /* Setup fencepost and free the old top chunk with a multiple of
             MALLOC_ALIGNMENT in size. */
          /* The fencepost takes at least MINSIZE bytes, because it might
             become the top chunk again later.  Note that a footer is set
             up, too, although the chunk is marked in use. */
          old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);

          /* Set up the old top chunk's fencepost, which needs MINSIZE      */
          /* bytes; subtracting MINSIZE from old_size gives the old top     */
          /* chunk's usable space. First set the fencepost's second chunk   */
          /* to size 0 with the previous chunk flagged inuse. Then check    */
          /* whether the old top chunk's usable space is at least MINSIZE;  */
          /* if so, a chunk of at least MINSIZE can be carved out of it,    */
          /* and the old top chunk is split into a free chunk and the       */
          /* fencepost: the fencepost's first chunk gets size 2*SIZE_SZ     */
          /* with the previous chunk flagged inuse, plus a foot marking it  */
          /* free, while the fencepost's second chunk flags the first one   */
          /* as inuse; this odd-looking fencepost exists because no two     */
          /* free chunks may be adjacent. The chunk split off the old top   */
          /* is actually free too, yet the fencepost's first chunk flags it */
          /* as inuse, and it is then released by force with _int_free();   */ 
          /* all of this purely honors the rule that no two free chunks     */
          /* may sit next to each other */
          if (old_size >= MINSIZE)
            {
              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
              set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
              _int_free (av, old_top, 1);
            }
          /* If the old top chunk's usable space is less than MINSIZE, the */
          /* whole old top chunk becomes the fencepost, and the state of   */
          /* the fencepost's first chunk is set accordingly */
          else
            {
              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
              set_foot (old_top, (old_size + 2 * SIZE_SZ));
            }
        }

      /* If growing the sub_heap's readable and writable region and      */
      /* creating a new sub_heap both failed, try allocating the chunk   */
      /* directly from the system with mmap() */
      else if (!tried_mmap)
        /* We can at least try to use to mmap memory. */
        goto try_mmap;
    }
  else     /* av == main_arena */
    /* The current arena is the main arena */
    { /* Request enough space for nb + pad + overhead */
      size = nb + mp_.top_pad + MINSIZE;

      /*
         If contiguous, we can subtract out existing space that we hope to
         combine with new space. We add it back later only if
         we don't actually get contiguous space.
       */

      /* Normally the main arena allocates with sbrk() from the heap, and  */
      /* sbrk() returns contiguous virtual memory, so the size to allocate */
      /* is reduced here by the free space already in the top chunk */
      if (contiguous (av))
        size -= old_size;

      /*
         Round to a multiple of page size.
         If MORECORE is not contiguous, this ensures that we only call it
         with whole-page arguments.  And if MORECORE is contiguous and
         this is not first time through, this preserves page-alignment of
         previous calls. Otherwise, we correct to page-align below.
       */
 
      /* Page-align size: sbrk() must allocate contiguous virtual memory in whole pages */
      size = ALIGN_UP (size, pagesize);

      /*
         Don't try to call MORECORE if argument is so big as to appear
         negative. Note that since mmap takes size_t arg, it may succeed
         below even if we cannot call MORECORE.
       */
      
      /* Allocate a block of size bytes of virtual memory from the heap with sbrk() */
      if (size > 0)
        {
          brk = (char *) (MORECORE (size));
          LIBC_PROBE (memory_sbrk_more, 2, brk, size);
        }

      /* If sbrk() succeeded and a morecore hook is installed, call the hook */
      if (brk != (char *) (MORECORE_FAILURE))
        {
          /* Call the `morecore' hook if necessary.  */
          void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
          if (__builtin_expect (hook != NULL, 0))
            (*hook)();
        }
      else
        {
          /*
             If have mmap, try using it as a backup when MORECORE fails or
             cannot be used. This is worth doing on systems that have "holes" in
             address space, so sbrk cannot extend to give contiguous space, but
             space is available elsewhere.  Note that we ignore mmap max count
             and threshold limits, since the space will not be used as a
             segregated mmap region.
           */
          
          /* If sbrk() failed or is unavailable, fall back to mmap():      */
          /* recompute the required size, page-align it, and raise it to   */
          /* at least 1M if it is smaller; in other words, the minimum     */
          /* block that mmap() allocates when standing in for morecore is 1M */
          /* Cannot merge with old top, so add its size back in */
          if (contiguous (av))
            size = ALIGN_UP (size + old_size, pagesize);
          
          /* If we are relying on mmap as backup, then use larger units */
          if ((unsigned long) (size) < (unsigned long) (MMAP_AS_MORECORE_SIZE))
            size = MMAP_AS_MORECORE_SIZE;

          /* Don't try if size wraps around 0 */
          /* If the required allocation size is valid, allocate it with    */
          /* mmap(). On success, update brk and snd_brk, and mark the      */
          /* arena as allocating non-contiguous virtual memory blocks */
          if ((unsigned long) (size) > (unsigned long) (nb))
            {
              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));

              if (mbrk != MAP_FAILED)
                {
                  /* We do not need, and cannot use, another sbrk call to find end */
                  brk = mbrk;
                  snd_brk = brk + size;

                  /*
                     Record that we no longer have a contiguous sbrk region.
                     After the first time mmap is used as backup, we do not
                     ever rely on contiguous space since this could incorrectly
                     bridge regions.
                   */
                  set_noncontiguous (av);
                }
            }
        }
     
      /* If brk is valid, i.e. sbrk() or mmap() succeeded, initialize */
      /* sbrk_base if it has not been set yet, and update the arena's */
      /* total allocated memory */
      if (brk != (char *) (MORECORE_FAILURE))
        {
          if (mp_.sbrk_base == 0)
            mp_.sbrk_base = brk;
          av->system_mem += size;

          /*
             If MORECORE extends previous space, we can likewise extend top size.
           */

          /* If sbrk() succeeded, enlarge the top chunk and flag its        */
          /* previous chunk as inuse. If the arena allocates contiguous     */
          /* virtual memory, the old top chunk's size is non-zero, and yet  */
          /* the new brk lies below the old top chunk's end address,        */
          /* something has gone wrong */
          if (brk == old_end && snd_brk == (char *) (MORECORE_FAILURE))
            set_head (old_top, (size + old_size) | PREV_INUSE);

          else if (contiguous (av) && old_size && brk < old_end)
	    /* Oops!  Someone else killed our space..  Can't touch anything.  */
	    malloc_printerr ("break adjusted to free malloc space");

          /*
             Otherwise, make adjustments:

           * If the first time through or noncontiguous, we need to call sbrk
              just to find out where the end of memory lies.

           * We need to ensure that all returned chunks from malloc will meet
              MALLOC_ALIGNMENT

           * If there was an intervening foreign sbrk, we need to adjust sbrk
              request size to account for fact that we will not be able to
              combine new space with existing space in old_top.

           * Almost all systems internally allocate whole pages at a time, in
              which case we might as well use the whole last page of request.
              So we allocate enough more memory to hit a page boundary now,
              which in turn causes future contiguous calls to page-align.
           */

          /* Reaching this branch means sbrk() returned a brk value above   */
          /* the old top chunk's end address, so the new region is not      */
          /* contiguous with the old top chunk, possibly because some       */
          /* other code called sbrk() in the meantime; the realignment of   */
          /* addresses must be handled here */
          else
            {
              front_misalign = 0;
              end_misalign = 0;
              correction = 0;
              aligned_brk = brk;

              /* handle contiguous cases */
              if (contiguous (av))
                {
                  /* Count foreign sbrk as system_mem.  */
                  /* If this arena allocates contiguous virtual memory and */
                  /* a foreign sbrk() call occurred, count the memory that */
                  /* foreign call allocated towards this arena's statistics */
                  if (old_size)
                    av->system_mem += brk - old_end;
            
                  /* Guarantee alignment of first new chunk made from this space */
                  /* compute how many bytes the current brk must be corrected by so that it is aligned to MALLOC_ALIGNMENT */
                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
                  if (front_misalign > 0)
                    {
                      /*
                         Skip over some bytes to arrive at an aligned position.
                         We don't need to specially mark these wasted front bytes.
                         They will never be accessed anyway because
                         prev_inuse of av->top (and any chunk created from its start)
                         is always true after initialization.
                       */

                      correction = MALLOC_ALIGNMENT - front_misalign;
                      aligned_brk += correction;
                    }

                  /*
                     If this isn't adjacent to existing space, then we will not
                     be able to merge with old_top space, so must add to 2nd request.
                   */
                  
                  /* Since the old top chunk is no longer adjacent to the    */
                  /* current brk, its memory cannot be used any more, and    */
                  /* enough memory for the request must be obtained afresh:  */
                  /* add the old top chunk's size to the correction, carve   */
                  /* the requested chunk from the current brk and compute    */
                  /* its unaligned end address end_misalign, then add the    */
                  /* bytes needed to page-align end_misalign to the          */
                  /* correction. Then call sbrk() again for correction       */
                  /* bytes; if it succeeds, the resulting top chunk can      */
                  /* supply the requested chunk as contiguous memory */    
                  correction += old_size;

                  /* Extend the end address to hit a page boundary */
                  end_misalign = (INTERNAL_SIZE_T) (brk + size + correction);
                  correction += (ALIGN_UP (end_misalign, pagesize)) - end_misalign;

                  assert (correction >= 0);
                  snd_brk = (char *) (MORECORE (correction));

                  /*
                     If can't allocate correction, try to at least find out current
                     brk.  It might be enough to proceed without failing.

                     Note that if second sbrk did NOT fail, we assume that space
                     is contiguous with first sbrk. This is a safe assumption unless
                     program is multithreaded but doesn't use locks and a foreign sbrk
                     occurred between our first and second calls.
                   */
                  /* if this sbrk() failed, just look up the current end address of brk */
                  if (snd_brk == (char *) (MORECORE_FAILURE))
                    {
                      correction = 0;
                      snd_brk = (char *) (MORECORE (0));
                    }
                  else
                  /* if sbrk() succeeded and a morecore hook is installed, call the hook */
                    {
                      /* Call the `morecore' hook if necessary.  */
                      void (*hook) (void) = atomic_forced_read (__after_morecore_hook);
                      if (__builtin_expect (hook != NULL, 0))
                        (*hook)();
                    }
                }

              /* handle non-contiguous cases */
              else
              /* reaching this point means brk was allocated with mmap() */
                {
                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
                    /* MORECORE/mmap must correctly align */
                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
                  else
                  /* alignment handling */
                    {
                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
                      if (front_misalign > 0)
                        {
                          /*
                             Skip over some bytes to arrive at an aligned position.
                             We don't need to specially mark these wasted front bytes.
                             They will never be accessed anyway because
                             prev_inuse of av->top (and any chunk created from its start)
                             is always true after initialization.
                           */

                          aligned_brk += MALLOC_ALIGNMENT - front_misalign;
                        }
                    }

                  /* Find out current end of memory */
                  /* if the recorded end address of brk is invalid, use morecore to obtain the current end of brk */
                  if (snd_brk == (char *) (MORECORE_FAILURE))
                    {
                      snd_brk = (char *) (MORECORE (0));
                    }
                }

              /* Adjust top based on results of second sbrk */
              /* If the end address of brk is valid, make the aligned brk  */
              /* the arena's top chunk, set the top chunk's size, and      */
              /* update the arena's total allocated memory */
              if (snd_brk != (char *) (MORECORE_FAILURE))
                {
                  av->top = (mchunkptr) aligned_brk;
                  set_head (av->top, (snd_brk - aligned_brk + correction) | PREV_INUSE);
                  av->system_mem += correction;

                  /*
                     If not the first time through, we either have a
                     gap due to foreign sbrk or a non-contiguous region.  Insert a
                     double fencepost at old_top to prevent consolidation with space
                     we don't own. These fenceposts are artificial chunks that are
                     marked as inuse and are in any case too small to use.  We need
                     two to make sizes and alignments work out.
                   */
                  
                  /* Set up the old top chunk's fencepost, which needs       */
                  /* MINSIZE bytes; subtracting MINSIZE from old_size gives  */
                  /* the old top chunk's usable space, which here is known   */
                  /* to be at least MINSIZE. Split the old top chunk into a  */
                  /* free chunk and the fencepost: first set the split-off   */
                  /* chunk's size to old_size with the previous chunk        */
                  /* flagged inuse. The split-off chunk is actually free,    */
                  /* yet the fencepost's first chunk flags it as inuse, and  */
                  /* it is then released by force with _int_free(). Then set */
                  /* the fencepost's first chunk to size 2*SIZE_SZ with the  */
                  /* previous chunk flagged inuse, and the fencepost's       */
                  /* second chunk likewise to size 2*SIZE_SZ with the        */
                  /* previous chunk flagged inuse. Note that the main        */
                  /* arena's fencepost differs from a non-main arena's:      */
                  /* here the second chunk's size is set to 2*SIZE_SZ,       */
                  /* whereas in a non-main arena it is set to 0 */
                  if (old_size != 0)
                    {
                      /*
                         Shrink old_top to insert fenceposts, keeping size a
                         multiple of MALLOC_ALIGNMENT. We know there is at least
                         enough space in old_top to do this.
                       */
                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
                      set_head (old_top, old_size | PREV_INUSE);

                      /*
                         Note that the following assignments completely overwrite
                         old_top when old_size was previously MINSIZE.  This is
                         intentional. We need the fencepost, even if old_top otherwise gets
                         lost.
                       */
		      set_head (chunk_at_offset (old_top, old_size),
				(2 * SIZE_SZ) | PREV_INUSE);
		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
				(2 * SIZE_SZ) | PREV_INUSE);
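                      /* Resulting layout at the end of the old heap region
                         (illustrative diagram added for clarity; not part of
                         the glibc source):

                           old_top -> [ free chunk, old_size bytes   | PREV_INUSE ]
                                      [ fencepost 1, 2*SIZE_SZ bytes | PREV_INUSE ]
                                      [ fencepost 2, 2*SIZE_SZ bytes | PREV_INUSE ]
                                      <- old end of the heap */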

                      /* If possible, release the rest. */
                      if (old_size >= MINSIZE)
                        {
                          _int_free (av, old_top, 1);
                        }
                    }
                }
            }
        }
    } /* if (av !=  &main_arena) */

  /* If the arena's allocated memory now exceeds the recorded maximum, update the maximum */
  if ((unsigned long) av->system_mem > (unsigned long) (av->max_system_mem))
    av->max_system_mem = av->system_mem;
  check_malloc_state (av);

  /* finally, do the allocation */
  p = av->top;
  size = chunksize (p);

  /* check that one of the above allocation paths succeeded */
  /* If the top chunk is now large enough for the request, split the */
  /* required chunk off the top chunk and return it to the caller */
  if ((unsigned long) (size) >= (unsigned long) (nb + MINSIZE))
    {
      remainder_size = size - nb;
      remainder = chunk_at_offset (p, nb);
      av->top = remainder;
      set_head (p, nb | PREV_INUSE | (av != &main_arena ? NON_MAIN_ARENA : 0));
      set_head (remainder, remainder_size | PREV_INUSE);
      check_malloced_chunk (av, p, nb);
      return chunk2mem (p);
    }

  /* catch all failure paths */
  __set_errno (ENOMEM);
  return 0;
}
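
The final allocation from top is plain pointer arithmetic built on two small helper macros, chunk_at_offset() and set_head(). Below is a self-contained sketch of the same split with simplified stand-ins for those macros; the heap buffer and the sizes are made-up illustrative values, not glibc source.

#include <stdio.h>
#include <stddef.h>

/* Simplified stand-ins for the glibc macros used above. */
typedef struct { size_t prev_size, size; } chunk;
#define PREV_INUSE 0x1
#define chunk_at_offset(p, s) ((chunk *) (((char *) (p)) + (s)))
#define set_head(p, s)        ((p)->size = (s))

int main (void)
{
  static char heap[0x1000];          /* pretend this is the top chunk */
  chunk *top = (chunk *) heap;
  size_t size = sizeof heap;         /* chunksize (top) */
  size_t nb = 0x110;                 /* padded request size */

  /* Same arithmetic as the tail of the function above: carve nb bytes
     off the front of top; the rest becomes the new top chunk. */
  chunk *remainder = chunk_at_offset (top, nb);
  size_t remainder_size = size - nb;
  set_head (top, nb | PREV_INUSE);
  set_head (remainder, remainder_size | PREV_INUSE);

  printf ("returned chunk %p (size %#zx), new top %p (size %#zx)\n",
          (void *) top, nb, (void *) remainder, remainder_size);
  return 0;
}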

grow_heap (heap_info *h, long diff)

grow_heap() is fairly simple: it first rounds the amount of memory to add up to a page boundary, then computes the sub_heap's new total size new_size. If new_size exceeds HEAP_MAX_SIZE, it fails with -1. If new_size is larger than the sub_heap's current readable/writable region, it calls mprotect() to make the newly added range readable and writable (failing with -2 if that does not succeed) and updates the sub_heap's mprotect_size to new_size. Finally, it sets the sub_heap's size field to new_size.

static int
grow_heap (heap_info *h, long diff)
{
  size_t pagesize = GLRO (dl_pagesize);
  long new_size;

  diff = ALIGN_UP (diff, pagesize);
  new_size = (long) h->size + diff;
  if ((unsigned long) new_size > (unsigned long) HEAP_MAX_SIZE)
    return -1;

  if ((unsigned long) new_size > h->mprotect_size)
    {
      if (__mprotect ((char *) h + h->mprotect_size,
                      (unsigned long) new_size - h->mprotect_size,
                      PROT_READ | PROT_WRITE) != 0)
        return -2;

      h->mprotect_size = new_size;
    }

  h->size = new_size;
  LIBC_PROBE (memory_heap_more, 2, h, h->size);
  return 0;
}
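
The page rounding at the top of grow_heap() comes from the ALIGN_UP macro. A quick standalone illustration (assuming 4 KB pages; the macro bodies mirror glibc's libc-pointer-arith.h):

#include <stdio.h>

/* Mirror of glibc's ALIGN_DOWN / ALIGN_UP; size must be a power of two. */
#define ALIGN_DOWN(base, size) ((base) & -((__typeof__ (base)) (size)))
#define ALIGN_UP(base, size)   ALIGN_DOWN ((base) + (size) - 1, (size))

int main (void)
{
  long pagesize = 0x1000;                           /* assumed 4 KB pages */
  printf ("%#lx\n", ALIGN_UP (0x1234L, pagesize));  /* prints 0x2000 */
  printf ("%#lx\n", ALIGN_UP (0x1000L, pagesize));  /* prints 0x1000 */
  return 0;
}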

new_heap (size_t size, size_t top_pad)

new_heap() maps a block of memory from the mmap region to serve as a sub_heap. On 32-bit systems it maps 1 MB at a time, with the block aligned to a 1 MB boundary; on 64-bit systems it maps 64 MB, aligned to a 64 MB boundary. new_heap() only reserves a block of virtual address space: the mapping is created PROT_NONE (neither readable nor writable) and will not consume swap.

static heap_info *new_heap (size_t size, size_t top_pad)
{
  size_t pagesize = GLRO (dl_pagesize);
  char *p1, *p2;
  unsigned long ul;
  heap_info *h;

  /* Adjust size: the minimum is HEAP_MIN_SIZE (32 KB); the maximum, HEAP_MAX_SIZE, */
  /* is platform dependent (1 MB on 32-bit, 64 MB on 64-bit). Clamp size between the */
  /* two and round it up to a page boundary; if size alone exceeds the maximum, return 0 */
  if (size + top_pad < HEAP_MIN_SIZE)
    size = HEAP_MIN_SIZE;
  else if (size + top_pad <= HEAP_MAX_SIZE)
    size += top_pad;
  else if (size > HEAP_MAX_SIZE)
    return 0;
  else
    size = HEAP_MAX_SIZE;
  size = ALIGN_UP (size, pagesize);

  /* A memory region aligned to a multiple of HEAP_MAX_SIZE is needed.
     No swap space needs to be reserved for the following large
     mapping (on Linux, this is the case for all non-writable mappings
     anyway). */
  /* The global variable aligned_heap_area holds the end address of the last sub_heap */
  /* mapped with mmap, which is already aligned to HEAP_MAX_SIZE. If it is non-NULL, */
  /* try to map a HEAP_MAX_SIZE block starting right there. aligned_heap_area is not */
  /* protected by a lock, so several threads may mmap() at this address at the same */
  /* time; the kernel guarantees that only one of them succeeds. Whether or not the */
  /* mapping succeeds, aligned_heap_area is reset to NULL. If the mapping succeeds but */
  /* the returned address is not HEAP_MAX_SIZE-aligned, unmap it and treat it as failed */
  p2 = MAP_FAILED;
  if (aligned_heap_area)
    {
      p2 = (char *) MMAP (aligned_heap_area, HEAP_MAX_SIZE, PROT_NONE,
                          MAP_NORESERVE);
      aligned_heap_area = NULL;
      if (p2 != MAP_FAILED && ((unsigned long) p2 & (HEAP_MAX_SIZE - 1)))
        {
          __munmap (p2, HEAP_MAX_SIZE);
          p2 = MAP_FAILED;
        }
    }

  if (p2 == MAP_FAILED)
    {
      p1 = (char *) MMAP (0, HEAP_MAX_SIZE << 1, PROT_NONE, MAP_NORESERVE);

      /* Mapping 2 * HEAP_MAX_SIZE bytes succeeded. Take the first HEAP_MAX_SIZE-aligned */
      /* address at or above p1 as p2, the start of the sub_heap; p2 + HEAP_MAX_SIZE is */
      /* its end and is stored in the global aligned_heap_area for the next call. Then */
      /* return the excess virtual memory on both sides of the aligned block to the OS */
      if (p1 != MAP_FAILED)
        {
          p2 = (char *) (((unsigned long) p1 + (HEAP_MAX_SIZE - 1))
                         & ~(HEAP_MAX_SIZE - 1));
          ul = p2 - p1;
          if (ul)
            __munmap (p1, ul);
          else
            aligned_heap_area = p2 + HEAP_MAX_SIZE;
          __munmap (p2 + HEAP_MAX_SIZE, HEAP_MAX_SIZE - ul);
        }
      else
        {
          /* Try to take the chance that an allocation of only HEAP_MAX_SIZE
             is already aligned. */
          /* Mapping 2 * HEAP_MAX_SIZE bytes failed; try mapping just HEAP_MAX_SIZE */
          /* bytes instead. If that fails, return 0; if it succeeds but the address */
          /* is not HEAP_MAX_SIZE-aligned, unmap it and return 0 as well */
          p2 = (char *) MMAP (0, HEAP_MAX_SIZE, PROT_NONE, MAP_NORESERVE);
          if (p2 == MAP_FAILED)
            return 0;

          if ((unsigned long) p2 & (HEAP_MAX_SIZE - 1))
            {
              __munmap (p2, HEAP_MAX_SIZE);
              return 0;
            }
        }
    }

  /* Call mprotect() to make the first size bytes readable and writable; if that */
  /* fails, unmap the whole sub_heap. Otherwise fill in the heap_info fields */
  if (__mprotect (p2, size, PROT_READ | PROT_WRITE) != 0)
    {
      __munmap (p2, HEAP_MAX_SIZE);
      return 0;
    }
  h = (heap_info *) p2;
  h->size = size;
  h->mprotect_size = size;
  LIBC_PROBE (memory_heap_new, 2, h, h->size);
  return h;
}
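
The 2 * HEAP_MAX_SIZE trick above can be demonstrated in isolation. The following sketch (Linux only; a 64 MB HEAP_MAX_SIZE is assumed, matching 64-bit systems) over-maps a PROT_NONE region and unmaps the slack on both sides, leaving exactly one HEAP_MAX_SIZE-aligned block:

#include <stdio.h>
#include <stdint.h>
#include <sys/mman.h>

#define HEAP_MAX_SIZE (64UL * 1024 * 1024)  /* assumed 64-bit value */

int main (void)
{
  char *p1 = mmap (NULL, HEAP_MAX_SIZE << 1, PROT_NONE,
                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (p1 == MAP_FAILED)
    return 1;

  /* First HEAP_MAX_SIZE-aligned address at or above p1. */
  char *p2 = (char *) (((uintptr_t) p1 + (HEAP_MAX_SIZE - 1))
                       & ~(HEAP_MAX_SIZE - 1));
  size_t head = p2 - p1;
  if (head)
    munmap (p1, head);                               /* slack before p2 */
  munmap (p2 + HEAP_MAX_SIZE, HEAP_MAX_SIZE - head); /* slack after */

  printf ("aligned sub_heap at %p\n", (void *) p2);
  munmap (p2, HEAP_MAX_SIZE);
  return 0;
}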

tcache_put (mchunkptr chunk, size_t tc_idx)

static __always_inline void tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);

  /* Mark this chunk as "in the tcache" so the test in _int_free will
     detect a double free.  */
  e->key = tcache;  /* set key to the per-thread tcache pointer */

  /* Push the chunk onto the head of the tcache bin's singly linked list */
  e->next = tcache->entries[tc_idx];  
  tcache->entries[tc_idx] = e;
  ++(tcache->counts[tc_idx]);
}
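
For comparison, the counterpart called on the allocation fast path, tcache_get(), simply pops the head of the same singly linked list and clears key; the glibc 2.31 version reads (explanatory comments added):

static __always_inline void *
tcache_get (size_t tc_idx)
{
  tcache_entry *e = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e->next;   /* unlink the head chunk */
  --(tcache->counts[tc_idx]);
  e->key = NULL;                       /* no longer "in the tcache" */
  return (void *) e;
}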

malloc_consolidate(mstate av)
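
malloc_consolidate() empties every fast bin: each chunk is removed from its bin, coalesced with any free neighbor (the backward merge goes through unlink_chunk()), and linked into the unsorted bin; a merged chunk that borders the top chunk is folded into top instead. Chunks freed into fast bins keep the next chunk's PREV_INUSE bit set so they are never coalesced at free time; this function performs that deferred coalescing.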

static void malloc_consolidate(mstate av)
{
  mfastbinptr*    fb;                 /* current fastbin being consolidated */
  mfastbinptr*    maxfb;              /* last fastbin (for loop control) */
  mchunkptr       p;                  /* current chunk being consolidated */
  mchunkptr       nextp;              /* next chunk to consolidate */
  mchunkptr       unsorted_bin;       /* bin header */
  mchunkptr       first_unsorted;     /* chunk to link to */

  /* These have same use as in free() */
  mchunkptr       nextchunk;
  INTERNAL_SIZE_T size;
  INTERNAL_SIZE_T nextsize;
  INTERNAL_SIZE_T prevsize;
  int             nextinuse;

  atomic_store_relaxed (&av->have_fastchunks, false);

  unsorted_bin = unsorted_chunks(av);

  /*
    Remove each chunk from fast bin and consolidate it, placing it
    then in unsorted bin. Among other reasons for doing this,
    placing in unsorted bin avoids needing to calculate actual bins
    until malloc is sure that chunks aren't immediately going to be
    reused anyway.
  */

  maxfb = &fastbin (av, NFASTBINS - 1);
  fb = &fastbin (av, 0);
  do {
    p = atomic_exchange_acq (fb, NULL);
    if (p != 0) {
      do {
	{
	  unsigned int idx = fastbin_index (chunksize (p));
	  if ((&fastbin (av, idx)) != fb)
	    malloc_printerr ("malloc_consolidate(): invalid chunk size");
	}

	check_inuse_chunk(av, p);
	nextp = p->fd;

	/* Slightly streamlined version of consolidation code in free() */
	size = chunksize (p);
	nextchunk = chunk_at_offset(p, size);
	nextsize = chunksize(nextchunk);

	if (!prev_inuse(p)) {
	  prevsize = prev_size (p);
	  size += prevsize;
	  p = chunk_at_offset(p, -((long) prevsize));
	  if (__glibc_unlikely (chunksize(p) != prevsize))
	    malloc_printerr ("corrupted size vs. prev_size in fastbins");
	  unlink_chunk (av, p);
	}

	if (nextchunk != av->top) {
	  nextinuse = inuse_bit_at_offset(nextchunk, nextsize);

	  if (!nextinuse) {
	    size += nextsize;
	    unlink_chunk (av, nextchunk);
	  } else
	    clear_inuse_bit_at_offset(nextchunk, 0);

	  first_unsorted = unsorted_bin->fd;
	  unsorted_bin->fd = p;
	  first_unsorted->bk = p;

	  if (!in_smallbin_range (size)) {
	    p->fd_nextsize = NULL;
	    p->bk_nextsize = NULL;
	  }

	  set_head(p, size | PREV_INUSE);
	  p->bk = unsorted_bin;
	  p->fd = first_unsorted;
	  set_foot(p, size);
	}

	else {
	  size += nextsize;
	  set_head(p, size | PREV_INUSE);
	  av->top = p;
	}

      } while ( (p = nextp) != 0);

    }
  } while (fb++ != maxfb);
}

Structures

tcache_perthread_struct & tcache_entry

/* per-thread management structure for the tcache */
typedef struct tcache_perthread_struct
{
  uint16_t counts[TCACHE_MAX_BINS];        /* number of cached chunks in each bin; TCACHE_MAX_BINS is normally 64 */
  tcache_entry *entries[TCACHE_MAX_BINS];  /* array of pointers to the heads of the tcache bins */
} tcache_perthread_struct;

/* basic tcache node; the bins are singly linked lists */
typedef struct tcache_entry
{
  struct tcache_entry *next;              /* pointer to the next chunk in the bin */
  /* This field exists to detect double frees.  */
  struct tcache_perthread_struct *key;    /* added to detect tcache double frees */
} tcache_entry;
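
With these two structures, a tcache hit is plain array indexing plus a list pop. The sketch below mirrors the csize2tidx() mapping from chunk size to bin index; the constants are assumed 64-bit values (MALLOC_ALIGNMENT 16, MINSIZE 32), used here only for illustration:

#include <stdio.h>

/* Assumed 64-bit constants, for illustration only. */
#define MALLOC_ALIGNMENT 16
#define MINSIZE          32

/* Mirrors glibc's csize2tidx(): chunk size -> tcache bin index. */
#define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)

int main (void)
{
  /* 0x20-byte chunks use counts[0]/entries[0], 0x30-byte chunks index 1,
     and 0x410 -- the largest tcache-able chunk on 64-bit -- index 63. */
  printf ("%zu\n", (size_t) csize2tidx (0x20));   /* 0 */
  printf ("%zu\n", (size_t) csize2tidx (0x30));   /* 1 */
  printf ("%zu\n", (size_t) csize2tidx (0x410));  /* 63 */
  return 0;
}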

malloc_state

glibc uses malloc_state to manage arenas; every arena is an instance of struct malloc_state.

struct malloc_state
{
  /* Serialize access.  */
  /* The mutex serializes access to the arena: when several threads use the */
  /* same arena, the first one to acquire the mutex allocates from it, then */
  /* releases the mutex so that other threads can use the arena */
  __libc_lock_define (, mutex);

  /* Flags (formerly in max_fast).  */
  /* flags records some properties of the arena */
  int flags;

  /* Set if the fastbin chunks contain recently inserted free blocks.  */
  /* Note this is a bool but not all targets support atomics on booleans.  */
  /* flag: set when chunks have recently been inserted into the fast bins */
  int have_fastchunks;

  /* Fastbins */
  /* mchunkptr and mfastbinptr are both pointers to struct malloc_chunk; */
  /* fastbinsY holds the heads of the fast bins */
  mfastbinptr fastbinsY[NFASTBINS];

  /* Base of the topmost chunk -- not otherwise kept in a bin */
  /* top chunk */
  mchunkptr top;

  /* The remainder from the most recent split of a small request */
  /* Last remainder chunk */
  mchunkptr last_remainder;

  /* Normal bins packed as described above */
  /* bins holds the unsorted bin plus the small bins and large bins */
  mchunkptr bins[NBINS * 2 - 2];

  /* Bitmap of bins */
  unsigned int binmap[BINMAPSIZE];

  /* Linked list */ 
  /* pointer to the next arena */
  struct malloc_state *next;

  /* Linked list for free arenas.  Access to this field is serialized
     by free_list_lock in arena.c.  */
  /* pointer to the next free arena */
  struct malloc_state *next_free;

  /* Number of threads attached to this arena.  0 if the arena is on
     the free list.  Access to this field is serialized by
     free_list_lock in arena.c.  */
  /* number of threads attached to this arena */
  INTERNAL_SIZE_T attached_threads;

  /* Memory allocated from the system in this arena.  */
  INTERNAL_SIZE_T system_mem;
  INTERNAL_SIZE_T max_system_mem;
};
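
fastbinsY above is indexed by chunk size through fastbin_index(). A small standalone sketch of that mapping (SIZE_SZ assumed to be 8, i.e. a 64-bit system):

#include <stdio.h>

#define SIZE_SZ 8  /* assumed: 64-bit INTERNAL_SIZE_T */

/* Mirrors glibc's fastbin_index(): shift by 4 on 64-bit, 3 on 32-bit. */
#define fastbin_index(sz) \
  ((((unsigned int) (sz)) >> (SIZE_SZ == 8 ? 4 : 3)) - 2)

int main (void)
{
  /* 0x20-byte chunks land in fastbinsY[0], 0x30 in [1], ..., 0x80 in [6]. */
  for (unsigned sz = 0x20; sz <= 0x80; sz += 0x10)
    printf ("chunk size %#x -> fastbinsY[%u]\n", sz, fastbin_index (sz));
  return 0;
}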

malloc_chunk

glibc divides memory into blocks (chunks) of many different sizes to manage allocation and recycling. The source defines struct malloc_chunk to describe these blocks.

struct malloc_chunk {
  
  /* If the previous chunk is free, this field holds its size */
  INTERNAL_SIZE_T      mchunk_prev_size;  /* Size of previous chunk (if free).  */
  /* size of this chunk, plus flag bits describing this chunk and the previous one */
  INTERNAL_SIZE_T      mchunk_size;       /* Size in bytes, including overhead. */

  /* fd and bk exist only while the chunk is free; they link the */
  /* chunk into the doubly linked free list of its bin */
  struct malloc_chunk* fd;         /* double links -- used only if free. */
  struct malloc_chunk* bk;

  /* Only used for large blocks: pointer to next larger size.  */
  /* Used only while the chunk sits in a large bin. Large-bin chunks are
     sorted by size, and several chunks may share one size; these two fields
     skip over runs of equal size to speed up the search for a fitting free
     chunk: fd_nextsize points to the first free chunk of the next larger
     size, bk_nextsize to the first free chunk of the next smaller size */
  struct malloc_chunk* fd_nextsize; /* double links -- used only if free. */
  struct malloc_chunk* bk_nextsize;
};
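
Note that the fields from fd onward overlap the user data area: a chunk only needs the link fields while it is free. This is also why the pointer malloc() returns sits 2 * SIZE_SZ past the chunk header. A minimal sketch of the chunk2mem()/mem2chunk() conversions (SIZE_SZ assumed to be 8):

#include <stdio.h>

#define SIZE_SZ 8  /* assumed: 64-bit INTERNAL_SIZE_T */

/* Mirror of glibc's conversions: user data starts right after the
   mchunk_prev_size and mchunk_size fields. */
#define chunk2mem(p)   ((void *) ((char *) (p) + 2 * SIZE_SZ))
#define mem2chunk(mem) ((void *) ((char *) (mem) - 2 * SIZE_SZ))

int main (void)
{
  unsigned char chunk[0x20];      /* pretend this is a 0x20-byte chunk */
  void *mem = chunk2mem (chunk);  /* what malloc() would hand out */
  printf ("header at %p, user data at %p\n", (void *) chunk, mem);
  printf ("round trip ok: %d\n", mem2chunk (mem) == (void *) chunk);
  return 0;
}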

malloc_par

glibc keeps its allocation parameters in struct malloc_par; there is exactly one global instance, mp_.

struct malloc_par
{
  /* Tunable parameters */
  unsigned long trim_threshold;    /* shrink threshold for the top chunk */
  INTERNAL_SIZE_T top_pad;         /* extra pad added to each allocation request; 0 by default */
  INTERNAL_SIZE_T mmap_threshold;  /* requests at least this large are served with mmap */
  INTERNAL_SIZE_T arena_test;      /* while the process has at most arena_test arenas, existing arenas are not reused */
  INTERNAL_SIZE_T arena_max;       /* once arena_max arenas exist, no new ones are created; existing ones are reused */

  /* Memory map support */
  int n_mmaps;                     /* number of blocks this process currently has allocated with mmap() */
  int n_mmaps_max;                 /* maximum number of blocks that may be allocated with mmap() */
  int max_n_mmaps;                 /* highest value n_mmaps has ever reached */
  /* the mmap_threshold is dynamic, until the user sets
     it manually, at which point we need to disable any
     dynamic behavior. */
  int no_dyn_threshold;            /* whether dynamic mmap-threshold adjustment is disabled; defaults to 0 (enabled) */

  /* Statistics */
  /* mmapped_mem and max_mmapped_mem both track the amount of memory allocated with mmap; the two are usually equal */
  INTERNAL_SIZE_T mmapped_mem;    
  INTERNAL_SIZE_T max_mmapped_mem;

  /* First address handed out by MORECORE/sbrk.  */
  char *sbrk_base;                  /* start address of the heap */

#if USE_TCACHE
  /* Maximum number of buckets to use.  */
  size_t tcache_bins;              /* number of tcache bins */
  size_t tcache_max_bytes;         /* maximum chunk size held in the tcache */
  /* Maximum number of chunks in each bucket.  */
  size_t tcache_count;             /* maximum number of chunks in each tcache bin */
  /* Maximum number of chunks to remove from the unsorted list, which
     aren't used to prefill the cache.  */
  size_t tcache_unsorted_limit;
#endif
};

_heap_info

typedef struct _heap_info
{
  /* pointer to the arena this sub_heap belongs to */
  mstate ar_ptr; /* Arena for this heap. */
  
  /* links the sub_heaps of one arena into a singly linked list; */
  /* prev points to the previous sub_heap */
  struct _heap_info *prev; /* Previous heap. */
  
  /* current size of the sub_heap, page aligned */
  size_t size;   /* Current size in bytes. */

  /* size of the sub_heap that has already been made readable */
  /* and writable with mprotect(), i.e. its usable portion */
  size_t mprotect_size; /* Size in bytes that has been mprotected
                           PROT_READ|PROT_WRITE.  */
  
  /* Make sure the following data is properly aligned, particularly
     that sizeof (heap_info) + 2 * SIZE_SZ is a multiple of
     MALLOC_ALIGNMENT. */
  /* this pad keeps sizeof (heap_info) + 2 * SIZE_SZ a multiple of MALLOC_ALIGNMENT */
  char pad[-6 * SIZE_SZ & MALLOC_ALIGN_MASK];
} heap_info;
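
Because new_heap() guarantees that every sub_heap starts on a HEAP_MAX_SIZE boundary, glibc can recover the heap_info (and through ar_ptr the arena) of any non-main-arena chunk with a single mask; this is what the heap_for_ptr() macro in arena.c does. A sketch of that computation (64 MB HEAP_MAX_SIZE assumed; the chunk address is made up):

#include <stdio.h>
#include <stdint.h>

#define HEAP_MAX_SIZE (64UL * 1024 * 1024)  /* assumed 64-bit value */

/* Same computation as glibc's heap_for_ptr(): mask off the low bits. */
#define heap_for_ptr(ptr) \
  ((void *) ((uintptr_t) (ptr) & ~(HEAP_MAX_SIZE - 1)))

int main (void)
{
  void *chunk = (void *) 0x7f0004123450UL;            /* hypothetical address */
  printf ("heap_info at %p\n", heap_for_ptr (chunk)); /* 0x7f0004000000 */
  return 0;
}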

Sources

《glibc内存管理ptmalloc源代码分析》
glibc 2.31 source code from the official glibc website
