malloc 13us耗时分析定位
【问题描述】
10个生产者malloc(50KB),1个消费者(模拟业务,1ms后free)
调用malloc接口大概13us,分析耗时
【调用栈】

【编译调试】
root@jch:~/glibc_test/glibc-2.31/build# make install -sj;gcc -g -L /root/glibc-2.31_install/64/lib -Wl,--rpath=/root/glibc-2.31_install/64/libs -Wl,-I /root/glibc-2.31_install/64/lib/ld-2.31.so ../hello.c -o hello -lpthread -ljemalloc -mcmodel=medium;./hello
【内部耗时】
1) 打点结果
[TIME=1685875827][140712515462912]user_call_malloc ,count: 18000,avg: 13(us), min:0,max:9155, total: 242970(us)
[TIME1=1685875828][140712515462912]grow_heap ,count: 17961,avg: 7(us), min:0,max:9148, total: 137963(us)
[TIME1=1685875828][140712515462912]sysmalloc_2414_2775_sethead2 ,count: 17974,avg: 4(us), min:0,max:6546, total: 85628(us)


2)grow_heap里的__m_protect耗时7us

3)sysmalloc里的set_head接口


【疑问】
第一个为什么这么耗时

第二个改内存值可能触发系统调用
【测试代码】
#include<stdio.h>
#include<unistd.h>
#include<stdlib.h>
#include<string.h>
#include "sys/time.h"
#include <pthread.h>
#define TOTAL_COUNT (1000 * 1000 * 10 )
#define NUM_Threads 10
#define COUNT (TOTAL_COUNT/NUM_Threads)
#define PRODUCE_COUNT 18000
#define AVERAGE_PRODUCE_COUNT (PRODUCE_COUNT/NUM_Threads)
#define je_malloc malloc
#define je_free free
//#include <jemalloc/jemalloc.h>
static int producer_index=0;
static int consumer_index=0;
typedef unsigned long long uint64;
#define MIN(A, B) ((B) < (A) ? (B) : (A))
#define MAX(A, B) ((B) > (A) ? (B) : (A))
uint64 us_clock_now(){
struct timeval st;
gettimeofday(&st, NULL);
return st.tv_sec * 1000 * 1000 + st.tv_usec;
}
#define TIME_BEGIN(name) uint64 __##name##begin = us_clock_now();
#define TIME_END(name, print_interval_us) \
do { \
static __thread uint64 __##name##total = 0; \
static __thread uint64 __##name##count = 0; \
static __thread uint64 __##name##min = 1000000000000; \
static __thread uint64 __##name##max = 0; \
static __thread uint64 __##name##pre_total = 0; \
static __thread uint64 __##name##pre_count = 0; \
static __thread uint64 __##name##log_time = 0; \
uint64 current = (us_clock_now() - __##name##begin);\
__##name##min = MIN(current, __##name##min); \
__##name##max = MAX(current, __##name##max); \
__##name##total += current; \
__##name##count++; \
if (us_clock_now() - __##name##log_time > print_interval_us) \
{ \
__##name##log_time = us_clock_now(); \
uint64 __##name##this_total = __##name##total - __##name##pre_total; \
uint64 __##name##this_count = __##name##count - __##name##pre_count; \
__##name##pre_total = __##name##total; \
__##name##pre_count = __##name##count; \
if (__##name##this_count > 1) { \
printf("[TIME=%llu][%ld]%-30s,count:%10llu,avg:%10llu(us), min:%llu,max:%llu, total:%10llu(us)\n", \
us_clock_now()/1000000,pthread_self(), \
#name, __##name##this_count, \
__##name##this_count == 0 ? 0 :__##name##this_total / __##name##this_count, __##name##min, __##name##max, __##name##this_total); \
} \
__##name##min=1000000000000; \
__##name##max = 0;\
} \
} while (0);
static char* threads[NUM_Threads][COUNT];
static uint64 malloc_time[NUM_Threads][COUNT];
static uint64 global_malloc_time[NUM_Threads * COUNT];
static uint64 global_producer_index = 0;
static uint64 global_consumer_index = 0;
pthread_mutex_t gMutex = PTHREAD_MUTEX_INITIALIZER;
void *PrintHello(void *arg)
{
int id = (int) arg;
static struct timeval st;
static struct timeval ed;
static long time_total=0;
long time_current;
uint64 last_produce_time = us_clock_now();
for(int i=0;i<COUNT;i++){
producer_index=i;
// gettimeofday(&st, NULL);
TIME_BEGIN(user_call_malloc);
char * s1=(char *)je_malloc(50 * 1000);
*s1 = 'a';
TIME_END(user_call_malloc, 10 * 1000 *1000);
// gettimeofday(&ed, NULL);
threads[id][i] = s1;
pthread_mutex_lock(&gMutex);
global_malloc_time[global_producer_index]= us_clock_now();
global_producer_index++;
// malloc_time[id][i]=us_clock_now();
pthread_mutex_unlock(&gMutex);
// time_current = (ed.tv_sec - st.tv_sec) * 1000000 + (ed.tv_usec - st.tv_usec) ;
// time_total += time_current;
if(i%AVERAGE_PRODUCE_COUNT==0){
while(us_clock_now() - last_produce_time < 1000*1000){
usleep(1);
}
last_produce_time = us_clock_now();
}
}
printf("[time_total]:%ld, threadId=%d\n", time_total,id);
return 0;
}
void *Consumer(void *arg)
{
int id = (int) arg;
static struct timeval st;
static struct timeval ed;
static long time_total=0;
long time_current;
for(id=0;id<NUM_Threads;id++){
for (int i=0;i<COUNT;i++){
while(global_malloc_time[global_consumer_index]==0 || us_clock_now() - global_malloc_time[global_consumer_index] <= 1000) {
usleep(1);
}
global_consumer_index++;
TIME_BEGIN(free);
je_free(threads[id][i]);
consumer_index=i;
if(global_consumer_index % 100000==0){
printf("gap=%llu,consumer_index=%llu, producer_index=%llu\n",global_producer_index-global_consumer_index, global_consumer_index,global_producer_index);
}
TIME_END(free, 10000 * 1000);
}
};
return 0;
}
int main(){
printf("AVERAGE_PRODUCE_COUNT=%d\n", AVERAGE_PRODUCE_COUNT);
memset(threads, 0, sizeof(threads));
memset(global_malloc_time, 0, sizeof(global_malloc_time));
pthread_t tids[NUM_Threads];
int i;
int ret;
for (i=0; i<NUM_Threads; i++)
{
ret = pthread_create(&tids[i], NULL, PrintHello, (void *)i);
if (ret != 0)
{
printf("pthread_create error: error_code = \n");
}
}
usleep(10 * 1000);
// sleep(2);
pthread_create(&tids[i], NULL, Consumer, NULL);
void *a1;
for (i=0; i<NUM_Threads; i++){
pthread_join(tids[i],&a1);
}
return 0;
}
`

浙公网安备 33010602011771号