【DAOS】CaRT初始化过程和数据发送代码

目录

初始化

数据发送

附录

附录1 msg_send_unexpected的定义和赋值

附录2 na_ofi_msg_send_unexpected

通信两端上下文的创建


初始化

engine进程中的初始化:

daos\src\engine\init.c

main(int argc, char **argv)

--server_init(argc, argv)

......

/* initialize the network layer  初始化网络层*/

----crt_init_opt(daos_sysname,  CRT_FLAG_BIT_SERVER,  daos_crt_init_opt_get(true, ctx_nr));//检查配置的provider在crt_na_dict表中是否存在

------d_log_init(); //初始化CART日志

------data_init(server, opt) #初始化一些默认值,opt设置的值会替换从环境变量读到的;添加计数器

------crt_hg_init()  #初始化HG日志系统,HG 日志等级

------crt_grp_init(grpid) #初始化grp的lookup cache;swim等

------prov_data_init(&crt_gdata.cg_prov_gdata[prov],prov, set_sep, max_num_ctx,max_expect_size, max_unexpect_size); //provider 选择

......

------crt_internal_rpc_register(server) #注册RPC函数

 

启动协程loop的过程

 

daos\src\engine\init.c

main(int argc, char **argv)

--server_init(argc, argv)

......

/* initialize the network layer */

----ctx_nr = dss_ctx_nr_get();

----crt_init_opt(daos_sysname,  CRT_FLAG_BIT_SERVER,  daos_crt_init_opt_get(true, ctx_nr));//检查配置的provider在crt_na_dict表中是否存在

------data_init(server, opt) #初始化一些默认值,opt设置的值会替换从环境变量读到的;添加计数器

------crt_hg_init()  #初始化HG日志系统,HG 日志等级

------crt_grp_init(grpid) #初始化grp的lookup cache;swim等

......

------crt_internal_rpc_register(server) #注册RPC函数

/* initialize service */

----dss_srv_init(); #初始化Argobots等

......

for (i = 0; i < dss_sys_xs_nr; i++){

------dss_start_xs_id(xs_id, false, DSS_SYS_ROLE)

--------dss_xstreams_init() /* 读取环境变量,启动X stream 见详情1*/

----------dss_start_one_xstream(obj->cpuset, xs_id); //分配cpu核,分配名字:daos_sys_$num\daos_io_$tgtid\daos_off_$num

------------dss_sched_init(dx); //

  /** start progress ULT */

------------daos_abt_thread_create(dx->dx_sp, dss_free_stack_cb, dx->dx_pools[DSS_POOL_NET_POLL],

    dss_srv_handler, dx, attr,

    &dx->dx_progress);  //启动了abt线程执行dss_srv_handler

}

其中的协程处理函数:

dss_srv_handler

--if (dx->dx_main_xs) {

   daos_abt_thread_create(dx->dx_sp, dss_free_stack_cb, dx->dx_pools[DSS_POOL_NVME_POLL],

    dss_nvme_poll_ult, NULL, attr, NULL);}   //nvme

/* main service progress loop */

--for (;;) {

if (dx->dx_comm) {

rc = crt_progress(dmi->dmi_ctx, dx->dx_timeout);

if (rc != 0 && rc != -DER_TIMEDOUT) {

D_ERROR("failed to progress CART context: %d\n",

rc);

/* XXX Sometimes the failure might be just

 * temporary, Let's keep progressing for now.

 */

}

}

if (dss_xstream_exiting(dx))

break;

ABT_thread_yield();

}

其中的crt_progress:

crt_progress

--crt_hg_progress(&ctx->cc_hg_ctx, 0)

----hg_ret = HG_Progress(hg_context, hg_timeout);       /** progress RPC execution */

----hg_ret = HG_Trigger(hg_context, 0, total, &count);  /** some RPCs have progressed, call Trigger */

--timeout = crt_exec_progress_cb(ctx, timeout);

--if (timeout != 0 && (rc == 0 || rc == -DER_TIMEDOUT)) {

      rc = crt_hg_progress(&ctx->cc_hg_ctx, timeout);

}

其中的HG_Progress

HG_Progress(hg_context, hg_timeout);       /** progress RPC execution */

--HG_Core_progress(private_context, timeout);  /* Make progress on the HG layer */

----hg_core_progress(private_context, timeout);

------hg_core_poll_try_wait(context))/hg_core_poll_wait(context, poll_timeout, &progressed)/hg_core_poll(context, poll_timeout, &progressed)

--------hg_core_progress_na(HG_CORE_CONTEXT_CLASS(context)->core_class.na_class, context->core_context.na_context, progress_timeout, &progressed_na)

for (;;) {

----------NA_Trigger(na_context, 0, HG_CORE_MAX_TRIGGER_COUNT,cb_ret, &actual_count) //处理progress出来的事件

------------completion_data_ptr =  hg_atomic_queue_pop_mc(na_private_context->completion_queue)//从completion queue(cq)中取出已完成的事件

------------completion_data.callback(&completion_data.callback_info) //执行回调

----------NA_Progress(na_class, na_context, hg_time_to_ms(hg_time_subtract(deadline, now))) //progress出完成事件

------------na_class->ops->progress(na_class, context, (unsigned int) (remaining * 1000.0))

}

数据发送

以上完成监听和等待消息,下面是数据发送。当命令行输入pool query查询命令:

#查询每个pool上tgt的数据/元数据容量使用是否符合预期

storage dmg pool query  ${poolname}

底层调用dc_pool_query接口函数发送指令:

应用层

------------------------------------------------------------->

int dc_pool_query(tse_task_t *task) #查询池信息,其中调用daos_rpc_send

--daos_rpc_send(crt_rpc_t *rpc, tse_task_t *task)

----crt_req_send(rpc, daos_rpc_cb, task);

CaRT (集群网络层)

------------------------------------------------------------->

crt_req_send(crt_rpc_t *req, crt_cb_t complete_cb, void *arg)             #\ceastor\src\cart\crt_rpc.c

--crt_req_send_internal(struct crt_rpc_priv *rpc_priv)   #会在这里找链接

----crt_req_send_immediately

------crt_hg_req_send

--------HG_Forward

mercury  (RPC传输层)

------------------------------------------------------------->

HG_Forward              #\mercury\src\mercury.c

--HG_Core_forward

----hg_core_forward

------hg_core_handle->forward #hg_core_handle->forward = hg_core_handle->is_self ? hg_core_forward_self : hg_core_forward_na;

--------hg_core_forward_na(struct hg_core_private_handle *hg_core_handle)

----------NA_Msg_send_unexpected

-------------na_class->ops->msg_send_unexpected 

--------------na_ofi_msg_send_unexpected  #转到这里的过程,见后面附录1的解释

----------------na_ofi_msg_send

-------------------fi_tsend

ofi (网络传输层)

------------------------------------------------------------->

----fi_tsend      #ofi\include\rdma\fi_tagged.h

------ep->tagged->send  

-------rxm_ep_tsend   (转到这里的过程,见后面附录2解释)

rxm_ep_tsend

--ret = rxm_get_conn(rxm_ep, dest_addr, &rxm_conn);

--ret = rxm_send_common(rxm_ep, rxm_conn, &iov, &desc, 1, context, 0,rxm_ep->util_ep.tx_op_flags, tag, ofi_op_tagged);

----ret = rxm_send_eager #/rxm_send_sar/rxm_ep_rndv_tx_send

------ret = rxm_msg_tsend #/rxm_direct_send/rxm_ep_msg_normal_send

--------fi_tsend/fi_tsenddata  #count == 0 or count == 1

--------fi_tsendmsg

----------ep->tagged->sendmsg(ep, msg, flags);#--->根据附录2中的fi_ops_tagged rxm_ops_tagged,ep->tagged->sendmsg-->rxm_ep_tsendmsg

------------rxm_ep_tsendmsg

--------------rxm_send_common

----------------ret = rxm_send_eager #data_len <= rxm_ep->eager_limit

----------------ret = rxm_send_sar #data_len <= rxm_ep->sar_limit

----------------ret = rxm_ep_rndv_tx_send #


附录

附录1 msg_send_unexpected的定义和赋值

msg_send_unexpected的定义和赋值

msg_send_unexpected的定义 mercury-master-commiut\src\na\na.h

/* NA plugin callbacks */

struct na_class_ops {

  const char *class_name;

  bool (*check_protocol)(const char *protocol_name);

  na_return_t (*initialize)(

    na_class_t *na_class, const struct na_info *na_info, bool listen);

  na_return_t (*finalize)(na_class_t *na_class);

  void (*cleanup)(void);

  na_return_t (*context_create)(

    na_class_t *na_class, void **plugin_context, uint8_t id);

  na_return_t (*context_destroy)(na_class_t *na_class, void *plugin_context);

  na_op_id_t *(*op_create)(na_class_t *na_class);

  na_return_t (*op_destroy)(na_class_t *na_class, na_op_id_t *op_id);

  na_return_t (*addr_lookup)(

    na_class_t *na_class, const char *name, na_addr_t *addr);

  na_return_t (*addr_free)(na_class_t *na_class, na_addr_t addr);

  na_return_t (*addr_set_remove)(na_class_t *na_class, na_addr_t addr);

  na_return_t (*addr_self)(na_class_t *na_class, na_addr_t *addr);

  na_return_t (*addr_dup)(

    na_class_t *na_class, na_addr_t addr, na_addr_t *new_addr);

  bool (*addr_cmp)(na_class_t *na_class, na_addr_t addr1, na_addr_t addr2);

  bool (*addr_is_self)(na_class_t *na_class, na_addr_t addr);

  na_return_t (*addr_to_string)(

    na_class_t *na_class, char *buf, size_t *buf_size, na_addr_t addr);

  size_t (*addr_get_serialize_size)(na_class_t *na_class, na_addr_t addr);

  na_return_t (*addr_serialize)(

    na_class_t *na_class, void *buf, size_t buf_size, na_addr_t addr);

  na_return_t (*addr_deserialize)(na_class_t *na_class, na_addr_t *addr,

    const void *buf, size_t buf_size);

  size_t (*msg_get_max_unexpected_size)(const na_class_t *na_class);

  size_t (*msg_get_max_expected_size)(const na_class_t *na_class);

  size_t (*msg_get_unexpected_header_size)(const na_class_t *na_class);

  size_t (*msg_get_expected_header_size)(const na_class_t *na_class);

  na_tag_t (*msg_get_max_tag)(const na_class_t *na_class);

  void *(*msg_buf_alloc)(

    na_class_t *na_class, size_t buf_size, void **plugin_data);

  na_return_t (*msg_buf_free)(

    na_class_t *na_class, void *buf, void *plugin_data);

  na_return_t (*msg_init_unexpected)(

    na_class_t *na_class, void *buf, size_t buf_size);

  na_return_t (*msg_send_unexpected)(na_class_t *na_class,

    na_context_t *context, na_cb_t callback, void *arg, const void *buf,

    size_t buf_size, void *plugin_data, na_addr_t dest_addr,

    uint8_t dest_id, na_tag_t tag, na_op_id_t *op_id);

  na_return_t (*msg_recv_unexpected)(na_class_t *na_class,

    na_context_t *context, na_cb_t callback, void *arg, void *buf,

    size_t buf_size, void *plugin_data, na_op_id_t *op_id);

  na_return_t (*msg_init_expected)(

    na_class_t *na_class, void *buf, size_t buf_size);

  na_return_t (*msg_send_expected)(na_class_t *na_class,

    na_context_t *context, na_cb_t callback, void *arg, const void *buf,

    size_t buf_size, void *plugin_data, na_addr_t dest_addr,

    uint8_t dest_id, na_tag_t tag, na_op_id_t *op_id);

  na_return_t (*msg_recv_expected)(na_class_t *na_class,

    na_context_t *context, na_cb_t callback, void *arg, void *buf,

    size_t buf_size, void *plugin_data, na_addr_t source_addr,

    uint8_t source_id, na_tag_t tag, na_op_id_t *op_id);

  na_return_t (*mem_handle_create)(na_class_t *na_class, void *buf,

    size_t buf_size, unsigned long flags, na_mem_handle_t *mem_handle);

  na_return_t (*mem_handle_create_segments)(na_class_t *na_class,

    struct na_segment *segments, size_t segment_count, unsigned long flags,

    na_mem_handle_t *mem_handle);

  na_return_t (*mem_handle_free)(

    na_class_t *na_class, na_mem_handle_t mem_handle);

  size_t (*mem_handle_get_max_segments)(const na_class_t *na_class);

  na_return_t (*mem_register)(na_class_t *na_class,

    na_mem_handle_t mem_handle, enum na_mem_type mem_type, uint64_t device);

  na_return_t (*mem_deregister)(

    na_class_t *na_class, na_mem_handle_t mem_handle);

  size_t (*mem_handle_get_serialize_size)(

    na_class_t *na_class, na_mem_handle_t mem_handle);

  na_return_t (*mem_handle_serialize)(na_class_t *na_class, void *buf,

    size_t buf_size, na_mem_handle_t mem_handle);

  na_return_t (*mem_handle_deserialize)(na_class_t *na_class,

    na_mem_handle_t *mem_handle, const void *buf, size_t buf_size);

  na_return_t (*put)(na_class_t *na_class, na_context_t *context,

    na_cb_t callback, void *arg, na_mem_handle_t local_mem_handle,

    na_offset_t local_offset, na_mem_handle_t remote_mem_handle,

    na_offset_t remote_offset, size_t length, na_addr_t remote_addr,

    uint8_t remote_id, na_op_id_t *op_id);

  na_return_t (*get)(na_class_t *na_class, na_context_t *context,

    na_cb_t callback, void *arg, na_mem_handle_t local_mem_handle,

    na_offset_t local_offset, na_mem_handle_t remote_mem_handle,

    na_offset_t remote_offset, size_t length, na_addr_t remote_addr,

    uint8_t remote_id, na_op_id_t *op_id);

  int (*na_poll_get_fd)(na_class_t *na_class, na_context_t *context);

  bool (*na_poll_try_wait)(na_class_t *na_class, na_context_t *context);

  na_return_t (*progress)(

    na_class_t *na_class, na_context_t *context, unsigned int timeout);

  na_return_t (*cancel)(

    na_class_t *na_class, na_context_t *context, na_op_id_t *op_id);

};

msg_send_unexpected的赋值在 mercury-master-commiut\src\na\na_ofi.c

//op 数组

const struct na_class_ops NA_PLUGIN_OPS(ofi) = { 

//NA_PLUGIN_OPS是宏 = na_##plugin_name##_class_ops_g  推出 NA_PLUGIN_OPS(ofi)展开是===>na_ofi_class_ops_g

每一种NA插件(plugin)都用这个宏定义了自己的结构体:

/*

:

na_class_table_g in na.c (D:\04-code\mercury-master-commiut\src\na) : 

/* NA plugin class table */

static const struct na_class_ops *const na_class_table_g[] = {

#ifdef NA_HAS_SM

  &NA_PLUGIN_OPS(sm), /* Keep NA SM first for protocol selection */

#endif

#ifdef NA_HAS_OFI

  &NA_PLUGIN_OPS(ofi),

#endif

#ifdef NA_HAS_BMI

  &NA_PLUGIN_OPS(bmi),

#endif

#ifdef NA_HAS_MPI

  &NA_PLUGIN_OPS(mpi),

#endif

#ifdef NA_HAS_CCI

  &NA_PLUGIN_OPS(cci),

#endif

#ifdef NA_HAS_UCX

  &NA_PLUGIN_OPS(ucx),

#endif

#ifdef NA_HAS_PSM

  &NA_PLUGIN_OPS(psm),

#endif

#ifdef NA_HAS_PSM2

  &NA_PLUGIN_OPS(psm2),

#endif

  NULL};

 ofi 的展开定义并初始化赋值:

*/

  "ofi",                 /* name */

  na_ofi_check_protocol,         /* check_protocol */

  na_ofi_initialize,           /* initialize */

  na_ofi_finalize,            /* finalize */

  NULL,                 /* cleanup */

  na_ofi_context_create,         /* context_create */

  na_ofi_context_destroy,        /* context_destroy */

  na_ofi_op_create,           /* op_create */

  na_ofi_op_destroy,           /* op_destroy */

  na_ofi_addr_lookup,          /* addr_lookup */

  na_ofi_addr_free,           /* addr_free */

  na_ofi_addr_set_remove,        /* addr_set_remove */

  na_ofi_addr_self,           /* addr_self */

  na_ofi_addr_dup,            /* addr_dup */

  na_ofi_addr_cmp,            /* addr_cmp */

  na_ofi_addr_is_self,          /* addr_is_self */

  na_ofi_addr_to_string,         /* addr_to_string */

  na_ofi_addr_get_serialize_size,    /* addr_get_serialize_size */

  na_ofi_addr_serialize,         /* addr_serialize */

  na_ofi_addr_deserialize,        /* addr_deserialize */

  na_ofi_msg_get_max_unexpected_size,  /* msg_get_max_unexpected_size */

  na_ofi_msg_get_max_expected_size,   /* msg_get_max_expected_size */

  na_ofi_msg_get_unexpected_header_size, /* msg_get_unexpected_header_size */

  NULL,                 /* msg_get_expected_header_size */

  na_ofi_msg_get_max_tag,        /* msg_get_max_tag */

  na_ofi_msg_buf_alloc,         /* msg_buf_alloc */

  na_ofi_msg_buf_free,          /* msg_buf_free */

  na_ofi_msg_init_unexpected,      /* msg_init_unexpected */

  na_ofi_msg_send_unexpected,      /* 给成员 msg_send_unexpected赋值na_ofi_msg_send_unexpected */  

  na_ofi_msg_recv_unexpected,      /* msg_recv_unexpected */

  NULL,                 /* msg_init_expected */

  na_ofi_msg_send_expected,       /* msg_send_expected */

  na_ofi_msg_recv_expected,       /* msg_recv_expected */

  na_ofi_mem_handle_create,       /* mem_handle_create */

  na_ofi_mem_handle_create_segments,   /* mem_handle_create_segment */

  na_ofi_mem_handle_free,        /* mem_handle_free */

  na_ofi_mem_handle_get_max_segments,  /* mem_handle_get_max_segments */

  na_ofi_mem_register,          /* mem_register */

  na_ofi_mem_deregister,         /* mem_deregister */

  na_ofi_mem_handle_get_serialize_size, /* mem_handle_get_serialize_size */

  na_ofi_mem_handle_serialize,      /* mem_handle_serialize */

  na_ofi_mem_handle_deserialize,     /* mem_handle_deserialize */

  na_ofi_put,              /* put */

  na_ofi_get,              /* get */

  na_ofi_poll_get_fd,          /* poll_get_fd */

  na_ofi_poll_try_wait,         /* poll_try_wait */

  na_ofi_progress,            /* progress */

  na_ofi_cancel             /* cancel */

};

附录2 na_ofi_msg_send_unexpected

na_ofi_msg_send_unexpected  #定义在\mercury\src\na\na_ofi.c

--na_ofi_msg_send

----fi_tsend

------ep->tagged->send 是函数指针,不同的provider插件指向不同的处理函数,找对应处理函数方法如下:

(点击tagged,跳到其定义 struct fi_ops_tagged *tagged,再搜索 fi_ops_tagged)

(点击tagged,跳到定义)

struct fid_ep {

  struct fid    fid;

  struct fi_ops_ep  *ops;

  struct fi_ops_cm  *cm;

  struct fi_ops_msg  *msg;

  struct fi_ops_rma  *rma;

  struct fi_ops_tagged  *tagged;

  struct fi_ops_atomic  *atomic;

  struct fi_ops_collective *collective;

};

--->

搜索 fi_ops_tagged

---->

---- fi_ops_tagged Matches (109 in 32 files) ----

...

fid_ep in fi_endpoint.h (D:\04-code\libfabric\include\rdma) :   struct fi_ops_tagged  *tagged;

fi_tagged.h (D:\04-code\libfabric\include\rdma) line 56 : struct fi_ops_tagged {

...

sock_ep.c (D:\04-code\libfabric\prov\sockets\src) line 59 : extern struct fi_ops_tagged sock_ep_tagged;

sock_msg.c (D:\04-code\libfabric\prov\sockets\src) line 748 : struct fi_ops_tagged sock_ep_tagged = {

sock_ep_tagged in sock_msg.c (D:\04-code\libfabric\prov\sockets\src) :   .size = sizeof(struct fi_ops_tagged),

tcpx_domain.c (D:\04-code\libfabric\prov\tcp\src) line 39 : extern struct fi_ops_tagged tcpx_srx_tag_ops;

tcpx_ep.c (D:\04-code\libfabric\prov\tcp\src) line 44 : extern struct fi_ops_tagged tcpx_tagged_ops;

tcpx_msg.c (D:\04-code\libfabric\prov\tcp\src) line 598 : struct fi_ops_tagged tcpx_tagged_ops = {

tcpx_shared_ctx.c (D:\04-code\libfabric\prov\tcp\src) line 242 : struct fi_ops_tagged tcpx_srx_tag_ops = {

tcpx_srx_tag_ops in tcpx_shared_ctx.c (D:\04-code\libfabric\prov\tcp\src) :   .size = sizeof(struct fi_ops_tagged)

...

-->

点击进入我们当前使用的provider:sock的xxx_msg.c/xxx_ep.c

fabric

------------------------------------------------------------->

##sock:\libfabric\prov\sockets\src\sock_msg.c

struct fi_ops_tagged sock_ep_tagged = {

  .size = sizeof(struct fi_ops_tagged),

  .recv = sock_ep_trecv,

  .recvv = sock_ep_trecvv,

  .recvmsg = sock_ep_trecvmsg,

  .send = sock_ep_tsend,

  .sendv = sock_ep_tsendv,

  .sendmsg = sock_ep_tsendmsg,

  .inject = sock_ep_tinject,

  .senddata = sock_ep_tsenddata,

  .injectdata = sock_ep_tinjectdata,

};

-->所以ep->tagged->send 就是sock_ep_tsend

ep->tagged->send-->sock_ep_tsend    #libfabric\prov\sockets\src\sock_msg.c

sock_ep_tsend

--sock_ep_tsendmsg

----ret = sock_ep_get_conn(ep_attr, tx_ctx, msg->addr, &conn);

----if (flags & FI_TRIGGER) {

        ret = sock_queue_tmsg_op(ep, msg, flags, FI_OP_TSEND);

        if (ret != 1)

            return ret;

    }

----sock_tx_ctx_write_op_tsend

------sock_tx_ctx_write_op_tsend(*tx_ctx,*op,flags,context,dest_addr,buf,*ep_attr,*conn,tag)//用后面的参数给tx_ctx对应的成员赋值

------sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(msg->data));/sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); //写入环形缓冲

------sock_tx_ctx_commit(tx_ctx);

--------ofi_rbcommit(&tx_ctx->rb);//修改指示变量

----------rb->wcnt = rb->wpos;

--------sock_pe_signal(tx_ctx->domain->pe)//写signal socket fd,唤醒progress engine(PE)线程

##rxm: \libfabric\prov\rxm\src\rxm_ep.c

rxm_ep.c (D:\04-code\libfabric\prov\rxm\src) line 2228 : static struct fi_ops_tagged rxm_ops_tagged = {

rxm_ops_tagged in rxm_ep.c (D:\04-code\libfabric\prov\rxm\src) :     .size = sizeof(struct fi_ops_tagged),

static struct fi_ops_tagged rxm_ops_tagged = {

    .size = sizeof(struct fi_ops_tagged),

    .recv = rxm_ep_trecv,

    .recvv = rxm_ep_trecvv,

    .recvmsg = rxm_ep_trecvmsg,

    .send = rxm_ep_tsend,

    .sendv = rxm_ep_tsendv,

    .sendmsg = rxm_ep_tsendmsg,

    .inject = rxm_ep_tinject,

    .senddata = rxm_ep_tsenddata,

    .injectdata = rxm_ep_tinjectdata,

};

rxm_ep_tsend

--ret = rxm_get_conn(rxm_ep, dest_addr, &rxm_conn);

--ret = rxm_send_common(rxm_ep, rxm_conn, &iov, &desc, 1, context, 0,rxm_ep->util_ep.tx_op_flags, tag, ofi_op_tagged);

----ret = rxm_send_eager #/rxm_send_sar/rxm_ep_rndv_tx_send

------ret = rxm_msg_tsend #/rxm_direct_send/rxm_ep_msg_normal_send

--------fi_tsend/fi_tsenddata  #count == 0 or count == 1

--------fi_tsendmsg

----------ep->tagged->sendmsg(ep, msg, flags);--->根据上面的fi_ops_tagged rxm_ops_tagged,ep->tagged->sendmsg-->rxm_ep_tsendmsg

------------rxm_ep_tsendmsg

--------------rxm_send_common

----------------ret = rxm_send_eager #data_len <= rxm_ep->eager_limit

----------------ret = rxm_send_sar #data_len <= rxm_ep->sar_limit

----------------ret = rxm_ep_rndv_tx_send #

通信两端上下文的创建

是在协程启动的时候,协程函数dss_srv_handler内创建

--server_init(argc, argv)

......

/* initialize service */

----dss_srv_init(); #初始化Argobots等

......

for (i = 0; i < dss_sys_xs_nr; i++){

------dss_start_xs_id(xs_id, false, DSS_SYS_ROLE)

--------dss_xstreams_init() /* 读取环境变量,启动X stream 见详情1*/

----------dss_start_one_xstream(obj->cpuset, xs_id); //分配cpu核,分配名字:daos_sys_$num\daos_io_$tgtid\daos_off_$num

------------dss_sched_init(dx); //

  /** start progress ULT */

------------daos_abt_thread_create(dx->dx_sp, dss_free_stack_cb, dx->dx_pools[DSS_POOL_NET_POLL],

    dss_srv_handler, dx, attr,

    &dx->dx_progress);  //启动了abt线程执行dss_srv_handler

}

 

dss_srv_handler

--crt_context_create(crt_context_t *crt_ctx)

----crt_context_provider_create(crt_ctx, crt_gdata.cg_init_prov) //有条件

------crt_context_init(ctx) //

-------crt_swim_init(crt_gdata.cg_swim_crt_idx) //有条件.// 进程启动的第二个线程才会启动swim操作, 确保只有一个线程操作swim

-------crt_hg_ctx_init(&ctx->cc_hg_ctx, provider, cur_ctx_num)

--------crt_hg_class_init(provider, idx, &hg_class)

----------hg_class = HG_Init_opt(info_string, crt_is_service(), &init_info)

--------crt_hg_pool_init(hg_ctx)

}

----->mercury层

详情1:

启动执行流(xstream):

  • 启动 service XS
  • 启动 IO service XS
  • 启动 offload XS

/** Number of dRPC xstreams */

#define DRPC_XS_NR        (1)

/** Number of offload XS */

unsigned int        dss_tgt_offload_xs_nr;

/** Number of target (XS set) per engine */

unsigned int        dss_tgt_nr;

/** Number of system XS */

unsigned int        dss_sys_xs_nr = DAOS_TGT0_OFFSET + DRPC_XS_NR;

/* start system service XS */

for (i = 0; i < dss_sys_xs_nr; i++) {

xs_id = i;

rc = dss_start_xs_id(xs_id);

if (rc)

D_GOTO(out, rc);

}

/* start main IO service XS */

for (i = 0; i < dss_tgt_nr; i++) {

xs_id = DSS_MAIN_XS_ID(i);

rc = dss_start_xs_id(xs_id);

if (rc)

D_GOTO(out, rc);

}

posted on 2024-12-16 03:19  bdy  阅读(5)  评论(0)    收藏  举报  来源

导航