【DAOS】CaRT初始化过程和数据发送代码
目录
附录2 na_ofi_msg_send_unexpected
初始化
engine进程中的初始化:
daos\src\engine\init.c
main(int argc, char **argv)
--server_init(argc, argv)
......
/* initialize the network layer 初始化网络层*/
----crt_init_opt(daos_sysname, CRT_FLAG_BIT_SERVER, daos_crt_init_opt_get(true, ctx_nr));//检查配置的provider在crt_na_dict表中是否存在
------d_log_init(); //初始化CART日志
------data_init(server, opt) #初始化一些默认值,opt设置的值会替换从环境变量读到的;添加计数器
------crt_hg_init() #初始化HG日志系统,HG 日志等级
------crt_grp_init(grpid) #初始化grp的lookup cache;swim等
------prov_data_init(&crt_gdata.cg_prov_gdata[prov],prov, set_sep, max_num_ctx,max_expect_size, max_unexpect_size); //provider 选择
......
------crt_internal_rpc_register(server) #注册RPC函数
启动协程loop的过程
daos\src\engine\init.c
main(int argc, char **argv)
--server_init(argc, argv)
......
/* initialize the network layer */
----ctx_nr = dss_ctx_nr_get();
----crt_init_opt(daos_sysname, CRT_FLAG_BIT_SERVER, daos_crt_init_opt_get(true, ctx_nr));//检查配置的provider在crt_na_dict表中是否存在
------data_init(server, opt) #初始化一些默认值,opt设置的值会替换从环境变量读到的;添加计数器
------crt_hg_init() #初始化HG日志系统,HG 日志等级
------crt_grp_init(grpid) #初始化grp的lookup cache;swim等
......
------crt_internal_rpc_register(server) #注册RPC函数
/* initialize service */
----dss_srv_init(); #初始化argobot 等
......
for (i = 0; i < dss_sys_xs_nr; i++){
------dss_start_xs_id(xs_id, false, DSS_SYS_ROLE)
--------dss_xstreams_init() /* 读取环境变量,启动X stream 见详情1*/
----------dss_start_one_xstream(obj->cpuset, xs_id); //分配cpu核,分配名字:daos_sys_$num\daos_io_$tgtid\daos_off_$num
------------dss_sched_init(dx); //
/** start progress ULT */
------------daos_abt_thread_create(dx->dx_sp, dss_free_stack_cb, dx->dx_pools[DSS_POOL_NET_POLL],
dss_srv_handler, dx, attr,
&dx->dx_progress); //启动了abt线程执行dss_srv_handler
}
其中的协程处理函数:
dss_srv_handler
--if (dx->dx_main_xs) {
daos_abt_thread_create(dx->dx_sp, dss_free_stack_cb, dx->dx_pools[DSS_POOL_NVME_POLL],
dss_nvme_poll_ult, NULL, attr, NULL);} //nvme
/* main service progress loop */
--for (;;) {
if (dx->dx_comm) {
rc = crt_progress(dmi->dmi_ctx, dx->dx_timeout);
if (rc != 0 && rc != -DER_TIMEDOUT) {
D_ERROR("failed to progress CART context: %d\n",
rc);
/* XXX Sometimes the failure might be just
* temporary, Let's keep progressing for now.
*/
}
}
if (dss_xstream_exiting(dx))
break;
ABT_thread_yield();
}
其中的crt_progress:
crt_progress
--crt_hg_progress(&ctx->cc_hg_ctx, 0)
----hg_ret = HG_Progress(hg_context, hg_timeout); /** progress RPC execution */
----hg_ret = HG_Trigger(hg_context, 0, total, &count); /** some RPCs have progressed, call Trigger */
--timeout = crt_exec_progress_cb(ctx, timeout);
--if (timeout != 0 && (rc == 0 || rc == -DER_TIMEDOUT)) {
rc = crt_hg_progress(&ctx->cc_hg_ctx, timeout);
}
其中的HG_Progress
HG_Progress(hg_context, hg_timeout); /** progress RPC execution */
--HG_Core_progress(private_context, timeout); /* Make progress on the HG layer */
----hg_core_progress(private_context, timeout);
------hg_core_poll_try_wait(context))/hg_core_poll_wait(context, poll_timeout, &progressed)/hg_core_poll(context, poll_timeout, &progressed)
--------hg_core_progress_na(HG_CORE_CONTEXT_CLASS(context)->core_class.na_class, context->core_context.na_context, progress_timeout, &progressed_na)
for (;;) {
----------NA_Trigger(na_context, 0, HG_CORE_MAX_TRIGGER_COUNT,cb_ret, &actual_count) //处理progress出来的事件
------------completion_data_ptr = hg_atomic_queue_pop_mc(na_private_context->completion_queue)//从完成队列(cq)中取出已完成的事件
------------completion_data.callback(&completion_data.callback_info) //执行回调
----------NA_Progress(na_class, na_context, hg_time_to_ms(hg_time_subtract(deadline, now))) //progress出完成事件
------------na_class->ops->progress(na_class, context, (unsigned int) (remaining * 1000.0))
}
数据发送
以上完成监听和等待消息,下面是数据发送。当命令行输入pool query查询命令:
#查询每个pool上tgt的数据/元数据容量使用是否符合预期
storage dmg pool query ${poolname}
底层调用dc_pool_query接口函数发送指令:
应用层
------------------------------------------------------------->
int dc_pool_query(tse_task_t *task) #查询池信息时调用daos_rpc_send
--daos_rpc_send(crt_rpc_t *rpc, tse_task_t *task)
----crt_req_send(rpc, daos_rpc_cb, task);
CaRT (集群网络层)
------------------------------------------------------------->
crt_req_send(crt_rpc_t *req, crt_cb_t complete_cb, void *arg) #\ceastor\src\cart\crt_rpc.c
--crt_req_send_internal(struct crt_rpc_priv *rpc_priv) #会在这里查找连接
----crt_req_send_immediately
------crt_hg_req_send
--------HG_Forward
mercury (RPC传输层)
------------------------------------------------------------->
HG_Forward #\mercury\src\mercury.c
--HG_Core_forward
----hg_core_forward
------hg_core_handle->forward #hg_core_handle->forward = hg_core_handle->is_self ? hg_core_forward_self : hg_core_forward_na;
--------hg_core_forward_na(struct hg_core_private_handle *hg_core_handle)
----------NA_Msg_send_unexpected
-------------na_class->ops->msg_send_unexpected
--------------na_ofi_msg_send_unexpected #转到这里的过程,见后面附录1的解释
----------------na_ofi_msg_send
-------------------fi_tsend
ofi (网络传输层)
------------------------------------------------------------->
----fi_tsend #ofi\include\rdma\fi_tagged.h
------ep->tagged->send
-------rxm_ep_tsend (转到这里的过程,见后面附录2解释)
rxm_ep_tsend
--ret = rxm_get_conn(rxm_ep, dest_addr, &rxm_conn);
--ret = rxm_send_common(rxm_ep, rxm_conn, &iov, &desc, 1, context, 0,rxm_ep->util_ep.tx_op_flags, tag, ofi_op_tagged);
----ret = rxm_send_eager #/rxm_send_sar/rxm_ep_rndv_tx_send
------ret = rxm_msg_tsend #/rxm_direct_send/rxm_ep_msg_normal_send
--------fi_tsend/fi_tsenddata #count == 0 or count == 1
--------fi_tsendmsg
----------ep->tagged->sendmsg(ep, msg, flags);#--->根据上面的fi_ops_tagged rxm_ops_tagged,ep->tagged->sendmsg-->rxm_ep_tsendmsg
------------rxm_ep_tsendmsg
--------------rxm_send_common
----------------ret = rxm_send_eager #data_len <= rxm_ep->eager_limit
----------------ret = rxm_send_sar #data_len <= rxm_ep->sar_limit
----------------ret = rxm_ep_rndv_tx_send #
附录
附录1 msg_send_unexpected的定义和赋值
msg_send_unexpected的定义和赋值
msg_send_unexpected的定义 mercury-master-commiut\src\na\na.h
/*
 * NA plugin callbacks.
 *
 * Table of function pointers that a plugin supplies, one member per
 * operation. NOTE: member order matters for any table that is filled with a
 * positional (non-designated) initializer, as the ofi table quoted later in
 * this document is.
 */
struct na_class_ops {
/* Plugin name and protocol check */
const char *class_name;
bool (*check_protocol)(const char *protocol_name);
/* Class and context setup/teardown */
na_return_t (*initialize)(
na_class_t *na_class, const struct na_info *na_info, bool listen);
na_return_t (*finalize)(na_class_t *na_class);
void (*cleanup)(void);
na_return_t (*context_create)(
na_class_t *na_class, void **plugin_context, uint8_t id);
na_return_t (*context_destroy)(na_class_t *na_class, void *plugin_context);
/* Operation IDs */
na_op_id_t *(*op_create)(na_class_t *na_class);
na_return_t (*op_destroy)(na_class_t *na_class, na_op_id_t *op_id);
/* Address callbacks (addr_*) */
na_return_t (*addr_lookup)(
na_class_t *na_class, const char *name, na_addr_t *addr);
na_return_t (*addr_free)(na_class_t *na_class, na_addr_t addr);
na_return_t (*addr_set_remove)(na_class_t *na_class, na_addr_t addr);
na_return_t (*addr_self)(na_class_t *na_class, na_addr_t *addr);
na_return_t (*addr_dup)(
na_class_t *na_class, na_addr_t addr, na_addr_t *new_addr);
bool (*addr_cmp)(na_class_t *na_class, na_addr_t addr1, na_addr_t addr2);
bool (*addr_is_self)(na_class_t *na_class, na_addr_t addr);
na_return_t (*addr_to_string)(
na_class_t *na_class, char *buf, size_t *buf_size, na_addr_t addr);
size_t (*addr_get_serialize_size)(na_class_t *na_class, na_addr_t addr);
na_return_t (*addr_serialize)(
na_class_t *na_class, void *buf, size_t buf_size, na_addr_t addr);
na_return_t (*addr_deserialize)(na_class_t *na_class, na_addr_t *addr,
const void *buf, size_t buf_size);
/* Message size, header size and tag queries (msg_get_*) */
size_t (*msg_get_max_unexpected_size)(const na_class_t *na_class);
size_t (*msg_get_max_expected_size)(const na_class_t *na_class);
size_t (*msg_get_unexpected_header_size)(const na_class_t *na_class);
size_t (*msg_get_expected_header_size)(const na_class_t *na_class);
na_tag_t (*msg_get_max_tag)(const na_class_t *na_class);
/* Message buffer allocation */
void *(*msg_buf_alloc)(
na_class_t *na_class, size_t buf_size, void **plugin_data);
na_return_t (*msg_buf_free)(
na_class_t *na_class, void *buf, void *plugin_data);
/* Unexpected messages: init, send, receive */
na_return_t (*msg_init_unexpected)(
na_class_t *na_class, void *buf, size_t buf_size);
/* Member invoked as na_class->ops->msg_send_unexpected in the send path */
na_return_t (*msg_send_unexpected)(na_class_t *na_class,
na_context_t *context, na_cb_t callback, void *arg, const void *buf,
size_t buf_size, void *plugin_data, na_addr_t dest_addr,
uint8_t dest_id, na_tag_t tag, na_op_id_t *op_id);
na_return_t (*msg_recv_unexpected)(na_class_t *na_class,
na_context_t *context, na_cb_t callback, void *arg, void *buf,
size_t buf_size, void *plugin_data, na_op_id_t *op_id);
/* Expected messages: init, send, receive */
na_return_t (*msg_init_expected)(
na_class_t *na_class, void *buf, size_t buf_size);
na_return_t (*msg_send_expected)(na_class_t *na_class,
na_context_t *context, na_cb_t callback, void *arg, const void *buf,
size_t buf_size, void *plugin_data, na_addr_t dest_addr,
uint8_t dest_id, na_tag_t tag, na_op_id_t *op_id);
na_return_t (*msg_recv_expected)(na_class_t *na_class,
na_context_t *context, na_cb_t callback, void *arg, void *buf,
size_t buf_size, void *plugin_data, na_addr_t source_addr,
uint8_t source_id, na_tag_t tag, na_op_id_t *op_id);
/* Memory handle callbacks (mem_*) */
na_return_t (*mem_handle_create)(na_class_t *na_class, void *buf,
size_t buf_size, unsigned long flags, na_mem_handle_t *mem_handle);
na_return_t (*mem_handle_create_segments)(na_class_t *na_class,
struct na_segment *segments, size_t segment_count, unsigned long flags,
na_mem_handle_t *mem_handle);
na_return_t (*mem_handle_free)(
na_class_t *na_class, na_mem_handle_t mem_handle);
size_t (*mem_handle_get_max_segments)(const na_class_t *na_class);
na_return_t (*mem_register)(na_class_t *na_class,
na_mem_handle_t mem_handle, enum na_mem_type mem_type, uint64_t device);
na_return_t (*mem_deregister)(
na_class_t *na_class, na_mem_handle_t mem_handle);
size_t (*mem_handle_get_serialize_size)(
na_class_t *na_class, na_mem_handle_t mem_handle);
na_return_t (*mem_handle_serialize)(na_class_t *na_class, void *buf,
size_t buf_size, na_mem_handle_t mem_handle);
na_return_t (*mem_handle_deserialize)(na_class_t *na_class,
na_mem_handle_t *mem_handle, const void *buf, size_t buf_size);
/* put/get: both take a local and a remote memory handle plus offsets */
na_return_t (*put)(na_class_t *na_class, na_context_t *context,
na_cb_t callback, void *arg, na_mem_handle_t local_mem_handle,
na_offset_t local_offset, na_mem_handle_t remote_mem_handle,
na_offset_t remote_offset, size_t length, na_addr_t remote_addr,
uint8_t remote_id, na_op_id_t *op_id);
na_return_t (*get)(na_class_t *na_class, na_context_t *context,
na_cb_t callback, void *arg, na_mem_handle_t local_mem_handle,
na_offset_t local_offset, na_mem_handle_t remote_mem_handle,
na_offset_t remote_offset, size_t length, na_addr_t remote_addr,
uint8_t remote_id, na_op_id_t *op_id);
/* Polling, progress and cancellation */
int (*na_poll_get_fd)(na_class_t *na_class, na_context_t *context);
bool (*na_poll_try_wait)(na_class_t *na_class, na_context_t *context);
/* Member invoked as na_class->ops->progress from NA_Progress */
na_return_t (*progress)(
na_class_t *na_class, na_context_t *context, unsigned int timeout);
na_return_t (*cancel)(
na_class_t *na_class, na_context_t *context, na_op_id_t *op_id);
};
msg_send_unexpected的赋值在 mercury-master-commiut\src\na\na_ofi.c
//op 数组
const struct na_class_ops NA_PLUGIN_OPS(ofi) = {
//NA_PLUGIN_OPS是宏 = na_##plugin_name##_class_ops_g 推出 NA_PLUGIN_OPS(ofi)展开是===>na_ofi_class_ops_g
每一种provider(NA插件)都用这个宏定义了自己的结构体:
/*
:
na_class_table_g in na.c (D:\04-code\mercury-master-commiut\src\na) :
/*
 * NA plugin class table: NULL-terminated array of pointers to the ops table
 * of each plugin; every entry is compiled in only when its NA_HAS_* macro
 * is defined.
 */
static const struct na_class_ops *const na_class_table_g[] = {
#ifdef NA_HAS_SM
&NA_PLUGIN_OPS(sm), /* Keep NA SM first for protocol selection */
#endif
#ifdef NA_HAS_OFI
&NA_PLUGIN_OPS(ofi),
#endif
#ifdef NA_HAS_BMI
&NA_PLUGIN_OPS(bmi),
#endif
#ifdef NA_HAS_MPI
&NA_PLUGIN_OPS(mpi),
#endif
#ifdef NA_HAS_CCI
&NA_PLUGIN_OPS(cci),
#endif
#ifdef NA_HAS_UCX
&NA_PLUGIN_OPS(ucx),
#endif
#ifdef NA_HAS_PSM
&NA_PLUGIN_OPS(psm),
#endif
#ifdef NA_HAS_PSM2
&NA_PLUGIN_OPS(psm2),
#endif
NULL};
ofi 的展开定义并初始化赋值:
*/
"ofi", /* name */
na_ofi_check_protocol, /* check_protocol */
na_ofi_initialize, /* initialize */
na_ofi_finalize, /* finalize */
NULL, /* cleanup */
na_ofi_context_create, /* context_create */
na_ofi_context_destroy, /* context_destroy */
na_ofi_op_create, /* op_create */
na_ofi_op_destroy, /* op_destroy */
na_ofi_addr_lookup, /* addr_lookup */
na_ofi_addr_free, /* addr_free */
na_ofi_addr_set_remove, /* addr_set_remove */
na_ofi_addr_self, /* addr_self */
na_ofi_addr_dup, /* addr_dup */
na_ofi_addr_cmp, /* addr_cmp */
na_ofi_addr_is_self, /* addr_is_self */
na_ofi_addr_to_string, /* addr_to_string */
na_ofi_addr_get_serialize_size, /* addr_get_serialize_size */
na_ofi_addr_serialize, /* addr_serialize */
na_ofi_addr_deserialize, /* addr_deserialize */
na_ofi_msg_get_max_unexpected_size, /* msg_get_max_unexpected_size */
na_ofi_msg_get_max_expected_size, /* msg_get_max_expected_size */
na_ofi_msg_get_unexpected_header_size, /* msg_get_unexpected_header_size */
NULL, /* msg_get_expected_header_size */
na_ofi_msg_get_max_tag, /* msg_get_max_tag */
na_ofi_msg_buf_alloc, /* msg_buf_alloc */
na_ofi_msg_buf_free, /* msg_buf_free */
na_ofi_msg_init_unexpected, /* msg_init_unexpected */
na_ofi_msg_send_unexpected, /* 给成员 msg_send_unexpected赋值na_ofi_msg_send_unexpected */
na_ofi_msg_recv_unexpected, /* msg_recv_unexpected */
NULL, /* msg_init_expected */
na_ofi_msg_send_expected, /* msg_send_expected */
na_ofi_msg_recv_expected, /* msg_recv_expected */
na_ofi_mem_handle_create, /* mem_handle_create */
na_ofi_mem_handle_create_segments, /* mem_handle_create_segment */
na_ofi_mem_handle_free, /* mem_handle_free */
na_ofi_mem_handle_get_max_segments, /* mem_handle_get_max_segments */
na_ofi_mem_register, /* mem_register */
na_ofi_mem_deregister, /* mem_deregister */
na_ofi_mem_handle_get_serialize_size, /* mem_handle_get_serialize_size */
na_ofi_mem_handle_serialize, /* mem_handle_serialize */
na_ofi_mem_handle_deserialize, /* mem_handle_deserialize */
na_ofi_put, /* put */
na_ofi_get, /* get */
na_ofi_poll_get_fd, /* poll_get_fd */
na_ofi_poll_try_wait, /* poll_try_wait */
na_ofi_progress, /* progress */
na_ofi_cancel /* cancel */
};
附录2 na_ofi_msg_send_unexpected
na_ofi_msg_send_unexpected #定义在\mercury\src\na\na_ofi.c
--na_ofi_msg_send
----fi_tsend
------ep->tagged->send 是函数指针,不同的provider插件指向不同的处理函数,找对应处理函数方法如下:
(点击tagged,跳到其定义 struct fi_ops_tagged *tagged,再搜索 fi_ops_tagged)
(点击tagged,跳到定义)
/*
 * Endpoint object: an embedded struct fid followed by one pointer per
 * operation table (ep, cm, msg, rma, tagged, atomic, collective).
 */
struct fid_ep {
struct fid fid;
struct fi_ops_ep *ops;
struct fi_ops_cm *cm;
struct fi_ops_msg *msg;
struct fi_ops_rma *rma;
/* Table dereferenced by ep->tagged->send / ep->tagged->sendmsg */
struct fi_ops_tagged *tagged;
struct fi_ops_atomic *atomic;
struct fi_ops_collective *collective;
};
--->
搜索 fi_ops_tagged
---->
---- fi_ops_tagged Matches (109 in 32 files) ----
...
fid_ep in fi_endpoint.h (D:\04-code\libfabric\include\rdma) : struct fi_ops_tagged *tagged;
fi_tagged.h (D:\04-code\libfabric\include\rdma) line 56 : struct fi_ops_tagged {
...
sock_ep.c (D:\04-code\libfabric\prov\sockets\src) line 59 : extern struct fi_ops_tagged sock_ep_tagged;
sock_msg.c (D:\04-code\libfabric\prov\sockets\src) line 748 : struct fi_ops_tagged sock_ep_tagged = {
sock_ep_tagged in sock_msg.c (D:\04-code\libfabric\prov\sockets\src) : .size = sizeof(struct fi_ops_tagged),
tcpx_domain.c (D:\04-code\libfabric\prov\tcp\src) line 39 : extern struct fi_ops_tagged tcpx_srx_tag_ops;
tcpx_ep.c (D:\04-code\libfabric\prov\tcp\src) line 44 : extern struct fi_ops_tagged tcpx_tagged_ops;
tcpx_msg.c (D:\04-code\libfabric\prov\tcp\src) line 598 : struct fi_ops_tagged tcpx_tagged_ops = {
tcpx_shared_ctx.c (D:\04-code\libfabric\prov\tcp\src) line 242 : struct fi_ops_tagged tcpx_srx_tag_ops = {
tcpx_srx_tag_ops in tcpx_shared_ctx.c (D:\04-code\libfabric\prov\tcp\src) : .size = sizeof(struct fi_ops_tagged)
...
-->
点击进入我们当前使用的provider:sock的xxx_msg.c/xxx_ep.c
fabric
------------------------------------------------------------->
##sock:\libfabric\prov\sockets\src\sock_msg.c
/*
 * Tagged-operation table of the sockets provider. Each member of
 * struct fi_ops_tagged is bound to the matching sock_ep_t* function, so
 * ep->tagged->send resolves to sock_ep_tsend when this table is in use.
 */
struct fi_ops_tagged sock_ep_tagged = {
.size = sizeof(struct fi_ops_tagged),
.recv = sock_ep_trecv,
.recvv = sock_ep_trecvv,
.recvmsg = sock_ep_trecvmsg,
.send = sock_ep_tsend,
.sendv = sock_ep_tsendv,
.sendmsg = sock_ep_tsendmsg,
.inject = sock_ep_tinject,
.senddata = sock_ep_tsenddata,
.injectdata = sock_ep_tinjectdata,
};
-->所以ep->tagged->send 就是sock_ep_tsend
ep->tagged->send-->sock_ep_tsend #libfabric\prov\sockets\src\sock_msg.c
sock_ep_tsend
--sock_ep_tsendmsg
----ret = sock_ep_get_conn(ep_attr, tx_ctx, msg->addr, &conn);
----if (flags & FI_TRIGGER) {
ret = sock_queue_tmsg_op(ep, msg, flags, FI_OP_TSEND);
if (ret != 1)
return ret;
}
----sock_tx_ctx_write_op_tsend
------sock_tx_ctx_write_op_tsend(*tx_ctx,*op,flags,context,dest_addr,buf,*ep_attr,*conn,tag)//用后面的参数给tx_ctx对应的成员赋值
------sock_tx_ctx_write(tx_ctx, &msg->data, sizeof(msg->data));/sock_tx_ctx_write(tx_ctx, &tx_iov, sizeof(tx_iov)); //写入环形缓冲
------sock_tx_ctx_commit(tx_ctx);
--------ofi_rbcommit(&tx_ctx->rb);//修改指示变量
----------rb->wcnt = rb->wpos;
--------sock_pe_signal(tx_ctx->domain->pe)//向signal socket fd写入一个字节,唤醒progress engine线程
##rxm: \libfabric\prov\rxm\src\rxm_ep.c
rxm_ep.c (D:\04-code\libfabric\prov\rxm\src) line 2228 : static struct fi_ops_tagged rxm_ops_tagged = {
rxm_ops_tagged in rxm_ep.c (D:\04-code\libfabric\prov\rxm\src) : .size = sizeof(struct fi_ops_tagged),
/*
 * Tagged-operation table of the rxm provider. Each member of
 * struct fi_ops_tagged is bound to the matching rxm_ep_t* function, so
 * ep->tagged->send resolves to rxm_ep_tsend and ep->tagged->sendmsg to
 * rxm_ep_tsendmsg when this table is in use.
 */
static struct fi_ops_tagged rxm_ops_tagged = {
.size = sizeof(struct fi_ops_tagged),
.recv = rxm_ep_trecv,
.recvv = rxm_ep_trecvv,
.recvmsg = rxm_ep_trecvmsg,
.send = rxm_ep_tsend,
.sendv = rxm_ep_tsendv,
.sendmsg = rxm_ep_tsendmsg,
.inject = rxm_ep_tinject,
.senddata = rxm_ep_tsenddata,
.injectdata = rxm_ep_tinjectdata,
};
rxm_ep_tsend
--ret = rxm_get_conn(rxm_ep, dest_addr, &rxm_conn);
--ret = rxm_send_common(rxm_ep, rxm_conn, &iov, &desc, 1, context, 0,rxm_ep->util_ep.tx_op_flags, tag, ofi_op_tagged);
----ret = rxm_send_eager #/rxm_send_sar/rxm_ep_rndv_tx_send
------ret = rxm_msg_tsend #/rxm_direct_send/rxm_ep_msg_normal_send
--------fi_tsend/fi_tsenddata #count == 0 or count == 1
--------fi_tsendmsg
----------ep->tagged->sendmsg(ep, msg, flags);--->根据上面的fi_ops_tagged rxm_ops_tagged,ep->tagged->sendmsg-->rxm_ep_tsendmsg
------------rxm_ep_tsendmsg
--------------rxm_send_common
----------------ret = rxm_send_eager #data_len <= rxm_ep->eager_limit
----------------ret = rxm_send_sar #data_len <= rxm_ep->sar_limit
----------------ret = rxm_ep_rndv_tx_send #
通信两端上下文的创建
是在协程启动的时候,协程函数dss_srv_handler内创建
--server_init(argc, argv)
......
/* initialize service */
----dss_srv_init(); #初始化argobot 等
......
for (i = 0; i < dss_sys_xs_nr; i++){
------dss_start_xs_id(xs_id, false, DSS_SYS_ROLE)
--------dss_xstreams_init() /* 读取环境变量,启动X stream 见详情1*/
----------dss_start_one_xstream(obj->cpuset, xs_id); //分配cpu核,分配名字:daos_sys_$num\daos_io_$tgtid\daos_off_$num
------------dss_sched_init(dx); //
/** start progress ULT */
------------daos_abt_thread_create(dx->dx_sp, dss_free_stack_cb, dx->dx_pools[DSS_POOL_NET_POLL],
dss_srv_handler, dx, attr,
&dx->dx_progress); //启动了abt线程执行dss_srv_handler
}
dss_srv_handler
--crt_context_create(crt_context_t *crt_ctx)
----crt_context_provider_create(crt_ctx, crt_gdata.cg_init_prov) //有条件
------crt_context_init(ctx) //
-------crt_swim_init(crt_gdata.cg_swim_crt_idx) //有条件.// 进程启动的第二个线程才会启动swim操作, 确保只有一个线程操作swim
-------crt_hg_ctx_init(&ctx->cc_hg_ctx, provider, cur_ctx_num)
--------crt_hg_class_init(provider, idx, &hg_class)
----------hg_class = HG_Init_opt(info_string, crt_is_service(), &init_info)
--------crt_hg_pool_init(hg_ctx)
}
----->mercury(水星)层
详情1:
启动执行流(xstream):
- 启动 service XS
- 启动 IO service XS
- 启动 offload XS
/** Number of dRPC xstreams */
#define DRPC_XS_NR (1)
/** Number of offload XS */
unsigned int dss_tgt_offload_xs_nr;
/** Number of target (XS set) per engine */
unsigned int dss_tgt_nr;
/** Number of system XS */
unsigned int dss_sys_xs_nr = DAOS_TGT0_OFFSET + DRPC_XS_NR;
/* start system service XS */
for (i = 0; i < dss_sys_xs_nr; i++) {
xs_id = i;
rc = dss_start_xs_id(xs_id);
if (rc)
D_GOTO(out, rc);
}
/* start main IO service XS */
for (i = 0; i < dss_tgt_nr; i++) {
xs_id = DSS_MAIN_XS_ID(i);
rc = dss_start_xs_id(xs_id);
if (rc)
D_GOTO(out, rc);
}
浙公网安备 33010602011771号