POS 如何接收来自 xPU 的请求
server 将请求传给 POS,POS 侧负责接收请求的函数是 POSWorkspace::pos_process:
int POSWorkspace::pos_process(
uint64_t api_id, //客户端调用的api
pos_client_uuid_t uuid, //客户端的uuid
std::vector<POSAPIParamDesp_t> param_desps, //api参数数组
void* ret_data, //api返回数据的存储地址
uint64_t ret_data_len //存储长度
)
获取客户端:循环查询直到拿到 uuid 对应的 client,然后忙等待,直到该 client 的状态变为 Active
uuid = 0; // 临时禁用 UUID 传递,可能是因为远程框架不支持 piggyback UUID 机制
while(client == nullptr){
client = this->get_client_by_uuid(uuid);
}
while(client->status != kPOS_ClientStatus_Active){}
wqe 是一次 API 调用的上下文对象(POSAPIContext_QE),由 api_id、uuid、参数数组、指令序号(api_inst_pc)、返回数据缓冲区及其长度、所属 client 构造而成
wqe = new POSAPIContext_QE(
api_id, uuid, param_desps,
client->get_and_move_api_inst_pc(), ret_data, ret_data_len, client
);
POS_CHECK_POINTER(wqe);
随后 wqe 被推入从 RPC 到 parser 的 API 上下文队列(Rpc2Parser)
client->push_q<kPOS_QueueDirection_Rpc2Parser, kPOS_QueueType_ApiCxt_WQ>(wqe);
来到parser线程
从 Rpc2Parser 队列中拉取 API 上下文。
apicxt_wqes.clear();
this->_client->poll_q<kPOS_QueueDirection_Rpc2Parser, kPOS_QueueType_ApiCxt_WQ>(&apicxt_wqes);
for(i=0; i<apicxt_wqes.size(); i++){
POS_CHECK_POINTER(apicxt_wqe = apicxt_wqes[i]);
api_id = apicxt_wqe->api_cxt->api_id;
api_meta = _ws->api_mgnr->api_metas[api_id];
取出api请求
for(i=0; i<apicxt_wqes.size(); i++){
POS_CHECK_POINTER(apicxt_wqe = apicxt_wqes[i]);
api_id = apicxt_wqe->api_cxt->api_id;
api_meta = _ws->api_mgnr->api_metas[api_id];
解析api请求
_parser_functions 是从 api_id 到对应解析函数的映射表
apicxt_wqe->parser_s_tick = POSUtilTscTimer::get_tsc();
parser_retval = (*(this->_parser_functions[api_id]))(this->_ws, this, apicxt_wqe);
apicxt_wqe->parser_e_tick = POSUtilTscTimer::get_tsc();
以 cudaMalloc 为例:_parser_functions 中的解析函数是在下面这段代码中被插入(注册)的
//pos/cuda_impl/parser.h
this->_parser_functions.insert({
/* CUDA runtime functions */
{ CUDA_MALLOC, ps_functions::cuda_malloc::parse },
在 CUDA 内存管理器中分配模拟内存
//pos/cuda_impl/src/parser/cuda_runtime.cpp
POSHandleManager_CUDA_Memory *hm_memory;
//记录相关handle
wqe->record_handle<kPOS_Edge_Direction_In>({
/* handle */ hm_context->latest_used_handle
});
//分配显存
retval = hm_memory->allocate_mocked_resource(
/* handle */ &memory_handle,
/* related_handles */ std::map<uint64_t, std::vector<POSHandle*>>({{
/* id */ kPOS_ResourceTypeId_CUDA_Context,
/* handles */ std::vector<POSHandle*>({hm_context->latest_used_handle})
}}),
/* size */ pos_api_param_value(wqe, 0, size_t),
/* use_expected_addr */ false,
/* expected_addr */ 0,
/* state_size */ (uint64_t)pos_api_param_value(wqe, 0, size_t)
);
解析成功后,回到 parser.cpp,由 parser 将 wqe 推入 Parser2Worker 队列,交给 worker 处理
this->_client->template push_q<kPOS_QueueDirection_Parser2Worker, kPOS_QueueType_ApiCxt_WQ>(apicxt_wqe);
worker.cpp 中的 __daemon 会根据编译期配置项(POS_CONF_EVAL_MigrOptLevel 与 POS_CONF_EVAL_CkptOptLevel)选择不同的执行路径
/*!
 *  \brief  entry point of the worker daemon: initialise it, then hand control
 *          to the routine selected by the compile-time evaluation options
 *  \note   the routine is chosen by the preprocessor, so exactly one (or none)
 *          of the calls below is compiled into the binary
 */
void POSWorker::__daemon(){
    // nothing can run without a successful init: warn and leave immediately
    if(unlikely(POS_SUCCESS != this->daemon_init())){
        POS_WARN_C("failed to init daemon, worker daemon exit");
        return;
    }

#if POS_CONF_EVAL_MigrOptLevel == 0
    // migration optimisation disabled => continuous checkpoint case
    #if POS_CONF_EVAL_CkptOptLevel <= 1
        // checkpoint level 0 or 1: synchronous checkpoint routine
        this->__daemon_ckpt_sync();
    #elif POS_CONF_EVAL_CkptOptLevel == 2
        // checkpoint level 2: asynchronous checkpoint routine
        this->__daemon_ckpt_async();
    #endif
    // NOTE: for any checkpoint level above 2, no routine is compiled in here
#else
    // any non-zero migration level: migration-optimised routine
    this->__daemon_migration_opt();
#endif
}
```1. 1.