pos 如何从 xpu 接收请求

server 将请求传给 pos,pos 中负责接收请求的函数是 pos_process:

// Entry point on the POS side that receives an API request handed over by the server.
int POSWorkspace::pos_process(
    uint64_t api_id,   // id of the API invoked by the client
    pos_client_uuid_t uuid,  // uuid of the calling client
    std::vector<POSAPIParamDesp_t> param_desps,  // array of API parameter descriptors
    void* ret_data, // address where the API's return data is stored
    uint64_t ret_data_len // length of that storage
)

获取客户端

uuid = 0;  // UUID passing is temporarily disabled, possibly because the remote framework does not support the piggyback-UUID mechanism

// Spin until the lookup for this uuid returns a non-null client.
while(client == nullptr){
    client = this->get_client_by_uuid(uuid);
}

// Busy-wait (empty loop body) until the client's status becomes kPOS_ClientStatus_Active.
while(client->status != kPOS_ClientStatus_Active){}

wqe(POSAPIContext_QE)保存一次 API 调用的上下文与执行状态

// Build the queue element for this call: it bundles the API id, the client uuid, the
// parameter descriptors, the value returned by client->get_and_move_api_inst_pc(),
// the return-data buffer with its length, and the client itself.
wqe = new POSAPIContext_QE(
    api_id, uuid, param_desps,
    client->get_and_move_api_inst_pc(), ret_data, ret_data_len, client
);
// Validate the newly created pointer with POS_CHECK_POINTER.
POS_CHECK_POINTER(wqe);

wqe 被加入从 RPC 到 parser 的 API 上下文队列(Rpc2Parser)

// Push the wqe onto the client's API-context queue in the Rpc2Parser direction.
client->push_q<kPOS_QueueDirection_Rpc2Parser, kPOS_QueueType_ApiCxt_WQ>(wqe);

来到parser线程

从 Rpc2Parser 队列中拉取 API 上下文。

apicxt_wqes.clear();
        this->_client->poll_q<kPOS_QueueDirection_Rpc2Parser, kPOS_QueueType_ApiCxt_WQ>(&apicxt_wqes);

        for(i=0; i<apicxt_wqes.size(); i++){
            POS_CHECK_POINTER(apicxt_wqe = apicxt_wqes[i]);

            api_id = apicxt_wqe->api_cxt->api_id;
            api_meta = _ws->api_mgnr->api_metas[api_id];

取出api请求

for(i=0; i<apicxt_wqes.size(); i++){
    POS_CHECK_POINTER(apicxt_wqe = apicxt_wqes[i]);

    api_id = apicxt_wqe->api_cxt->api_id;
    api_meta = _ws->api_mgnr->api_metas[api_id];

解析api请求

_parser_functions 是从 api_id 到对应解析函数的映射

// Store the value of POSUtilTscTimer::get_tsc() just before the parser function runs.
apicxt_wqe->parser_s_tick = POSUtilTscTimer::get_tsc();
// Look up the parser function registered under api_id and invoke it with the
// workspace, this parser and the queue element; keep its return value.
parser_retval = (*(this->_parser_functions[api_id]))(this->_ws, this, apicxt_wqe);
// Store the value of POSUtilTscTimer::get_tsc() right after the parser function returns.
apicxt_wqe->parser_e_tick = POSUtilTscTimer::get_tsc();

以 cudaMalloc 为例,_parser_functions 中的函数是在这里被插入的:

//pos/cuda_impl/parser.h

this->_parser_functions.insert({
            /* CUDA runtime functions */
            {   CUDA_MALLOC,                    ps_functions::cuda_malloc::parse                        },

在 CUDA 内存管理器中分配模拟内存

//pos/cuda_impl/src/parser/cuda_runtime.cpp

// Handle manager for CUDA memory (how it is obtained is not shown in this excerpt).
POSHandleManager_CUDA_Memory *hm_memory;

// Record the related handle: the context manager's latest-used handle is attached
// to the wqe with direction kPOS_Edge_Direction_In.
wqe->record_handle<kPOS_Edge_Direction_In>({
    /* handle */ hm_context->latest_used_handle
});

// Allocate the (mocked) device-memory resource. Parameter 0 of the API call is read
// as a size_t and used for both the size and the state size; the context manager's
// latest-used handle is passed as the related context handle.
retval = hm_memory->allocate_mocked_resource(
    /* handle */ &memory_handle,
    /* related_handles */ std::map<uint64_t, std::vector<POSHandle*>>({{ 
        /* id */ kPOS_ResourceTypeId_CUDA_Context, 
        /* handles */ std::vector<POSHandle*>({hm_context->latest_used_handle}) 
    }}),
    /* size */ pos_api_param_value(wqe, 0, size_t),
    /* use_expected_addr */ false,
    /* expected_addr */ 0,
    /* state_size */ (uint64_t)pos_api_param_value(wqe, 0, size_t)
);

解析成功后,我们回到 parser.cpp,将 wqe 再从 parser push 给 worker

// Forward the parsed wqe to the worker: push it onto the client's API-context queue
// in the Parser2Worker direction.
this->_client->template push_q<kPOS_QueueDirection_Parser2Worker, kPOS_QueueType_ApiCxt_WQ>(apicxt_wqe);

worker.cpp 中的 daemon 会根据编译期配置宏(POS_CONF_EVAL_MigrOptLevel、POS_CONF_EVAL_CkptOptLevel)执行不同操作

/*!
 *  \brief  worker daemon entry: run daemon_init(), then dispatch to the daemon
 *          routine selected at compile time
 *  \note   selection rules:
 *            - POS_CONF_EVAL_MigrOptLevel != 0                  -> __daemon_migration_opt()
 *            - otherwise, POS_CONF_EVAL_CkptOptLevel <= 1       -> __daemon_ckpt_sync()
 *            - otherwise, POS_CONF_EVAL_CkptOptLevel == 2       -> __daemon_ckpt_async()
 *            - any other checkpoint level                       -> no routine is called
 *          if daemon_init() does not return POS_SUCCESS, a warning is logged and
 *          the function returns without running any routine
 */
void POSWorker::__daemon(){
    // guard clause: nothing to run when initialisation failed
    if(unlikely(this->daemon_init() != POS_SUCCESS)){
        POS_WARN_C("failed to init daemon, worker daemon exit");
        return;
    }

#if POS_CONF_EVAL_MigrOptLevel != 0
    // migration optimisation enabled
    this->__daemon_migration_opt();
#elif POS_CONF_EVAL_CkptOptLevel <= 1
    // continuous checkpoint, synchronous variant
    this->__daemon_ckpt_sync();
#elif POS_CONF_EVAL_CkptOptLevel == 2
    // continuous checkpoint, asynchronous variant
    this->__daemon_ckpt_async();
#endif
}

posted @ 2025-04-05 23:54  拾墨、  阅读(15)  评论(0)    收藏  举报