MindIE 开启 DeepSeek 的 128K 上下文(maxSeqLen = 131072)

需要修改的地方有 2 处:启动脚本和 MindIE 的 config.json 配置文件。

一、启动脚本:注意主节点和从节点的启动脚本中,环境变量 MIES_CONTAINER_IP 的取值不一样(下面的示例中它与 MASTER_IP 相同,即主节点的脚本)。

#!/usr/bin/env bash
# Launch script for the MindIE service daemon on one node of a multi-node
# deployment. Requires bash: the log-silencing loops below rely on `compgen`.

# Load the Ascend toolkit, ATB, atb-models and MindIE environments into
# this shell so the exports below and the daemon inherit them.
source /usr/local/Ascend/ascend-toolkit/set_env.sh
source /usr/local/Ascend/nnal/atb/set_env.sh
source /usr/local/Ascend/atb-models/set_env.sh
source /usr/local/Ascend/mindie/set_env.sh

# Cluster topology. MIES_CONTAINER_IP is the value that differs between the
# master and slave copies of this script; here it equals MASTER_IP.
export RANK_TABLE_FILE="/app1/scripts/ranktable.json"
export MIES_CONTAINER_IP="192.168.1.234"
export MASTER_IP="192.168.1.234"
export WORLD_SIZE=16

# NPU memory, HCCL and threading settings (values kept exactly as published).
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export HCCL_OP_EXPANSION_MODE="AIV"
export NPU_MEMORY_FRACTION=0.96
export ATB_LLM_HCCL_ENABLE=1
#export INF_NAN_MODE_ENABLE=1
export OMP_NUM_THREADS=10
#export TASK_QUEUE_ENABLE=2

# Scheduling, workspace and tensor-reuse settings for MindIE / ATB.
export MINDIE_ASYNC_SCHEDULING_ENABLE=1
export ATB_OPERATION_EXECUTE_ASYNC=1
export ATB_LLM_ENABLE_AUTO_TRANSPOSE=0
export HCCL_BUFFSIZE=64
export ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE=3
export ATB_WORKSPACE_MEM_ALLOC_GLOBAL=1
export ATB_LAYER_INTERNAL_TENSOR_REUSE=1

# Put jemalloc first in the preload list. The ${VAR:+...} form appends the
# previous value only when there is one, so an empty or unset LD_PRELOAD does
# not leave a dangling ':' at the end.
export LD_PRELOAD="/usr/lib64/libjemalloc.so.2${LD_PRELOAD:+:${LD_PRELOAD}}"

export HCCL_ALGO="level0:NA;level1:pipeline"

# Set every exported variable whose name ends in STDOUT or LOG_TO_FILE to 0.
# This must run before MINDIE_LOG_TO_STDOUT is set to 1 below, otherwise that
# setting would be overwritten as well.
for var in $(compgen -e | grep 'STDOUT$'); do
  export "$var=0"
done
for var in $(compgen -e | grep 'LOG_TO_FILE$'); do
  export "$var=0"
done

export HCCL_CONNECT_TIMEOUT=3600
export HCCL_EXEC_TIMEOUT=0
export MINDIE_LOG_LEVEL=info
export MINDIE_LOG_TO_STDOUT=1

# Start the daemon only if the service directory could be entered; otherwise
# the relative ./bin path would be resolved against the wrong directory.
cd /usr/local/Ascend/mindie/latest/mindie-service/ \
  && ./bin/mindieservice_daemon

 

二、配置文件(MindIE 的 /usr/local/Ascend/mindie/latest/mindie-service/conf/config.json):

{
    "Version" : "1.0.0",

    "ServerConfig" :
    {
        "ipAddress" : "192.168.1.234",
        "managementIpAddress" : "192.168.1.234",
        "port" : 1025,
        "managementPort" : 1026,
        "metricsPort" : 1027,
        "allowAllZeroIpListening" : false,
        "maxLinkNum" : 1000,
        "httpsEnabled" : false,
        "fullTextEnabled" : false,
        "tlsCaPath" : "security/ca/",
        "tlsCaFile" : ["ca.pem"],
        "tlsCert" : "security/certs/server.pem",
        "tlsPk" : "security/keys/server.key.pem",
        "tlsPkPwd" : "security/pass/key_pwd.txt",
        "tlsCrlPath" : "security/certs/",
        "tlsCrlFiles" : ["server_crl.pem"],
        "managementTlsCaFile" : ["management_ca.pem"],
        "managementTlsCert" : "security/certs/management/server.pem",
        "managementTlsPk" : "security/keys/management/server.key.pem",
        "managementTlsPkPwd" : "security/pass/management/key_pwd.txt",
        "managementTlsCrlPath" : "security/management/certs/",
        "managementTlsCrlFiles" : ["server_crl.pem"],
        "kmcKsfMaster" : "tools/pmt/master/ksfa",
        "kmcKsfStandby" : "tools/pmt/standby/ksfb",
        "inferMode" : "standard",
        "interCommTLSEnabled" : false,
        "interCommPort" : 1121,
        "interCommTlsCaPath" : "security/grpc/ca/",
        "interCommTlsCaFiles" : ["ca.pem"],
        "interCommTlsCert" : "security/grpc/certs/server.pem",
        "interCommPk" : "security/grpc/keys/server.key.pem",
        "interCommPkPwd" : "security/grpc/pass/key_pwd.txt",
        "interCommTlsCrlPath" : "security/grpc/certs/",
        "interCommTlsCrlFiles" : ["server_crl.pem"],
        "openAiSupport" : "vllm",
        "tokenTimeout" :3600,
        "e2eTimeout" : 3600,
        "distDPServerEnabled":false
    },

    "BackendConfig" : {
        "backendName" : "mindieservice_llm_engine",
        "modelInstanceNumber" : 1,
        "npuDeviceIds" : [[0,1,2,3,4,5,6,7]],
        "tokenizerProcessNumber" : 8,
        "multiNodesInferEnabled" : true,
        "multiNodesInferPort" : 1120,
        "interNodeTLSEnabled" : false,
        "interNodeTlsCaPath" : "security/grpc/ca/",
        "interNodeTlsCaFiles" : ["ca.pem"],
        "interNodeTlsCert" : "security/grpc/certs/server.pem",
        "interNodeTlsPk" : "security/grpc/keys/server.key.pem",
        "interNodeTlsPkPwd" : "security/grpc/pass/mindie_server_key_pwd.txt",
        "interNodeTlsCrlPath" : "security/grpc/certs/",
        "interNodeTlsCrlFiles" : ["server_crl.pem"],
        "interNodeKmcKsfMaster" : "tools/pmt/master/ksfa",
        "interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb",
        "ModelDeployConfig" :
        {
            "maxSeqLen" : 131072,
            "maxInputTokenLen" : 131072,
            "truncation" : false,
            "ModelConfig" : [
                {
                    "modelInstanceType" : "Standard",
                    "modelName" : "DeepSeek-R1",
                    "modelWeightPath" : "/app1/models/DeepSeek-R1-0528-w8a8",
                    "worldSize" : 8,
                    "cpuMemSize" : 5,
                    "npuMemSize" : -1,
                    "backendType" : "atb",
                    "trustRemoteCode" : false,
                    "moe_ep": 16,
                    "moe_tp": 1,
                    "sp": 8,
                    "cp": 2,
                    "tp": 8,
                    "dp": 1,
                    "ignore_eos": true,
                    "async_scheduler_wait_time": 120,
                    "kv_trans_timeout": 10,
                    "kv_link_timeout": 1080,
                    "models": {
                        "deepseekv2": {
                            "ep_level": 1,
                            "enable_init_routing_cutoff": true,
                            "topk_scaling_factor": 0.25
                        }
                    }
                }
            ]
        },

        "ScheduleConfig" :
        {
            "templateType" : "Standard",
            "templateName" : "Standard_LLM",
            "cacheBlockSize" : 128,

            "maxPrefillBatchSize" : 50,
            "maxPrefillTokens" : 131072,
            "prefillTimeMsPerReq" : 150,
            "prefillPolicyType" : 0,

            "decodeTimeMsPerReq" : 50,
            "decodePolicyType" : 0,

            "maxBatchSize" : 200,
            "maxIterTimes" : 131072,
            "maxPreemptCount" : 0,
            "supportSelectBatch" : false,
            "maxQueueDelayMicroseconds" : 5000
        }
    }
}

 

posted @ 2025-10-11 16:43  badwood  阅读(7)  评论(0)    收藏  举报
Badwood's Blog