MindIE 开启 DeepSeek 的 128K 上下文长度
需要修改的地方有2处:
一、启动脚本:注意主节点与从节点的启动脚本中,环境变量 MIES_CONTAINER_IP 的取值不同。
#!/usr/bin/env bash
# Startup script for the MindIE service daemon (multi-node DeepSeek setup).
#
# Requires bash: the log-variable loops below use the `compgen` builtin.
# Keep every statement on its own line. If the script is collapsed onto a
# single line, the first `#` comments out everything that follows it,
# including the daemon launch.

# Load the Ascend toolkit, ATB, atb-models and MindIE environment scripts.
source /usr/local/Ascend/ascend-toolkit/set_env.sh
source /usr/local/Ascend/nnal/atb/set_env.sh
source /usr/local/Ascend/atb-models/set_env.sh
source /usr/local/Ascend/mindie/set_env.sh

# Node / cluster identity.
# MIES_CONTAINER_IP is the one value that differs between the master and the
# slave copy of this script. Here it equals MASTER_IP.
# NOTE(review): presumably this is the master node's copy -- confirm.
export RANK_TABLE_FILE="/app1/scripts/ranktable.json"
export MIES_CONTAINER_IP="192.168.1.234"
export MASTER_IP="192.168.1.234"
# NOTE(review): presumably the total NPU count across all nodes -- confirm
# against ranktable.json.
export WORLD_SIZE=16

# Runtime tuning knobs, values unchanged. The two commented-out exports are
# kept as disabled options.
export PYTORCH_NPU_ALLOC_CONF=expandable_segments:True
export HCCL_OP_EXPANSION_MODE="AIV"
export NPU_MEMORY_FRACTION=0.96
export ATB_LLM_HCCL_ENABLE=1
#export INF_NAN_MODE_ENABLE=1
export OMP_NUM_THREADS=10
#export TASK_QUEUE_ENABLE=2
export MINDIE_ASYNC_SCHEDULING_ENABLE=1
export ATB_OPERATION_EXECUTE_ASYNC=1
export ATB_LLM_ENABLE_AUTO_TRANSPOSE=0
export HCCL_BUFFSIZE=64
export ATB_WORKSPACE_MEM_ALLOC_ALG_TYPE=3
export ATB_WORKSPACE_MEM_ALLOC_GLOBAL=1
export ATB_LAYER_INTERNAL_TENSOR_REUSE=1

# Prepend jemalloc to LD_PRELOAD. The ${VAR:+...} form appends the previous
# value (with its ':' separator) only when one exists, so no dangling ':' is
# left behind when LD_PRELOAD was unset or empty.
export LD_PRELOAD="/usr/lib64/libjemalloc.so.2${LD_PRELOAD:+:${LD_PRELOAD}}"
export HCCL_ALGO="level0:NA;level1:pipeline"

# Set every exported variable whose name ends in STDOUT or LOG_TO_FILE to 0.
# This must run before the MINDIE_LOG_TO_STDOUT export further down, which
# deliberately turns that one variable back on.
for var in $(compgen -e | grep 'STDOUT$'); do
  export "$var=0"
done
for var in $(compgen -e | grep 'LOG_TO_FILE$'); do
  export "$var=0"
done

export HCCL_CONNECT_TIMEOUT=3600
export HCCL_EXEC_TIMEOUT=0
export MINDIE_LOG_LEVEL=info
export MINDIE_LOG_TO_STDOUT=1

# The daemon is started via a relative path, so abort if the directory is
# missing instead of running ./bin/... from whatever the current directory is.
cd /usr/local/Ascend/mindie/latest/mindie-service/ || {
  printf 'cannot cd to %s\n' "/usr/local/Ascend/mindie/latest/mindie-service/" >&2
  exit 1
}
./bin/mindieservice_daemon
二、配置文件(mindie的/usr/local/Ascend/mindie/latest/mindie-service/conf/config.json):
{ "Version" : "1.0.0", "ServerConfig" : { "ipAddress" : "192.168.1.234", "managementIpAddress" : "192.168.1.234", "port" : 1025, "managementPort" : 1026, "metricsPort" : 1027, "allowAllZeroIpListening" : false, "maxLinkNum" : 1000, "httpsEnabled" : false, "fullTextEnabled" : false, "tlsCaPath" : "security/ca/", "tlsCaFile" : ["ca.pem"], "tlsCert" : "security/certs/server.pem", "tlsPk" : "security/keys/server.key.pem", "tlsPkPwd" : "security/pass/key_pwd.txt", "tlsCrlPath" : "security/certs/", "tlsCrlFiles" : ["server_crl.pem"], "managementTlsCaFile" : ["management_ca.pem"], "managementTlsCert" : "security/certs/management/server.pem", "managementTlsPk" : "security/keys/management/server.key.pem", "managementTlsPkPwd" : "security/pass/management/key_pwd.txt", "managementTlsCrlPath" : "security/management/certs/", "managementTlsCrlFiles" : ["server_crl.pem"], "kmcKsfMaster" : "tools/pmt/master/ksfa", "kmcKsfStandby" : "tools/pmt/standby/ksfb", "inferMode" : "standard", "interCommTLSEnabled" : false, "interCommPort" : 1121, "interCommTlsCaPath" : "security/grpc/ca/", "interCommTlsCaFiles" : ["ca.pem"], "interCommTlsCert" : "security/grpc/certs/server.pem", "interCommPk" : "security/grpc/keys/server.key.pem", "interCommPkPwd" : "security/grpc/pass/key_pwd.txt", "interCommTlsCrlPath" : "security/grpc/certs/", "interCommTlsCrlFiles" : ["server_crl.pem"], "openAiSupport" : "vllm", "tokenTimeout" :3600, "e2eTimeout" : 3600, "distDPServerEnabled":false }, "BackendConfig" : { "backendName" : "mindieservice_llm_engine", "modelInstanceNumber" : 1, "npuDeviceIds" : [[0,1,2,3,4,5,6,7]], "tokenizerProcessNumber" : 8, "multiNodesInferEnabled" : true, "multiNodesInferPort" : 1120, "interNodeTLSEnabled" : false, "interNodeTlsCaPath" : "security/grpc/ca/", "interNodeTlsCaFiles" : ["ca.pem"], "interNodeTlsCert" : "security/grpc/certs/server.pem", "interNodeTlsPk" : "security/grpc/keys/server.key.pem", "interNodeTlsPkPwd" : "security/grpc/pass/mindie_server_key_pwd.txt", 
"interNodeTlsCrlPath" : "security/grpc/certs/", "interNodeTlsCrlFiles" : ["server_crl.pem"], "interNodeKmcKsfMaster" : "tools/pmt/master/ksfa", "interNodeKmcKsfStandby" : "tools/pmt/standby/ksfb", "ModelDeployConfig" : { "maxSeqLen" : 131072, "maxInputTokenLen" : 131072, "truncation" : false, "ModelConfig" : [ { "modelInstanceType" : "Standard", "modelName" : "DeepSeek-R1", "modelWeightPath" : "/app1/models/DeepSeek-R1-0528-w8a8", "worldSize" : 8, "cpuMemSize" : 5, "npuMemSize" : -1, "backendType" : "atb", "trustRemoteCode" : false, "moe_ep": 16, "moe_tp": 1, "sp": 8, "cp": 2, "tp": 8, "dp": 1, "ignore_eos": true, "async_scheduler_wait_time": 120, "kv_trans_timeout": 10, "kv_link_timeout": 1080, "models":{ "deepseekv2": { "ep_level":1, "enable_init_routing_cutoff": true, "topk_scaling_factor": 0.25 } } } ] }, "ScheduleConfig" : { "templateType" : "Standard", "templateName" : "Standard_LLM", "cacheBlockSize" : 128, "maxPrefillBatchSize" : 50, "maxPrefillTokens" : 131072, "prefillTimeMsPerReq" : 150, "prefillPolicyType" : 0, "decodeTimeMsPerReq" : 50, "decodePolicyType" : 0, "maxBatchSize" : 200, "maxIterTimes" : 131072, "maxPreemptCount" : 0, "supportSelectBatch" : false, "maxQueueDelayMicroseconds" : 5000 } } }