xinference文档
启动服务
Windows (PowerShell)
# Use ModelScope as the model download source.
# Fix: the value must be a quoted string — in PowerShell a bare word on the
# right-hand side of an assignment is not a string literal.
$env:XINFERENCE_MODEL_SRC = "modelscope"
# Cache/home directory for downloaded models and runtime files
$env:XINFERENCE_HOME = "D:\AppCache\xinference"
# Start a local Xinference server listening on all interfaces, port 9998
& xinference-local --host 0.0.0.0 --port 9998
Linux (bash)
# Use ModelScope as the model download source (instead of the default hub)
export XINFERENCE_MODEL_SRC=modelscope
# Cache/home directory for downloaded models and runtime files
export XINFERENCE_HOME=/data1/envs/xinference_cache
# NOTE(review): presumably opts out of vLLM's V1 engine and falls back to the
# legacy v0 engine — confirm against the installed vLLM version's docs
export VLLM_USE_V1=0
# Start a local Xinference server on all interfaces, port 9997, with debug logging
xinference-local --host 0.0.0.0 --port 9997  --log-level DEBUG
启动模型
embedding
# Launch the bge-m3 model as an embedding model on the running server
xinference launch --model-name bge-m3 --model-type embedding
llm
# Launch the qwen3 LLM (0.6B parameters, written as 0_6; pytorch weights).
# ${engine} and ${quantization} are placeholders — substitute concrete values
# (e.g. an inference engine and a quantization level) before running.
xinference launch --model-engine ${engine} --model-name qwen3 --size-in-billions 0_6 --model-format pytorch --quantization ${quantization}