大模型部署
下载cuda
安装必要依赖
# Install build prerequisites for the NVIDIA driver / CUDA runfile installer.
# Update the system FIRST: 'yum update -y' may pull in a newer kernel, and
# kernel-headers/kernel-devel must match the *running* kernel ("$(uname -r)").
# If the update installs a new kernel, reboot before running the next steps.
yum update -y
yum -y install kernel-headers-"$(uname -r)"
yum -y install kernel-devel-"$(uname -r)"
# pkgconfig and libglvnd-devel are required by the NVIDIA driver installer.
yum -y install pkgconfig libglvnd-devel
# Toolchain to build the kernel modules, plus tar to unpack archives.
yum -y install gcc gcc-c++ make tar
安装cuda
# Make the CUDA 12.4.1 runfile installer (bundles driver 550.54.15) executable
# and launch its interactive installer.
# NOTE(review): the installer must run as root — this runbook appears to be
# executed as root already (yum is invoked without sudo); confirm.
chmod 755 cuda_12.4.1_550.54.15_linux.run
./cuda_12.4.1_550.54.15_linux.run
安装验证
nvidia-smi

安装NVIDIA 容器工具包
curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
启用镜像源
yum-config-manager --enable nvidia-container-toolkit-experimental
安装
yum install -y nvidia-container-toolkit
重启docker
systemctl restart docker
部署模型
拉取推理框架
docker pull registry.cn-hangzhou.aliyuncs.com/xprobe_xinference/xinference:latest
运行推理框架
# Run Xinference detached. Flag notes (comments cannot be interleaved inside
# the backslash continuations, so they are collected here):
#   -v /data/xinference/data:/data  : persist Xinference state on the host
#                                     (matches XINFERENCE_HOME=/data below)
#   -v /data/models:/data/models    : mount the host model directory
#   XINFERENCE_MODEL_SRC=modelscope : download models from ModelScope
#   --net=host                      : share the host network stack, so no -p
#                                     port mapping is used
#   --privileged                    : full device access. NOTE(review): with
#                                     the container toolkit and '--gpus all'
#                                     this is usually unnecessary and widens
#                                     the attack surface — confirm it is
#                                     actually required here.
#   --shm-size=100G                 : large shared memory segment. NOTE(review):
#                                     confirm the host has this much RAM.
#   xinference-local -H 0.0.0.0     : container command; listen on all
#                                     interfaces (reachable from outside —
#                                     ensure firewalling is in place)
docker run -d \
-v /data/xinference/data:/data \
-v /data/models:/data/models \
-e XINFERENCE_MODEL_SRC=modelscope \
-e XINFERENCE_HOME=/data \
--net=host \
--privileged \
--restart=always \
--gpus all \
--shm-size=100G \
--name xinference-latest \
registry.cn-hangzhou.aliyuncs.com/xprobe_xinference/xinference:latest \
xinference-local -H 0.0.0.0
拉取模型
# One-time setup: install the Git LFS hooks for the current user so large
# model weight files are fetched on checkout.
git lfs install
# Clone the Qwen2.5-14B-Instruct weights from ModelScope.
# Fix: 'git lfs clone' is deprecated (since Git 2.15 plain 'git clone' fetches
# LFS objects during checkout with equivalent performance); use 'git clone'.
git clone https://www.modelscope.cn/Qwen/Qwen2.5-14B-Instruct.git

浙公网安备 33010602011771号