大模型部署

下载cuda

# Download the CUDA 12.4.1 local runfile installer (bundles driver 550.54.15).
wget https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run

安装必要依赖

# Kernel headers/devel must match the running kernel so the driver module builds.
yum -y install "kernel-headers-$(uname -r)" "kernel-devel-$(uname -r)"

# Refresh remaining system packages.
# NOTE(review): this may pull in a newer kernel than the one currently running;
# if it does, reboot before running the CUDA installer — confirm on the target host.
yum -y update

# Build toolchain and libraries required by the CUDA runfile installer.
yum -y install pkgconfig libglvnd-devel gcc gcc-c++ make tar

安装cuda

# The runfile is a self-extracting shell archive; mark it executable and
# launch the interactive installer.
chmod 755 cuda_12.4.1_550.54.15_linux.run
sh cuda_12.4.1_550.54.15_linux.run

安装验证

# Verify the NVIDIA driver is loaded (nvidia-smi ships with the driver).
nvidia-smi

（截图：nvidia-smi 验证输出）

安装NVIDIA 容器工具包

# Add the NVIDIA container-toolkit yum repo.
# Fixes: stray '\' before 'sudo' (line-wrap residue from the original paste),
# and -f so curl fails on an HTTP error instead of silently piping an error
# page into the repo file.
curl -fsSL https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | \
  sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo

启用镜像源

# Optional: also enable the experimental packages repo. The stable repo added
# above is sufficient for a normal install — NOTE(review): confirm this
# experimental channel is actually wanted here.
yum-config-manager --enable nvidia-container-toolkit-experimental

安装

# Install the NVIDIA container toolkit (provides nvidia-ctk and the runtime
# hooks Docker needs for --gpus support).
yum install -y nvidia-container-toolkit

重启docker

# Register the NVIDIA runtime in Docker's daemon config, then restart the
# daemon to apply it. Without this configure step, `docker run --gpus all`
# fails because dockerd does not know about the NVIDIA runtime.
nvidia-ctk runtime configure --runtime=docker
systemctl restart docker

部署模型

拉取推理框架

# Pull the Xinference inference-server image from the Aliyun registry mirror.
docker pull registry.cn-hangzhou.aliyuncs.com/xprobe_xinference/xinference:latest

运行推理框架

# Start Xinference with GPU access.
# Fix: the original had a blank line after every `\` continuation, which
# terminates the command when copy-pasted (the `\`-newline joins onto an empty
# line, and each following `-v ...`/`-e ...` line runs as a separate failing
# command). The command must be one contiguous block.
#   -v /data/xinference/data:/data  : persist XINFERENCE_HOME on the host
#   -v /data/models:/data/models    : expose locally downloaded model weights
#   XINFERENCE_MODEL_SRC=modelscope : download models from ModelScope
#   --net=host                      : serve directly on the host network
#   --shm-size=100G                 : large shared memory for inference workers
#   --privileged                    : NOTE(review): very broad; --gpus all is
#                                     usually sufficient — confirm it is needed
docker run -d \
  --name xinference-latest \
  --restart=always \
  --net=host \
  --gpus all \
  --shm-size=100G \
  --privileged \
  -v /data/xinference/data:/data \
  -v /data/models:/data/models \
  -e XINFERENCE_MODEL_SRC=modelscope \
  -e XINFERENCE_HOME=/data \
  registry.cn-hangzhou.aliyuncs.com/xprobe_xinference/xinference:latest \
  xinference-local -H 0.0.0.0

拉取模型

# Set up Git LFS hooks once for this user.
git lfs install
# 'git lfs clone' is deprecated; after 'git lfs install', a plain clone
# downloads LFS objects automatically via the smudge filter (and newer git-lfs
# versions just forward to 'git clone' anyway).
git clone https://www.modelscope.cn/Qwen/Qwen2.5-14B-Instruct.git

posted @ 2025-11-20 16:10  小蓝莓  阅读(10)  评论(0)    收藏  举报