60-Day AI Learning Plan | Day 12: Local Model Deployment - Building Offline AI Features
Day 12: Local Model Deployment - Building Offline AI Features
Learning Objectives
Core Topics
1. Local Model Deployment
Why run models locally?
- Data privacy protection
- Lower cost (no API fees)
- Works offline
- No rate limits
Common options:
- Ollama (recommended, easy to use)
- llama.cpp (high performance)
- LM Studio (GUI tool)
- Hugging Face Transformers
2. Ollama Overview
What is Ollama?
- Runs large language models locally
- Simple to install and use
- Supports many open models
- Exposes a local HTTP API (see the sketch after the model list below)
Supported models include:
- Llama 2/3
- Mistral
- CodeLlama
- Phi
- Gemma
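Ollama serves a local HTTP API on http://localhost:11434 by default. As a quick, hedged illustration (not part of today's assignments), here is a minimal Node 18+ sketch that calls the chat endpoint directly, assuming llama2 has already been pulled:
// Minimal sketch: call the local Ollama chat API with the built-in fetch (Node 18+, ESM context)
const response = await fetch('http://localhost:11434/api/chat', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({
    model: 'llama2',
    messages: [{ role: 'user', content: 'Explain front-end development in one sentence' }],
    stream: false
  })
});
const data = await response.json();
console.log(data.message.content); // the assistant's reply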
3. Local vs. Cloud
Local models:
✅ Data stays private
✅ No API fees
✅ Works offline
❌ Requires local hardware
❌ Can be slower
Cloud models:
✅ Strong performance
✅ No hardware needed
❌ API fees
❌ Requires network access
❌ Data leaves your machine
Hands-On Assignments
Assignment 1: Install and Configure Ollama
Installation steps:
# macOS / Linux
curl -fsSL https://ollama.com/install.sh | sh
# Windows
# Download the installer: https://ollama.com/download
# Verify the installation
ollama --version
# Pull a model (Llama 2 7B)
ollama pull llama2
# Test it interactively
ollama run llama2
Common commands:
# List installed models
ollama list
# Run a model
ollama run llama2
# Remove a model
ollama rm llama2
# Show model details
ollama show llama2
Assignment 2: Implement an Ollama API Client
src/services/ollama.js:
import axios from 'axios';
import { logger } from '../utils/logger.js';
/**
* Ollama service
*/
export class OllamaService {
constructor() {
this.baseURL = process.env.OLLAMA_URL || 'http://localhost:11434';
this.defaultModel = process.env.OLLAMA_MODEL || 'llama2';
}
/**
* Check whether Ollama is running
*/
async checkHealth() {
try {
const response = await axios.get(`${this.baseURL}/api/tags`);
return response.status === 200;
} catch (error) {
logger.error('Failed to connect to Ollama:', error.message);
return false;
}
}
/**
* List available models
*/
async listModels() {
try {
const response = await axios.get(`${this.baseURL}/api/tags`);
return response.data.models || [];
} catch (error) {
logger.error('Failed to fetch model list:', error);
throw error;
}
}
/**
* Chat (non-streaming)
*/
async chat(messages, options = {}) {
try {
const {
model = this.defaultModel,
temperature = 0.7,
stream = false
} = options;
const response = await axios.post(
`${this.baseURL}/api/chat`,
{
model: model,
messages: messages,
stream: stream,
options: {
temperature: temperature
}
},
{
timeout: 300000 // 5-minute timeout
}
);
return response.data;
} catch (error) {
logger.error('Ollama chat failed:', error);
throw error;
}
}
/**
* Streaming chat
*/
async streamChat(messages, callbacks = {}, options = {}) {
const { onChunk, onComplete, onError } = callbacks;
const {
model = this.defaultModel,
temperature = 0.7
} = options;
try {
const response = await axios.post(
`${this.baseURL}/api/chat`,
{
model: model,
messages: messages,
stream: true,
options: {
temperature: temperature
}
},
{
responseType: 'stream',
timeout: 300000
}
);
let fullContent = '';
response.data.on('data', (chunk) => {
const lines = chunk.toString().split('\n').filter(line => line.trim());
for (const line of lines) {
try {
const data = JSON.parse(line);
if (data.message?.content) {
const content = data.message.content;
fullContent += content;
if (onChunk) {
onChunk(content);
}
}
if (data.done) {
if (onComplete) {
onComplete({
content: fullContent,
model: data.model,
// Ollama reports token counts via prompt_eval_count / eval_count, not a usage field
usage: {
prompt_tokens: data.prompt_eval_count,
completion_tokens: data.eval_count
}
});
}
}
} catch (e) {
// Ignore lines that fail to parse (e.g. split across chunks)
}
}
});
response.data.on('error', (error) => {
logger.error('Stream response error:', error);
if (onError) {
onError(error);
}
});
} catch (error) {
logger.error('Streaming chat failed:', error);
if (onError) {
onError(error);
}
throw error;
}
}
/**
* Generate an embedding (a dedicated embedding model is usually a better fit than a chat model)
*/
async generateEmbedding(text, model = 'llama2') {
try {
const response = await axios.post(
`${this.baseURL}/api/embeddings`,
{
model: model,
prompt: text
}
);
return response.data.embedding;
} catch (error) {
logger.error('Failed to generate embedding:', error);
throw error;
}
}
/**
* Pull a model
*/
async pullModel(modelName) {
try {
const response = await axios.post(
`${this.baseURL}/api/pull`,
{
name: modelName,
stream: true
},
{
responseType: 'stream'
}
);
return new Promise((resolve, reject) => {
response.data.on('data', (chunk) => {
// Progress updates arrive as newline-delimited JSON; parse defensively
for (const line of chunk.toString().split('\n').filter(l => l.trim())) {
try {
logger.info(`Pull progress: ${JSON.parse(line).status || ''}`);
} catch (e) { /* ignore partial lines */ }
}
});
response.data.on('end', () => {
resolve();
});
response.data.on('error', (error) => {
reject(error);
});
});
} catch (error) {
logger.error('Failed to pull model:', error);
throw error;
}
}
}
export const ollamaService = new OllamaService();
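A quick usage sketch of the service above (my assumption: Ollama is running locally and llama2 has been pulled; OLLAMA_URL and OLLAMA_MODEL fall back to http://localhost:11434 and llama2 when unset):
// Usage sketch (ESM, top-level await); adjust the import path to your project layout
import { ollamaService } from './src/services/ollama.js';

if (await ollamaService.checkHealth()) {
  const result = await ollamaService.chat([
    { role: 'user', content: 'Explain front-end development in one sentence' }
  ]);
  console.log(result.message.content);
} else {
  console.log('Ollama is not running - start the desktop app or run `ollama serve`');
}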
Assignment 3: Create Local-Model Routes
src/routes/local-llm.js:
import express from 'express';
import { ollamaService } from '../services/ollama.js';
import { logger } from '../utils/logger.js';
export const localLLMRouter = express.Router();
// GET /api/local-llm/health - health check
localLLMRouter.get('/health', async (req, res) => {
try {
const isHealthy = await ollamaService.checkHealth();
res.json({
success: isHealthy,
message: isHealthy ? 'Ollama is running' : 'Ollama is not running'
});
} catch (error) {
res.json({
success: false,
message: 'Failed to connect to Ollama'
});
}
});
// GET /api/local-llm/models - list available models
localLLMRouter.get('/models', async (req, res) => {
try {
const models = await ollamaService.listModels();
res.json({
success: true,
data: models
});
} catch (error) {
logger.error('Failed to fetch model list:', error);
res.status(500).json({
success: false,
error: error.message
});
}
});
// POST /api/local-llm/chat - chat (non-streaming)
localLLMRouter.post('/chat', async (req, res) => {
try {
const { message, conversationHistory = [], model, temperature } = req.body;
if (!message) {
return res.status(400).json({
success: false,
error: 'Message content is required'
});
}
const messages = [
...conversationHistory,
{ role: 'user', content: message }
];
const result = await ollamaService.chat(messages, {
model: model || 'llama2',
temperature: temperature || 0.7
});
res.json({
success: true,
data: {
message: result.message.content,
model: result.model,
// Ollama reports token counts via prompt_eval_count / eval_count
usage: {
prompt_tokens: result.prompt_eval_count,
completion_tokens: result.eval_count
}
}
});
} catch (error) {
logger.error('Local LLM chat error:', error);
res.status(500).json({
success: false,
error: error.message
});
}
});
// POST /api/local-llm/stream - streaming chat
localLLMRouter.post('/stream', async (req, res) => {
try {
const { message, conversationHistory = [], model, temperature } = req.body;
if (!message) {
return res.status(400).json({
success: false,
error: 'Message content is required'
});
}
res.setHeader('Content-Type', 'text/event-stream');
res.setHeader('Cache-Control', 'no-cache');
res.setHeader('Connection', 'keep-alive');
const messages = [
...conversationHistory,
{ role: 'user', content: message }
];
await ollamaService.streamChat(
messages,
{
onChunk: (content) => {
res.write(`data: ${JSON.stringify({ content })}\n\n`);
},
onComplete: (result) => {
res.write(`data: ${JSON.stringify({
done: true,
model: result.model,
usage: result.usage
})}\n\n`);
res.end();
},
onError: (error) => {
res.write(`data: ${JSON.stringify({
error: error.message
})}\n\n`);
res.end();
}
},
{
model: model || 'llama2',
temperature: temperature || 0.7
}
);
} catch (error) {
logger.error('Streaming chat error:', error);
if (!res.headersSent) {
res.status(500).json({
success: false,
error: error.message
});
}
}
});
// POST /api/local-llm/pull - pull a model
localLLMRouter.post('/pull', async (req, res) => {
try {
const { model } = req.body;
if (!model) {
return res.status(400).json({
success: false,
error: 'Model name is required'
});
}
await ollamaService.pullModel(model);
res.json({
success: true,
message: `Model ${model} pulled successfully`
});
} catch (error) {
logger.error('Failed to pull model:', error);
res.status(500).json({
success: false,
error: error.message
});
}
});
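For these endpoints to be reachable, the router still has to be mounted in the Express app. A minimal sketch, assuming an src/app.js entry point and port 3000 (adjust to the actual project structure):
// src/app.js - mounting sketch (file name and port are assumptions)
import express from 'express';
import { localLLMRouter } from './routes/local-llm.js';

const app = express();
app.use(express.json());
app.use('/api/local-llm', localLLMRouter);

app.listen(3000, () => console.log('API listening on http://localhost:3000'));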
Assignment 4: Implement a Model-Switching Service
src/services/model-switcher.js:
import { ollamaService } from './ollama.js';
import { chatWithAI } from './openai.js';
import { logger } from '../utils/logger.js';
/**
* Model-switching service
*/
export class ModelSwitcher {
constructor() {
this.currentProvider = process.env.DEFAULT_PROVIDER || 'openai';
this.fallbackEnabled = true;
}
/**
* Switch the active provider
*/
setProvider(provider) {
if (['openai', 'ollama'].includes(provider)) {
this.currentProvider = provider;
logger.info(`Switched provider to ${provider}`);
return true;
}
return false;
}
/**
* Get the current provider
*/
getProvider() {
return this.currentProvider;
}
/**
* Chat (routed to the current provider)
*/
async chat(message, conversationHistory = [], options = {}) {
try {
if (this.currentProvider === 'ollama') {
return await this.chatWithOllama(message, conversationHistory, options);
} else {
return await this.chatWithOpenAI(message, conversationHistory, options);
}
} catch (error) {
// On failure, try the other provider if fallback is enabled
if (this.fallbackEnabled) {
logger.warn(`${this.currentProvider} failed, trying fallback`);
const fallbackProvider = this.currentProvider === 'openai' ? 'ollama' : 'openai';
try {
if (fallbackProvider === 'ollama') {
return await this.chatWithOllama(message, conversationHistory, options);
} else {
return await this.chatWithOpenAI(message, conversationHistory, options);
}
} catch (fallbackError) {
logger.error('Fallback also failed:', fallbackError);
throw error;
}
}
throw error;
}
}
/**
* Chat via OpenAI
*/
async chatWithOpenAI(message, conversationHistory, options) {
const result = await chatWithAI(message, conversationHistory, options);
return {
content: result.content,
provider: 'openai',
usage: result.usage
};
}
/**
* Chat via Ollama
*/
async chatWithOllama(message, conversationHistory, options) {
const messages = [
...conversationHistory,
{ role: 'user', content: message }
];
const result = await ollamaService.chat(messages, {
model: options.model || 'llama2',
temperature: options.temperature || 0.7
});
return {
content: result.message.content,
provider: 'ollama',
// Ollama reports token counts via prompt_eval_count / eval_count
usage: {
prompt_tokens: result.prompt_eval_count,
completion_tokens: result.eval_count
}
};
}
/**
* Streaming chat
*/
async streamChat(message, conversationHistory, callbacks, options) {
if (this.currentProvider === 'ollama') {
const messages = [
...conversationHistory,
{ role: 'user', content: message }
];
return await ollamaService.streamChat(messages, callbacks, options);
} else {
// OpenAI streaming
return await chatWithAI(message, conversationHistory, {
...options,
stream: true,
callbacks: callbacks
});
}
}
}
export const modelSwitcher = new ModelSwitcher();
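A short usage sketch of the switcher (assuming both providers are configured; the question text is only an example):
// Prefer the local provider; chat() falls back to OpenAI automatically on failure
import { modelSwitcher } from './src/services/model-switcher.js';

modelSwitcher.setProvider('ollama');
const reply = await modelSwitcher.chat('Explain front-end development in one sentence');
console.log(`[${reply.provider}] ${reply.content}`);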
Assignment 5: Performance Comparison Test
test/performance-comparison.js:
import { ollamaService } from '../src/services/ollama.js';
import { chatWithAI } from '../src/services/openai.js';
/**
* Performance comparison test
*/
async function performanceComparison() {
const testQuestion = 'Explain front-end development in one sentence';
const iterations = 5;
console.log('=== Performance Comparison ===\n');
console.log(`Test question: ${testQuestion}`);
console.log(`Iterations: ${iterations}\n`);
// Test OpenAI
console.log('Testing OpenAI...');
const openaiTimes = [];
for (let i = 0; i < iterations; i++) {
const start = Date.now();
await chatWithAI(testQuestion);
const duration = Date.now() - start;
openaiTimes.push(duration);
console.log(`  Run ${i + 1}: ${duration}ms`);
}
// Test Ollama
console.log('\nTesting Ollama...');
const ollamaTimes = [];
for (let i = 0; i < iterations; i++) {
const start = Date.now();
await ollamaService.chat([
{ role: 'user', content: testQuestion }
]);
const duration = Date.now() - start;
ollamaTimes.push(duration);
console.log(`  Run ${i + 1}: ${duration}ms`);
}
// Aggregate the results
const avgOpenAI = openaiTimes.reduce((a, b) => a + b) / iterations;
const avgOllama = ollamaTimes.reduce((a, b) => a + b) / iterations;
console.log('\n=== Results ===');
console.log(`OpenAI average response time: ${avgOpenAI.toFixed(2)}ms`);
console.log(`Ollama average response time: ${avgOllama.toFixed(2)}ms`);
console.log(`Relative difference: ${((avgOllama / avgOpenAI - 1) * 100).toFixed(2)}%`);
}
performanceComparison().catch(console.error);
Assignment 6: Front-End Model Selector
ModelSelector.vue:
<template>
<div class="model-selector">
<h3>Model Selection</h3>
<div class="provider-tabs">
<button
v-for="provider in providers"
:key="provider.id"
:class="['tab', { active: currentProvider === provider.id }]"
@click="switchProvider(provider.id)">
{{ provider.name }}
</button>
</div>
<!-- OpenAI models -->
<div v-if="currentProvider === 'openai'" class="model-list">
<div
v-for="model in openaiModels"
:key="model.id"
:class="['model-item', { active: selectedModel === model.id }]"
@click="selectModel(model.id)">
<div class="model-name">{{ model.name }}</div>
<div class="model-desc">{{ model.description }}</div>
<div class="model-info">
<span>Cost: {{ model.cost }}</span>
<span>Speed: {{ model.speed }}</span>
</div>
</div>
</div>
<!-- Ollama models -->
<div v-if="currentProvider === 'ollama'" class="model-list">
<div v-if="loadingModels" class="loading">Loading...</div>
<div
v-for="model in ollamaModels"
:key="model.name"
:class="['model-item', { active: selectedModel === model.name }]"
@click="selectModel(model.name)">
<div class="model-name">{{ model.name }}</div>
<div class="model-info">
<span>Size: {{ formatSize(model.size) }}</span>
<span>Local model</span>
</div>
</div>
<button @click="loadOllamaModels" class="btn-refresh">
Refresh model list
</button>
</div>
<div class="model-status">
<div v-if="currentProvider === 'ollama'">
<span :class="['status-dot', { online: ollamaOnline }]"></span>
{{ ollamaOnline ? 'Ollama online' : 'Ollama offline' }}
</div>
</div>
</div>
</template>
<script>
export default {
name: 'ModelSelector',
data() {
return {
currentProvider: 'openai',
selectedModel: 'gpt-3.5-turbo',
loadingModels: false,
ollamaOnline: false,
providers: [
{ id: 'openai', name: 'OpenAI (cloud)' },
{ id: 'ollama', name: 'Ollama (local)' }
],
openaiModels: [
{
id: 'gpt-3.5-turbo',
name: 'GPT-3.5 Turbo',
description: 'Fast and inexpensive',
cost: 'Low',
speed: 'Fast'
},
{
id: 'gpt-4',
name: 'GPT-4',
description: 'Most capable',
cost: 'High',
speed: 'Medium'
}
],
ollamaModels: []
};
},
mounted() {
this.checkOllamaHealth();
this.loadOllamaModels();
},
methods: {
async switchProvider(provider) {
this.currentProvider = provider;
if (provider === 'ollama') {
await this.loadOllamaModels();
}
},
selectModel(modelId) {
this.selectedModel = modelId;
this.$emit('model-changed', {
provider: this.currentProvider,
model: modelId
});
},
async checkOllamaHealth() {
try {
const response = await fetch('http://localhost:3000/api/local-llm/health');
const result = await response.json();
this.ollamaOnline = result.success;
} catch (error) {
this.ollamaOnline = false;
}
},
async loadOllamaModels() {
this.loadingModels = true;
try {
const response = await fetch('http://localhost:3000/api/local-llm/models');
const result = await response.json();
if (result.success) {
this.ollamaModels = result.data;
}
} catch (error) {
console.error('Failed to load models:', error);
} finally {
this.loadingModels = false;
}
},
formatSize(bytes) {
if (bytes < 1024) return bytes + ' B';
if (bytes < 1024 * 1024) return (bytes / 1024).toFixed(2) + ' KB';
if (bytes < 1024 * 1024 * 1024) return (bytes / (1024 * 1024)).toFixed(2) + ' MB';
// Local model files are typically several GB
return (bytes / (1024 * 1024 * 1024)).toFixed(2) + ' GB';
}
}
};
</script>
<style scoped>
.model-selector {
padding: 20px;
}
.provider-tabs {
display: flex;
gap: 10px;
margin-bottom: 20px;
}
.tab {
padding: 10px 20px;
border: 1px solid #ddd;
background: white;
border-radius: 4px;
cursor: pointer;
}
.tab.active {
background: #1976d2;
color: white;
}
.model-list {
display: flex;
flex-direction: column;
gap: 10px;
}
.model-item {
padding: 15px;
border: 1px solid #ddd;
border-radius: 8px;
cursor: pointer;
transition: all 0.2s;
}
.model-item:hover {
border-color: #1976d2;
background: #f5f5f5;
}
.model-item.active {
border-color: #1976d2;
background: #e3f2fd;
}
.status-dot {
display: inline-block;
width: 8px;
height: 8px;
border-radius: 50%;
background: #f00;
margin-right: 8px;
}
.status-dot.online {
background: #0f0;
}
</style>
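The chat view itself is not shown here. As a rough sketch, the front end could consume the POST /api/local-llm/stream endpoint from Assignment 3 like this; streamLocalChat is a hypothetical helper name, not part of the code above:
// Sketch: reading the SSE-style stream emitted by POST /api/local-llm/stream
async function streamLocalChat(message, onChunk) {
  const response = await fetch('http://localhost:3000/api/local-llm/stream', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ message })
  });
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    buffer += decoder.decode(value, { stream: true });
    // Each event arrives as "data: {...}\n\n"
    const events = buffer.split('\n\n');
    buffer = events.pop(); // keep any incomplete trailing event
    for (const event of events) {
      if (!event.startsWith('data: ')) continue;
      const payload = JSON.parse(event.slice(6));
      if (payload.content) onChunk(payload.content);
      if (payload.error) throw new Error(payload.error);
    }
  }
}
// Usage: streamLocalChat('Hello', chunk => { /* append chunk to the chat UI */ });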
Problems Encountered
Problem 1: Ollama Not Running
Solution:
// Check health before calling the model
const isHealthy = await ollamaService.checkHealth();
if (!isHealthy) {
throw new Error('Ollama is not running, please start Ollama first');
}
Problem 2: Slow Model Responses
Solutions:
- Use a smaller model (e.g. llama2:7b)
- Optimize the hardware configuration
- Tune request parameters (lower temperature; see the sketch below)
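To illustrate the parameter-tuning point above: Ollama's chat endpoint also accepts options such as num_predict (caps output length) and num_ctx (context window size), plus a keep_alive field that keeps the model loaded between requests. A hedged sketch of a direct call using them (values are illustrative, tune for your hardware):
import axios from 'axios';

// Latency-oriented request options
const response = await axios.post('http://localhost:11434/api/chat', {
  model: 'llama2',
  messages: [{ role: 'user', content: 'Explain front-end development in one sentence' }],
  stream: false,
  keep_alive: '10m',       // keep the model loaded in memory between requests
  options: {
    temperature: 0.3,      // lower temperature, more focused output
    num_predict: 256,      // cap the number of generated tokens
    num_ctx: 2048          // smaller context window uses less memory
  }
});
console.log(response.data.message.content);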
Learning Summary
Today's Takeaways
- ✅ Understood local model deployment
- ✅ Learned how to use Ollama
- ✅ Implemented offline AI features
- ✅ Ran a performance comparison test
- ✅ Built model-switching support
Key Points
- Local models protect privacy; data never leaves your machine
- Ollama is simple to use and quick to deploy
- Compare performance and choose based on your needs
- Model switching keeps usage flexible
Recommendations
- Development and testing: use local models
- Production: choose based on requirements
- Sensitive data: prefer local models
- High-performance needs: use cloud models
Tomorrow's Plan
Tomorrow we will learn: AI Agent development.
Looking forward to it! 🚀
Code Repository
Project updates:
- ✅ Ollama integration
- ✅ Local model service
- ✅ Model switching
- ✅ Performance tests
GitHub commit: Day 12 - Local Model Deployment
Tags: #AILearning #LocalModels #Ollama #OfflineAI #PerformanceOptimization #StudyNotes
Closing Thoughts
Today I learned local model deployment and used Ollama to build offline AI features.
This matters for both data privacy and cost control. Tomorrow we will move on to AI Agent development
and let the AI carry out tasks on its own!
Keep it up! 💪
Quick Checklist
Finish these and Day 12 is complete! ✅
