Extreme Learning Machine (ELM): Principles, Implementation, and Applications

ELM is a highly efficient, easy-to-use machine learning algorithm, particularly well suited to rapid modeling and prototyping.
Overview of the Extreme Learning Machine

The extreme learning machine is a single-hidden-layer feedforward neural network. Its defining feature is that the hidden-layer parameters are generated randomly and never adjusted; only the output-layer weights are learned.
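Concretely, for training inputs $X$ and targets $Y$, training reduces to the two steps implemented in the code below:

$$H = g(XW + b), \qquad \beta = H^{\dagger} Y$$

where $W$ and $b$ are the fixed random input weights and biases, $g$ is the activation function, and $H^{\dagger}$ is the Moore-Penrose pseudoinverse of the hidden-layer output matrix $H$.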
| Feature | Advantage | Typical use cases |
|---|---|---|
| Very fast training | 10-1000x faster than conventionally trained neural networks | Real-time systems, large-scale data |
| Good generalization | Not prone to overfitting | Small-sample learning |
| Simple to implement | No elaborate hyperparameter tuning | Rapid prototyping |
| Universal approximation | Can approximate any continuous function | Complex nonlinear problems |
Core ELM Implementation

1. Basic ELM Regression Model
```matlab
function [output_weight, train_accuracy, model] = elm_train_regression(X, Y, hidden_neurons, activation_func)
% ELM regression training
% Inputs:
%   X               - training data (n_samples x n_features)
%   Y               - targets (n_samples x n_outputs)
%   hidden_neurons  - number of hidden neurons
%   activation_func - activation function name
% Outputs:
%   output_weight  - output-layer weights
%   train_accuracy - training-set accuracy metrics
%   model          - trained model struct
[~, n_features] = size(X);

% 1. Randomly generate the input weights and biases
rng(42); % fix the random seed for reproducibility
input_weights = randn(n_features, hidden_neurons);
bias = randn(1, hidden_neurons);

% 2. Compute the hidden-layer output matrix
H = calculate_hidden_output(X, input_weights, bias, activation_func);

% 3. Solve for the output weights (Moore-Penrose pseudoinverse)
output_weight = pinv(H) * Y;

% 4. Predict and evaluate on the training set
Y_pred = H * output_weight;
train_accuracy = calculate_regression_accuracy(Y, Y_pred);

% Save the model
model.input_weights = input_weights;
model.bias = bias;
model.output_weight = output_weight;
model.activation_func = activation_func;
model.hidden_neurons = hidden_neurons;

fprintf('ELM training complete!\n');
fprintf('  Hidden neurons: %d, training RMSE: %.6f, R^2: %.4f\n', ...
    hidden_neurons, train_accuracy.rmse, train_accuracy.r2);
end

function H = calculate_hidden_output(X, input_weights, bias, activation_func)
% Compute the hidden-layer output matrix
H = X * input_weights + repmat(bias, size(X, 1), 1);
switch activation_func
    case 'sigmoid'
        H = 1 ./ (1 + exp(-H));
    case 'tanh'
        H = tanh(H);
    case 'relu'
        H = max(0, H);
    case 'sin'
        H = sin(H);
    case 'rbf'
        H = exp(-H.^2); % Gaussian-shaped response on the pre-activation
    otherwise
        H = 1 ./ (1 + exp(-H)); % default: sigmoid
end
end

function accuracy = calculate_regression_accuracy(Y_true, Y_pred)
% Compute regression accuracy metrics
mse = mean((Y_true - Y_pred).^2);
rmse = sqrt(mse);
ss_res = sum((Y_true - Y_pred).^2);
ss_tot = sum((Y_true - mean(Y_true)).^2);
r2 = 1 - (ss_res / ss_tot);
mae = mean(abs(Y_true - Y_pred));
accuracy = struct('mse', mse, 'rmse', rmse, 'r2', r2, 'mae', mae);
end
```
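A minimal usage sketch, assuming the three functions above are on the MATLAB path; the data here is synthetic and purely illustrative:

```matlab
% Fit a noisy nonlinear target with 40 hidden neurons
X = rand(200, 3);                        % 200 samples, 3 features
Y = sum(X.^2, 2) + 0.05 * randn(200, 1); % noisy nonlinear target
[beta, acc, mdl] = elm_train_regression(X, Y, 40, 'sigmoid');
```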
2. ELM Classification Model
```matlab
function [output_weight, train_accuracy, model] = elm_train_classification(X, Y, hidden_neurons, activation_func)
% ELM classification training
% Inputs:
%   X               - training data
%   Y               - class labels (n_samples x 1), integers starting at 1
%   hidden_neurons  - number of hidden neurons
%   activation_func - activation function name
% Outputs:
%   output_weight  - output-layer weights
%   train_accuracy - training-set accuracy
%   model          - trained model struct
n_samples = size(X, 1);
n_classes = length(unique(Y));

% Convert the labels to one-hot encoding
Y_onehot = zeros(n_samples, n_classes);
for i = 1:n_samples
    Y_onehot(i, Y(i)) = 1;
end

% Train a regression ELM on the one-hot targets
[output_weight, ~, model] = elm_train_regression(X, Y_onehot, hidden_neurons, activation_func);

% Predict on the training set
Y_pred_onehot = elm_predict(model, X);
[~, Y_pred] = max(Y_pred_onehot, [], 2);

% Compute the classification accuracy
train_accuracy = sum(Y_pred == Y) / n_samples;
model.n_classes = n_classes;

fprintf('ELM classification training complete!\n');
fprintf('  Classes: %d, training accuracy: %.4f\n', n_classes, train_accuracy);
end
```
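As an aside, the one-hot loop above can be replaced by a single vectorized statement; an equivalent sketch:

```matlab
% Vectorized one-hot encoding, equivalent to the loop above
Y_onehot = zeros(n_samples, n_classes);
Y_onehot(sub2ind(size(Y_onehot), (1:n_samples)', Y)) = 1;
```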
3. Prediction Functions
```matlab
function Y_pred = elm_predict(model, X)
% ELM prediction
% Inputs:
%   model - trained ELM model
%   X     - test data
% Output:
%   Y_pred - predictions (raw network outputs)

% Compute the hidden-layer output
H = calculate_hidden_output(X, model.input_weights, model.bias, model.activation_func);
% Compute the network output
Y_pred = H * model.output_weight;
% For classification this returns the raw scores (usable as rough class
% confidences); use max(Y_pred, [], 2) to obtain the predicted class.
end

function Y_pred_class = elm_predict_class(model, X)
% ELM classification prediction (returns class indices)
Y_pred = elm_predict(model, X);
[~, Y_pred_class] = max(Y_pred, [], 2);
end
```
Enhanced ELM Variants

1. Regularized ELM (to Prevent Overfitting)
```matlab
function [output_weight, model] = elm_train_regularized(X, Y, hidden_neurons, activation_func, C)
% Regularized (ridge) ELM training
% Input:
%   C - regularization parameter; the solver uses
%       beta = (H'H + I/C)^(-1) H'Y, so smaller C means stronger regularization
[n_samples, n_features] = size(X);

% Randomly generate the input weights and biases
input_weights = randn(n_features, hidden_neurons);
bias = randn(1, hidden_neurons);

% Compute the hidden-layer output matrix
H = calculate_hidden_output(X, input_weights, bias, activation_func);

% Solve the regularized least-squares problem
if n_samples >= hidden_neurons
    output_weight = (H' * H + eye(hidden_neurons) / C) \ (H' * Y);
else
    output_weight = H' * ((H * H' + eye(n_samples) / C) \ Y);
end

% Save the model
model.input_weights = input_weights;
model.bias = bias;
model.output_weight = output_weight;
model.activation_func = activation_func;
model.hidden_neurons = hidden_neurons;
model.C = C;
end
```
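The two branches are the primal and dual forms of the same ridge solution; by the push-through identity they give identical results:

$$\beta = \left(H^{\top}H + \tfrac{I}{C}\right)^{-1} H^{\top} Y = H^{\top}\left(HH^{\top} + \tfrac{I}{C}\right)^{-1} Y$$

The code simply picks whichever form inverts the smaller matrix: $L \times L$ when $n \ge L$, otherwise $n \times n$ (with $n$ samples and $L$ hidden neurons).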
2. Incremental ELM (Online Learning)
```matlab
function model = elm_incremental_learning(model, X_new, Y_new)
% Incremental ELM learning
% Inputs:
%   model        - existing ELM model; model.last_X / model.last_Y must hold
%                  the previously seen data (set them after initial training)
%   X_new, Y_new - new data
% This simple version refits the output weights on all accumulated data with
% a pseudoinverse; a true online method would use recursive least squares
% (see the OS-ELM sketch below).

% Hidden-layer outputs of the old and new data
H_old = calculate_hidden_output(model.last_X, model.input_weights, model.bias, model.activation_func);
H_new = calculate_hidden_output(X_new, model.input_weights, model.bias, model.activation_func);
H_combined = [H_old; H_new];
Y_combined = [model.last_Y; Y_new];
model.output_weight = pinv(H_combined) * Y_combined;

% Accumulate the data so the next update still sees all samples
model.last_X = [model.last_X; X_new];
model.last_Y = [model.last_Y; Y_new];
fprintf('Incremental update complete: %d new samples\n', size(X_new, 1));
end
```
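For a genuinely online update that never stores or revisits old data, the OS-ELM recursive least-squares step can be used instead. A minimal sketch, assuming a hypothetical field model.P initialized as inv(H0' * H0) from an initial batch H0 with at least hidden_neurons samples (none of the training functions above set this up):

```matlab
function model = elm_rls_update(model, X_new, Y_new)
% OS-ELM-style recursive least-squares update of the output weights.
% model.P is an assumed field: the running inverse covariance of the
% hidden outputs, initialized as inv(H0' * H0) after an initial batch.
H = calculate_hidden_output(X_new, model.input_weights, model.bias, model.activation_func);
% Update the inverse covariance (Sherman-Morrison-Woodbury form)
model.P = model.P - model.P * H' / (eye(size(H, 1)) + H * model.P * H') * H * model.P;
% Correct the output weights using the prediction error on the new batch
model.output_weight = model.output_weight + ...
    model.P * H' * (Y_new - H * model.output_weight);
end
```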
Applications

Example 1: Function Fitting
```matlab
% Generate data from a noisy nonlinear function
x = linspace(-10, 10, 1000)';
y = sin(x) + 0.5 * cos(2*x) + 0.3 * sin(3*x) + 0.1 * randn(size(x));

% Shuffled train/test split (a sequential split on sorted x would turn the
% test set into pure extrapolation, which ELM handles poorly)
rng(42);
idx = randperm(length(x));
train_ratio = 0.7;
n_train = floor(train_ratio * length(x));
train_idx = sort(idx(1:n_train));
test_idx = sort(idx(n_train+1:end));
X_train = x(train_idx);  Y_train = y(train_idx);
X_test = x(test_idx);    Y_test = y(test_idx);

% ELM parameters
hidden_neurons = 50;
activation_func = 'sigmoid';

% Train the ELM
[output_weight, train_accuracy, model] = elm_train_regression(...
    X_train, Y_train, hidden_neurons, activation_func);

% Predict on the test set
Y_pred = elm_predict(model, X_test);
test_accuracy = calculate_regression_accuracy(Y_test, Y_pred);

% Visualize the results
figure;
subplot(2,1,1);
plot(X_train, Y_train, 'b.', 'MarkerSize', 8); hold on;
plot(X_test, Y_pred, 'r-', 'LineWidth', 2);
legend('Training data', 'ELM prediction', 'Location', 'best');
title('ELM function fitting');
xlabel('x'); ylabel('y');
grid on;
subplot(2,1,2);
plot(X_test, Y_test - Y_pred, 'g-', 'LineWidth', 1);
title('Prediction error');
xlabel('x'); ylabel('error');
grid on;

fprintf('Test-set performance:\n');
fprintf('  RMSE: %.6f, R^2: %.4f, MAE: %.6f\n', ...
    test_accuracy.rmse, test_accuracy.r2, test_accuracy.mae);
```
Example 2: Classification
```matlab
% Use MATLAB's built-in Fisher iris dataset
load fisheriris;
X = meas;
[Y, class_names] = grp2idx(species); % numeric labels 1..3 plus class names

% Standardize the features
X = zscore(X);

% Train/test split
rng(42); % reproducibility
cv = cvpartition(Y, 'HoldOut', 0.3);
X_train = X(training(cv), :);
Y_train = Y(training(cv));
X_test = X(test(cv), :);
Y_test = Y(test(cv));

% ELM classification
hidden_neurons = 30;
activation_func = 'sigmoid';
[output_weight, train_accuracy, model] = elm_train_classification(...
    X_train, Y_train, hidden_neurons, activation_func);

% Predict on the test set
Y_pred = elm_predict_class(model, X_test);
test_accuracy = sum(Y_pred == Y_test) / length(Y_test);

% Confusion matrix (note: categories(species) fails on a cell array of
% char vectors, so use the class names returned by grp2idx instead)
C = confusionmat(Y_test, Y_pred);
figure;
confusionchart(C, class_names);
title(sprintf('ELM confusion matrix (accuracy: %.2f%%)', test_accuracy*100));

fprintf('Classification performance:\n');
fprintf('  Training accuracy: %.4f\n', train_accuracy);
fprintf('  Test accuracy: %.4f\n', test_accuracy);
```
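To see the effect of the regularization parameter on this same split, a quick illustrative sweep reusing the variables above (the C values are arbitrary):

```matlab
% Sweep the ridge parameter C of the regularized ELM on the iris split
for C = [0.01 1 100 10000]
    Y_onehot = full(sparse((1:length(Y_train))', Y_train, 1)); % one-hot targets
    [~, reg_model] = elm_train_regularized(X_train, Y_onehot, 30, 'sigmoid', C);
    [~, pred] = max(elm_predict(reg_model, X_test), [], 2);
    fprintf('C = %g: test accuracy %.4f\n', C, mean(pred == Y_test));
end
```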
Example 3: Parameter Sensitivity Analysis
```matlab
function results = analyze_elm_parameters(X, Y, problem_type)
% Analyze how ELM parameters affect performance.
% Note: this measures training performance, which keeps improving with more
% neurons; use cross-validation (below) to detect overfitting.
if strcmp(problem_type, 'regression')
    train_func = @elm_train_regression;
else
    train_func = @elm_train_classification;
end

% Parameter grids to test
hidden_neurons_list = [10, 20, 50, 100, 200, 500];
activation_funcs = {'sigmoid', 'tanh', 'relu', 'sin'};
results = struct();

figure;
colors = lines(length(activation_funcs));
for a = 1:length(activation_funcs)
    activation = activation_funcs{a};
    performance = zeros(size(hidden_neurons_list));
    for h = 1:length(hidden_neurons_list)
        hidden_neurons = hidden_neurons_list(h);
        try
            [~, accuracy, ~] = train_func(X, Y, hidden_neurons, activation);
            if strcmp(problem_type, 'regression')
                performance(h) = accuracy.r2; % R^2 as the performance metric
            else
                performance(h) = accuracy;    % classification accuracy
            end
        catch
            performance(h) = 0;
        end
    end
    % Plot the performance curve for this activation
    plot(hidden_neurons_list, performance, 'o-', ...
        'Color', colors(a,:), 'LineWidth', 2, 'MarkerSize', 6);
    hold on;
    results.(activation) = performance;
end

xlabel('Number of hidden neurons');
if strcmp(problem_type, 'regression')
    ylabel('R^2');
    title('ELM regression performance vs. number of neurons');
else
    ylabel('Accuracy');
    title('ELM classification performance vs. number of neurons');
end
legend(activation_funcs, 'Location', 'best');
grid on;
set(gca, 'XScale', 'log');
end

% Usage example:
% analyze_elm_parameters(X_train, Y_train, 'classification');
```
Utility Functions

1. Cross-Validated ELM
```matlab
function [best_model, cv_results] = elm_cross_validation(X, Y, hidden_neurons_list, activation_funcs, k_folds, problem_type)
% k-fold cross-validation over ELM hyperparameters
if nargin < 6
    problem_type = 'regression';
end

if strcmp(problem_type, 'classification')
    cv = cvpartition(Y, 'KFold', k_folds); % stratified folds
else
    cv = cvpartition(length(Y), 'KFold', k_folds);
end
results = struct();
best_accuracy = -inf;
best_params = struct();

for a = 1:length(activation_funcs)
    activation = activation_funcs{a};
    for h = 1:length(hidden_neurons_list)
        hidden_neurons = hidden_neurons_list(h);
        fold_accuracies = zeros(k_folds, 1);
        for fold = 1:k_folds
            train_idx = training(cv, fold);
            test_idx = test(cv, fold);
            X_train = X(train_idx, :);
            Y_train = Y(train_idx);
            X_test = X(test_idx, :);
            Y_test = Y(test_idx);
            try
                if strcmp(problem_type, 'regression')
                    [~, ~, model] = elm_train_regression(X_train, Y_train, hidden_neurons, activation);
                    Y_pred = elm_predict(model, X_test);
                    accuracy = calculate_regression_accuracy(Y_test, Y_pred);
                    fold_accuracies(fold) = accuracy.r2;
                else
                    [~, ~, model] = elm_train_classification(X_train, Y_train, hidden_neurons, activation);
                    Y_pred = elm_predict_class(model, X_test);
                    fold_accuracies(fold) = sum(Y_pred == Y_test) / length(Y_test);
                end
            catch
                fold_accuracies(fold) = 0;
            end
        end
        mean_accuracy = mean(fold_accuracies);
        std_accuracy = std(fold_accuracies);

        % Record the results for this parameter combination
        param_name = sprintf('h%d_%s', hidden_neurons, activation);
        results.(param_name) = struct(...
            'mean_accuracy', mean_accuracy, ...
            'std_accuracy', std_accuracy, ...
            'fold_accuracies', fold_accuracies);

        % Track the best parameters
        if mean_accuracy > best_accuracy
            best_accuracy = mean_accuracy;
            best_params.hidden_neurons = hidden_neurons;
            best_params.activation = activation;
        end
    end
end

% Retrain the final model on all data with the best parameters
if strcmp(problem_type, 'regression')
    [~, ~, best_model] = elm_train_regression(X, Y, best_params.hidden_neurons, best_params.activation);
else
    [~, ~, best_model] = elm_train_classification(X, Y, best_params.hidden_neurons, best_params.activation);
end
cv_results = results;

fprintf('Cross-validation complete! Best parameters:\n');
fprintf('  Hidden neurons: %d, activation: %s, mean performance: %.4f\n', ...
    best_params.hidden_neurons, best_params.activation, best_accuracy);
end
```
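An example call, reusing the iris variables from Example 2 (the parameter grids here are arbitrary choices):

```matlab
[best_model, cv_results] = elm_cross_validation(X_train, Y_train, ...
    [10 30 50 100], {'sigmoid', 'tanh'}, 5, 'classification');
```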
Performance Comparison
```matlab
% A quick regression comparison of ELM against other methods
function compare_methods(X_train, Y_train, X_test, Y_test)
% Compare ELM with a decision tree, SVM regression, and a shallow neural
% network. fitrtree/fitrsvm need the Statistics and Machine Learning
% Toolbox; fitrnet additionally requires R2021a or later.
methods = {'ELM', 'Decision tree', 'SVM', 'Neural network'};
performances = zeros(length(methods), 1);
training_times = zeros(length(methods), 1);

% ELM
tic;
[~, ~, elm_model] = elm_train_regression(X_train, Y_train, 50, 'sigmoid');
Y_pred_elm = elm_predict(elm_model, X_test);
training_times(1) = toc;
acc = calculate_regression_accuracy(Y_test, Y_pred_elm); % MATLAB cannot dot-index a call result, so use a temp variable
performances(1) = acc.r2;

% Decision tree
tic;
tree = fitrtree(X_train, Y_train);
Y_pred_tree = predict(tree, X_test);
training_times(2) = toc;
acc = calculate_regression_accuracy(Y_test, Y_pred_tree);
performances(2) = acc.r2;

% SVM regression
tic;
svm = fitrsvm(X_train, Y_train);
Y_pred_svm = predict(svm, X_test);
training_times(3) = toc;
acc = calculate_regression_accuracy(Y_test, Y_pred_svm);
performances(3) = acc.r2;

% Shallow feedforward neural network
tic;
net = fitrnet(X_train, Y_train);
Y_pred_net = predict(net, X_test);
training_times(4) = toc;
acc = calculate_regression_accuracy(Y_test, Y_pred_net);
performances(4) = acc.r2;

% Display the results
figure;
subplot(1,2,1);
bar(performances);
set(gca, 'XTickLabel', methods);
ylabel('R^2');
title('Accuracy comparison');
grid on;
subplot(1,2,2);
bar(training_times);
set(gca, 'XTickLabel', methods);
ylabel('Training time (s)');
title('Training-time comparison');
grid on;
end
```
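For instance, with the function-fitting variables from Example 1 in scope:

```matlab
compare_methods(X_train, Y_train, X_test, Y_test);
```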
Practical Tips

- Number of hidden neurons: start in the 50-500 range and increase for larger datasets
- Activation function: sigmoid and tanh usually work well; it is cheap to try both
- Regularization parameter: if the model overfits, strengthen the regularization; in the formulation above the penalty term is I/C, so that means decreasing C
- Data standardization: standardize the inputs, using training-set statistics only (see the sketch below)
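A minimal standardization sketch; the point is that the statistics come from the training set only, to avoid test-set leakage (implicit expansion requires R2016b+):

```matlab
% Standardize using training-set statistics only
mu = mean(X_train);
sigma = std(X_train);
sigma(sigma == 0) = 1;                % guard against constant features
X_train_z = (X_train - mu) ./ sigma;
X_test_z  = (X_test  - mu) ./ sigma;
```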
ELM is a genuinely practical and efficient algorithm, especially well suited to scenarios that call for rapid modeling.