Optimizing Gaussian Mixture Models (GMMs) with the Cross-Entropy Method
This article uses the cross-entropy method to optimize a Gaussian mixture model (GMM). Cross-entropy optimization is a powerful stochastic optimization technique that is particularly well suited to complex, multimodal optimization problems.
Principles of Cross-Entropy Optimization
The cross-entropy method iteratively updates the parameters of a sampling distribution so that it gradually concentrates around the optimum.
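Before specializing to GMMs, the loop is easiest to see on a toy problem. The sketch below is not part of the GMM code that follows; the objective and all constants are illustrative. It runs the four CE steps — sample, evaluate, select an elite subset, refit the sampling distribution — with a single Gaussian as the sampling distribution.
% Minimal CE sketch on a 1-D maximization problem (illustrative only).
f = @(x) -(x - 3).^2;        % toy objective to maximize (optimum at x = 3)
mu = 0; sigma = 5;           % initial Gaussian sampling distribution
for iter = 1:50
    samples = mu + sigma * randn(200, 1);   % step 1: sample a population
    scores  = f(samples);                   % step 2: evaluate every sample
    [~, idx] = sort(scores, 'descend');     % step 3: rank by score
    elite = samples(idx(1:40));             %         keep the top 20%
    mu    = mean(elite);                    % step 4: refit the distribution
    sigma = std(elite) + 1e-6;              % small floor keeps sampling alive
end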
Core Algorithm Framework
classdef CrossEntropyGMM < handle
    properties
        num_components    % number of GMM components
        dimension         % data dimensionality
        population_size   % samples per generation
        elite_size        % number of elite samples
        max_iter          % maximum number of iterations
        smoothing_factor  % smoothing factor
    end
    methods
        function obj = CrossEntropyGMM(num_comp, dim, pop_size, elite_ratio, max_iter, smooth_factor)
            obj.num_components = num_comp;
            obj.dimension = dim;
            obj.population_size = pop_size;
            obj.elite_size = round(elite_ratio * pop_size);
            obj.max_iter = max_iter;
            obj.smoothing_factor = smooth_factor;
        end
    end
end
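Note that this class only stores the configuration; the procedural functions later in this article take a plain ce_params struct instead. A minimal construction example (the values are illustrative):
% Hold the CE settings in one object: 3 components, 2-D data,
% 100 samples per generation, 20% elites, 50 iterations, smoothing 0.7.
ce = CrossEntropyGMM(3, 2, 100, 0.2, 50, 0.7);
fprintf('Elite samples per generation: %d\n', ce.elite_size); % prints 20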
Defining the Gaussian Mixture Model
classdef GaussianMixtureModel
    properties
        weights      % mixture weights [1 x K]
        means        % mean matrix [D x K]
        covariances  % covariance matrices [D x D x K]
        K            % number of components
        D            % data dimensionality
    end
    methods
        function obj = GaussianMixtureModel(K, D)
            obj.K = K;
            obj.D = D;
            obj.weights = ones(1, K) / K;
            obj.means = zeros(D, K);
            obj.covariances = zeros(D, D, K);
            % Initialize each covariance matrix to the identity
            for k = 1:K
                obj.covariances(:, :, k) = eye(D);
            end
        end
        function probability = pdf(obj, X)
            % Evaluate the GMM density at the data points X
            % X: [N x D] data matrix
            % probability: [N x 1] vector of densities
            [N, ~] = size(X);
            component_probs = zeros(N, obj.K);
            for k = 1:obj.K
                component_probs(:, k) = obj.weights(k) * ...
                    mvnpdf(X, obj.means(:, k)', squeeze(obj.covariances(:, :, k)));
            end
            probability = sum(component_probs, 2);
        end
        function log_likelihood = compute_log_likelihood(obj, X)
            % Log-likelihood of the data under the model
            probabilities = obj.pdf(X);
            log_likelihood = sum(log(probabilities + eps)); % eps guards against log(0)
        end
    end
end
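A quick smoke test of the model class might look like the following; the means and test points here are arbitrary:
% Build a 2-component, 2-D mixture and evaluate it at random points.
gmm = GaussianMixtureModel(2, 2);
gmm.means = [0 3; 0 3];                    % component means as columns of [D x K]
X_test = randn(5, 2);                      % five 2-D test points
p  = gmm.pdf(X_test);                      % [5 x 1] mixture densities
ll = gmm.compute_log_likelihood(X_test);   % scalar log-likelihood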
Implementing Cross-Entropy Optimization of a GMM
function [best_gmm, best_log_likelihood, history] = cross_entropy_gmm_optimization(X, K, ce_params)
% Cross-entropy optimization of a Gaussian mixture model
% Inputs:
%   X: data matrix [N x D]
%   K: number of GMM components
%   ce_params: struct of cross-entropy parameters
% Outputs:
%   best_gmm: best GMM found
%   best_log_likelihood: best log-likelihood
%   history: optimization history
[N, D] = size(X);
% Default parameters
if nargin < 3
    ce_params.population_size = 100;
    ce_params.elite_ratio = 0.2;
    ce_params.max_iter = 50;
    ce_params.smoothing_alpha = 0.7;
    ce_params.initial_std = 1.0;
end
% Initialize the distribution over GMM parameters
param_distribution = initialize_parameter_distribution(K, D, X, ce_params.initial_std);
% History bookkeeping
history.best_log_likelihood = zeros(ce_params.max_iter, 1);
history.parameter_evolution = cell(ce_params.max_iter, 1);
best_log_likelihood = -inf;
best_gmm = [];
for iter = 1:ce_params.max_iter
    fprintf('Iteration %d/%d...\n', iter, ce_params.max_iter);
    % Step 1: sample a population from the current distribution
    population = generate_gmm_population(param_distribution, ce_params.population_size, K, D);
    % Step 2: evaluate every sample
    log_likelihoods = evaluate_gmm_population(population, X);
    % Step 3: select the elite samples
    [elite_samples, elite_scores] = select_elite_samples(population, log_likelihoods, ...
        ce_params.elite_ratio);
    % Step 4: update the parameter distribution
    param_distribution = update_parameter_distribution(param_distribution, elite_samples, ...
        ce_params.smoothing_alpha);
    % Track the incumbent best
    current_best = max(elite_scores);
    if current_best > best_log_likelihood
        best_log_likelihood = current_best;
        best_idx = find(log_likelihoods == current_best, 1);
        best_gmm = population{best_idx};
    end
    % Record history
    history.best_log_likelihood(iter) = current_best;
    history.parameter_evolution{iter} = param_distribution;
    fprintf('Current best log-likelihood: %.4f\n', current_best);
    % Convergence check (check_convergence itself requires at least 10 iterations)
    if iter > 5 && check_convergence(history.best_log_likelihood, iter)
        fprintf('Converged at iteration %d\n', iter);
        break;
    end
end
history.best_log_likelihood = history.best_log_likelihood(1:iter);
history.parameter_evolution = history.parameter_evolution(1:iter);
end
function param_dist = initialize_parameter_distribution(K, D, X, initial_std)
% Initialize the probability distribution over GMM parameters,
% using data statistics for a sensible starting point
data_mean = mean(X);
data_std = std(X);
param_dist.weights_mean = ones(1, K) / K;
param_dist.weights_std = ones(1, K) * 0.1;
param_dist.means_mean = zeros(D, K);
param_dist.means_std = zeros(D, K);
for k = 1:K
    % Place the initial means within the range of the data
    param_dist.means_mean(:, k) = data_mean' + randn(D, 1) .* (data_std' / 2);
    param_dist.means_std(:, k) = ones(D, 1) * initial_std;
end
% Initialize the covariance parameters in the spirit of a Wishart distribution
param_dist.cov_scale = zeros(K, 1);
param_dist.cov_df = zeros(K, 1);
for k = 1:K
    param_dist.cov_scale(k) = 1.0;
    param_dist.cov_df(k) = D + 2; % ensure enough degrees of freedom
end
end
function population = generate_gmm_population(param_dist, population_size, K, D)
% Sample a population of GMMs from the current parameter distribution
population = cell(population_size, 1);
for i = 1:population_size
    gmm = GaussianMixtureModel(K, D);
    % Sample the weights (the softmax guarantees they sum to 1)
    weights_raw = param_dist.weights_mean + param_dist.weights_std .* randn(1, K);
    gmm.weights = exp(weights_raw) / sum(exp(weights_raw));
    % Sample the means
    for k = 1:K
        gmm.means(:, k) = param_dist.means_mean(:, k) + ...
            param_dist.means_std(:, k) .* randn(D, 1);
    end
    % Sample the covariance matrices (guaranteeing positive definiteness)
    for k = 1:K
        % Draw a positive-definite covariance from a Wishart distribution
        scale_matrix = eye(D) * param_dist.cov_scale(k);
        df = param_dist.cov_df(k);
        % wishrnd requires the Statistics and Machine Learning Toolbox
        cov_matrix = wishrnd(scale_matrix, df) / df;
        % Small regularization term for numerical stability
        gmm.covariances(:, :, k) = cov_matrix + eye(D) * 1e-6;
    end
    population{i} = gmm;
end
end
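wishrnd is only available with the Statistics and Machine Learning Toolbox. If it is missing, one common substitute (an assumption on my part, not part of the original method) is to build a positive semi-definite matrix from a random square factor, which has the right expected scale:
function cov_matrix = sample_cov_fallback(scale, D)
% Fallback covariance sampler when wishrnd is unavailable.
% For A with i.i.d. N(0,1) entries, E[A*A'] = D*eye(D), so
% scale*(A*A')/D has mean scale*eye(D), matching wishrnd(...)/df above.
A = randn(D, D);
cov_matrix = scale * (A * A') / D + eye(D) * 1e-6;  % regularize as above
end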
function log_likelihoods = evaluate_gmm_population(population, X)
% Evaluate the log-likelihood of every individual in the population.
% parfor needs the Parallel Computing Toolbox; without an open pool
% it degrades gracefully to a plain serial loop.
population_size = length(population);
log_likelihoods = zeros(population_size, 1);
parfor i = 1:population_size
    try
        log_likelihoods(i) = population{i}.compute_log_likelihood(X);
    catch
        % On failure, assign a very low likelihood
        log_likelihoods(i) = -1e10;
    end
end
end
function [elite_samples, elite_scores] = select_elite_samples(population, scores, elite_ratio)
% Select the elite samples
population_size = length(population);
elite_size = round(elite_ratio * population_size);
[sorted_scores, sorted_indices] = sort(scores, 'descend');
elite_indices = sorted_indices(1:elite_size);
elite_samples = population(elite_indices);
elite_scores = sorted_scores(1:elite_size);
end
function new_param_dist = update_parameter_distribution(old_param_dist, elite_samples, alpha)
% Refit the parameter distribution to the elite samples
K = length(elite_samples{1}.weights);
D = size(elite_samples{1}.means, 1);
elite_size = length(elite_samples);
new_param_dist = old_param_dist;
% Collect the parameters of the elite samples
elite_weights = zeros(elite_size, K);
elite_means = zeros(D, K, elite_size);
elite_covariances = zeros(D, D, K, elite_size);
for i = 1:elite_size
    gmm = elite_samples{i};
    elite_weights(i, :) = gmm.weights;
    elite_means(:, :, i) = gmm.means;
    for k = 1:K
        elite_covariances(:, :, k, i) = gmm.covariances(:, :, k);
    end
end
% Update the weight distribution
weights_raw = log(elite_weights); % map back to the pre-softmax space
new_param_dist.weights_mean = alpha * mean(weights_raw, 1) + ...
    (1 - alpha) * old_param_dist.weights_mean;
new_param_dist.weights_std = alpha * std(weights_raw, 0, 1) + ...
    (1 - alpha) * old_param_dist.weights_std;
% Update the mean distribution
for k = 1:K
    means_k = squeeze(elite_means(:, k, :)); % [D x elite_size]
    new_param_dist.means_mean(:, k) = alpha * mean(means_k, 2) + ...
        (1 - alpha) * old_param_dist.means_mean(:, k);
    new_param_dist.means_std(:, k) = alpha * std(means_k, 0, 2) + ...
        (1 - alpha) * old_param_dist.means_std(:, k);
end
% Update the covariance distribution (simplified treatment)
for k = 1:K
    covs_k = squeeze(elite_covariances(:, :, k, :)); % [D x D x elite_size]
    % Use the mean eigenvalue of the average covariance as the scale
    mean_cov = mean(covs_k, 3);
    eigen_vals = eig(mean_cov);
    new_scale = mean(eigen_vals);
    new_param_dist.cov_scale(k) = alpha * new_scale + ...
        (1 - alpha) * old_param_dist.cov_scale(k);
end
end
function converged = check_convergence(log_likelihood_history, current_iter)
% Check whether the algorithm has converged
if current_iter < 10
    converged = false;
    return;
end
recent_improvements = diff(log_likelihood_history(current_iter-4:current_iter));
avg_improvement = mean(recent_improvements);
% Declare convergence when the average improvement falls below a threshold
converged = abs(avg_improvement) < 1e-4;
end
Usage Example and Validation
% Example: optimizing a GMM with the cross-entropy method
clear; clc;
% Generate test data
rng(42); % fix the random seed
[N, D, K_true] = deal(1000, 2, 3);
% Draw data from a mixture of three Gaussians
true_weights = [0.3, 0.4, 0.3];
true_means = [1, 1; -1, -1; 2, -2]';
true_covs = cat(3, [0.5, 0.1; 0.1, 0.5], [0.3, -0.2; -0.2, 0.3], [0.4, 0; 0, 0.4]);
X = [];
for k = 1:K_true
    n_k = round(N * true_weights(k));
    X_k = mvnrnd(true_means(:, k)', squeeze(true_covs(:, :, k)), n_k);
    X = [X; X_k]; %#ok<AGROW>
end
% Cross-entropy parameters
ce_params.population_size = 50;
ce_params.elite_ratio = 0.3;
ce_params.max_iter = 30;
ce_params.smoothing_alpha = 0.8;
ce_params.initial_std = 0.5;
% Run the cross-entropy optimization
K_fit = 3; % assume the true number of components is known
[best_gmm, best_log_likelihood, history] = cross_entropy_gmm_optimization(...
    X, K_fit, ce_params);
% Visualize the results
figure('Position', [100, 100, 1200, 400]);
% Panel 1: raw data and the fitted GMM
subplot(1, 3, 1);
scatter(X(:, 1), X(:, 2), 10, 'filled', 'MarkerFaceAlpha', 0.6);
hold on;
% Draw the fitted Gaussian components
[x_grid, y_grid] = meshgrid(linspace(min(X(:,1)), max(X(:,1)), 50), ...
    linspace(min(X(:,2)), max(X(:,2)), 50));
grid_points = [x_grid(:), y_grid(:)];
for k = 1:K_fit
    % Contour plot of each component density
    pdf_vals = mvnpdf(grid_points, best_gmm.means(:, k)', ...
        squeeze(best_gmm.covariances(:, :, k)));
    pdf_vals = reshape(pdf_vals, size(x_grid));
    contour(x_grid, y_grid, pdf_vals, 3, 'LineWidth', 1.5);
end
title('Data and fitted GMM');
xlabel('Feature 1');
ylabel('Feature 2');
legend('Data points', 'GMM components', 'Location', 'best');
% Panel 2: optimization progress
subplot(1, 3, 2);
plot(1:length(history.best_log_likelihood), history.best_log_likelihood, 'LineWidth', 2);
xlabel('Iteration');
ylabel('Log-likelihood');
title('Cross-entropy optimization progress');
grid on;
% Panel 3: evolution of the mixture weights
subplot(1, 3, 3);
weights_evolution = zeros(length(history.parameter_evolution), K_fit);
for iter = 1:length(history.parameter_evolution)
    weights_mean = history.parameter_evolution{iter}.weights_mean;
    weights_evolution(iter, :) = exp(weights_mean) / sum(exp(weights_mean));
end
plot(weights_evolution, 'LineWidth', 2);
xlabel('Iteration');
ylabel('Mixture weight');
title('Evolution of the mixture weights');
legend(arrayfun(@(k) sprintf('Component %d', k), 1:K_fit, 'UniformOutput', false));
grid on;
% Summarize the results
fprintf('\n=== Cross-entropy GMM optimization results ===\n');
fprintf('Best log-likelihood: %.4f\n', best_log_likelihood);
fprintf('Estimated mixture weights: [%s]\n', sprintf('%.3f ', best_gmm.weights));
fprintf('True mixture weights:      [%s]\n', sprintf('%.3f ', true_weights));
% Compare against classical EM
fprintf('\nComparison with classical EM:\n');
gmm_em = fitgmdist(X, K_fit, 'Replicates', 5);
log_likelihood_em = sum(log(pdf(gmm_em, X) + eps));
fprintf('EM log-likelihood: %.4f\n', log_likelihood_em);
fprintf('Cross-entropy improvement: %.4f\n', best_log_likelihood - log_likelihood_em);
Advanced Features and Improvements
1. Adaptive Parameter Control
function ce_params = adaptive_parameter_control(ce_params, iteration, improvement)
% Adaptively adjust the cross-entropy parameters
if iteration > 10 && improvement < 1e-3
    % Small improvement: increase exploration
    ce_params.smoothing_alpha = max(0.5, ce_params.smoothing_alpha * 0.95);
    ce_params.elite_ratio = min(0.5, ce_params.elite_ratio * 1.05);
elseif improvement > 1e-2
    % Large improvement: strengthen exploitation
    ce_params.smoothing_alpha = min(0.95, ce_params.smoothing_alpha * 1.05);
end
end
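The optimizer above does not call this controller by itself. One way it could be wired into the main loop is sketched below; the per-iteration scores are stand-ins, and prev_best / improvement are hypothetical names:
% Sketch: feed each iteration's improvement back into the CE settings.
ce_params.smoothing_alpha = 0.8;   % starting values, as in the earlier example
ce_params.elite_ratio = 0.3;
prev_best = -inf;
for iter = 1:30
    current_best = -1 / iter;      % stand-in for this generation's elite best
    improvement  = current_best - prev_best;
    ce_params    = adaptive_parameter_control(ce_params, iter, improvement);
    prev_best    = current_best;
end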
2. Selecting the Number of Components
function [best_K, results] = select_optimal_components(X, K_candidates, ce_params)
% Choose the number of components via the cross-entropy method and BIC
results = struct();
for i = 1:length(K_candidates)
    K = K_candidates(i);
    fprintf('Testing K = %d...\n', K);
    [best_gmm, best_ll, history] = cross_entropy_gmm_optimization(X, K, ce_params);
    % BIC: free parameters are K-1 weights, K*D means, K*D*(D+1)/2 covariances
    n_params = K * (1 + size(X, 2) + size(X, 2) * (size(X, 2) + 1) / 2) - 1;
    bic = -2 * best_ll + n_params * log(size(X, 1));
    results(i).K = K;
    results(i).log_likelihood = best_ll;
    results(i).bic = bic;
    results(i).gmm = best_gmm;
end
% Pick the model with the smallest BIC
[~, best_idx] = min([results.bic]);
best_K = results(best_idx).K;
fprintf('Optimal number of components: K = %d (BIC = %.2f)\n', best_K, results(best_idx).bic);
end
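A usage sketch, assuming the same X and ce_params as in the earlier example:
% Scan K = 2..5 and keep the BIC-optimal model.
K_candidates = 2:5;
[best_K, results] = select_optimal_components(X, K_candidates, ce_params);
figure;
bar([results.K], [results.bic]);   % visualize BIC across the candidates
xlabel('Number of components K');
ylabel('BIC');
title('Model selection by BIC');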
Key Advantages and Applications
Advantages:
- Global optimization: less prone to getting trapped in local optima
- Robustness: less sensitive to the choice of initial values
- Flexibility: applicable to a wide range of GMM structures
- Parallelism: population evaluation is naturally parallel
Applications:
- Density estimation for complex data distributions
- Image segmentation and pattern recognition
- Anomaly detection systems
- Financial time-series modeling
- Population analysis in bioinformatics
This approach is particularly useful for complex multimodal distributions where classical EM tends to get stuck in local optima; the cross-entropy method's global search strategy can find better GMM parameter configurations.
