Reversible Jump Markov Chain Monte Carlo (RJMCMC) Sampling: A MATLAB Implementation
A MATLAB implementation of the reversible jump Markov chain Monte Carlo (RJMCMC) algorithm. The implementation shows how to sample jointly over model space and parameter space, which makes it particularly suitable for model selection problems.
classdef RJMCMC < handle
    % Reversible jump Markov chain Monte Carlo sampler.
    % Samples jointly over model space and parameter space.
    % (Declared as a handle class so that the acceptance rates recorded
    % inside run() persist on the object.)
    properties
        data            % observed data
        models          % array of candidate models
        priorProbs      % prior probabilities of the models
        temperature     % annealing temperature
        maxGenerations  % maximum number of iterations
        thinning        % thinning factor (keep every k-th iterate)
        burnIn          % number of burn-in iterations
        jumpProb        % probability of attempting a between-model jump
        proposalStd     % std. dev. of the parameter proposal distribution
        results         % stored sampling results
        acceptanceRates % recorded acceptance rates
    end

    methods
        function obj = RJMCMC(data, models, priorProbs, varargin)
            % Constructor
            obj.data = data;
            obj.models = models;
            obj.priorProbs = priorProbs;
            % Default settings
            obj.temperature = 1.0;
            obj.maxGenerations = 10000;
            obj.thinning = 10;
            obj.burnIn = 1000;
            obj.jumpProb = 0.2;
            obj.proposalStd = 0.1;
            obj.results = struct();
            obj.acceptanceRates = struct('withinModel', 0, 'betweenModels', 0);
            % Parse optional name-value arguments
            for i = 1:2:length(varargin)
                switch lower(varargin{i})
                    case 'temperature'
                        obj.temperature = varargin{i+1};
                    case 'maxgenerations'
                        obj.maxGenerations = varargin{i+1};
                    case 'thinning'
                        obj.thinning = varargin{i+1};
                    case 'burnin'
                        obj.burnIn = varargin{i+1};
                    case 'jumpprob'
                        obj.jumpProb = varargin{i+1};
                    case 'proposalstd'
                        obj.proposalStd = varargin{i+1};
                    otherwise
                        warning('Unknown option: %s', varargin{i});
                end
            end
        end
        function [samples, modelCounts, logPosteriors] = run(obj)
            % Run the RJMCMC sampler
            numModels = length(obj.models);
            numParams = arrayfun(@(m) m.numParams, obj.models);
            % Initialize the state
            currentModel = randi(numModels);
            currentParams = obj.initializeParameters(currentModel);
            % Preallocate storage; unused rows are trimmed at the end
            maxSaved = floor((obj.maxGenerations - obj.burnIn) / obj.thinning);
            samples = cell(1, numModels);
            for i = 1:numModels
                samples{i} = nan(maxSaved, numParams(i));
            end
            modelCounts = zeros(1, numModels);
            logPosteriors = nan(maxSaved, 1);
            saveCount = 0;
            % Acceptance counters
            acceptWithin = 0;
            acceptBetween = 0;
            totalWithin = 0;
            totalBetween = 0;
            % Main MCMC loop
            for gen = 1:obj.maxGenerations
                % Choose between a within-model move and a between-model jump
                if rand < obj.jumpProb
                    % Between-model jump
                    totalBetween = totalBetween + 1;
                    [newModel, newParams, accepted] = obj.modelJump(currentModel, currentParams);
                    if accepted
                        acceptBetween = acceptBetween + 1;
                        currentModel = newModel;
                        currentParams = newParams;
                    end
                else
                    % Within-model move
                    totalWithin = totalWithin + 1;
                    [newParams, accepted] = obj.withinModelMove(currentModel, currentParams);
                    if accepted
                        acceptWithin = acceptWithin + 1;
                        currentParams = newParams;
                    end
                end
                % Current log posterior
                currentLogPosterior = obj.logPosterior(currentModel, currentParams);
                % Store the state (after burn-in, honoring the thinning factor)
                if gen > obj.burnIn && mod(gen, obj.thinning) == 0
                    saveCount = saveCount + 1;
                    modelCounts(currentModel) = modelCounts(currentModel) + 1;
                    % Write into the next free row of the current model's array
                    samples{currentModel}(modelCounts(currentModel), :) = ...
                        currentParams(1:numParams(currentModel));
                    logPosteriors(saveCount) = currentLogPosterior;
                end
                % Progress report
                if mod(gen, 1000) == 0
                    fprintf('Iteration %d/%d, current model: %d, acceptance (within): %.2f%%, acceptance (between): %.2f%%\n', ...
                        gen, obj.maxGenerations, currentModel, ...
                        100*acceptWithin/max(totalWithin, 1), 100*acceptBetween/max(totalBetween, 1));
                end
            end
            % Final acceptance rates
            obj.acceptanceRates.withinModel = acceptWithin / max(totalWithin, 1);
            obj.acceptanceRates.betweenModels = acceptBetween / max(totalBetween, 1);
            % Trim the preallocated arrays
            for i = 1:numModels
                samples{i} = samples{i}(1:modelCounts(i), :);
            end
            logPosteriors = logPosteriors(1:saveCount);
            fprintf('\n===== Sampling finished =====\n');
            fprintf('Total iterations: %d\n', obj.maxGenerations);
            fprintf('Burn-in: %d\n', obj.burnIn);
            fprintf('Effective samples: %d\n', saveCount);
            fprintf('Acceptance rate (within-model): %.2f%%\n', 100*obj.acceptanceRates.withinModel);
            fprintf('Acceptance rate (between-model): %.2f%%\n', 100*obj.acceptanceRates.betweenModels);
            fprintf('Model visit frequencies:\n');
            for i = 1:numModels
                fprintf('  Model %d: %d samples (%.2f%%)\n', i, modelCounts(i), ...
                    100*modelCounts(i)/max(saveCount, 1));
            end
        end
        function params = initializeParameters(obj, modelIdx)
            % Draw initial parameters from the model prior
            model = obj.models(modelIdx);
            params = model.priorSampler();
        end

        function [newParams, accepted] = withinModelMove(obj, modelIdx, currentParams)
            % Random-walk move within the current model
            proposedParams = currentParams + obj.proposalStd * randn(size(currentParams));
            % Log acceptance ratio (symmetric proposal)
            logAlpha = obj.logPosterior(modelIdx, proposedParams) - ...
                       obj.logPosterior(modelIdx, currentParams);
            % Metropolis-Hastings test
            if log(rand) < logAlpha
                newParams = proposedParams;
                accepted = true;
            else
                newParams = currentParams;
                accepted = false;
            end
        end
        function [newModel, newParams, accepted] = modelJump(obj, currentModel, currentParams)
            % Between-model jump
            numModels = length(obj.models);
            newModel = randi(numModels);
            % If the same model is drawn, fall back to a within-model move
            if newModel == currentModel
                [newParams, accepted] = obj.withinModelMove(currentModel, currentParams);
                return;
            end
            % Model information
            currentModelObj = obj.models(currentModel);
            newModelObj = obj.models(newModel);
            numCurrentParams = currentModelObj.numParams;
            numNewParams = newModelObj.numParams;
            % Handle the change of parameter dimension
            if numCurrentParams == numNewParams
                % Same dimension: plain random-walk proposal
                proposedParams = currentParams + obj.proposalStd * randn(1, numNewParams);
            elseif numCurrentParams > numNewParams
                % Dimension decrease: keep a random subset of components
                keepIdx = randperm(numCurrentParams, numNewParams);
                proposedParams = currentParams(keepIdx) + obj.proposalStd * randn(1, numNewParams);
            else
                % Dimension increase: perturb the existing components and
                % draw the extra ones from the new model's prior
                baseParams = currentParams + obj.proposalStd * randn(1, numCurrentParams);
                freshParams = obj.initializeParameters(newModel);
                proposedParams = [baseParams, freshParams(numCurrentParams+1:end)];
            end
            % Log acceptance ratio
            logAlpha = obj.calculateJumpProbability(currentModel, currentParams, ...
                                                    newModel, proposedParams, ...
                                                    numCurrentParams, numNewParams);
            % Metropolis-Hastings test
            if log(rand) < logAlpha
                newParams = proposedParams;
                accepted = true;
            else
                newParams = currentParams;
                accepted = false;
            end
        end
        function logAlpha = calculateJumpProbability(obj, fromModel, fromParams, ...
                                                     toModel, toParams, ...
                                                     fromDim, toDim)
            % Log acceptance ratio for a between-model jump
            logPostFrom = obj.logPosterior(fromModel, fromParams);
            logPostTo = obj.logPosterior(toModel, toParams);
            % Log ratio of model prior probabilities
            priorRatio = log(obj.priorProbs(toModel)) - log(obj.priorProbs(fromModel));
            % Proposal ratio. NOTE: simplified; a full RJMCMC implementation
            % must include the densities of the auxiliary variables used in
            % the dimension-matching transform.
            if fromDim == toDim
                propRatio = 0; % symmetric proposal
            else
                propRatio = 0; % simplified: auxiliary proposal densities ignored
            end
            % Log Jacobian determinant. NOTE: also simplified; zero is exact
            % only for identity-like mappings such as those used above.
            jacobian = 0;
            % Log acceptance ratio, scaled by the annealing temperature
            logAlpha = (logPostTo - logPostFrom) + priorRatio + propRatio + jacobian;
            logAlpha = logAlpha / obj.temperature;
        end

        function logPost = logPosterior(obj, modelIdx, params)
            % Log posterior (up to a constant) of params under model modelIdx
            model = obj.models(modelIdx);
            logLikelihood = model.logLikelihood(obj.data, params);
            logPrior = model.logPrior(params);
            logPost = logLikelihood + logPrior;
        end
        function plotResults(obj, samples, modelCounts)
            % Visualize the results
            numModels = length(obj.models);
            % Model visit frequencies
            figure;
            bar(modelCounts / sum(modelCounts));
            title('Model selection frequencies');
            xlabel('Model index');
            ylabel('Relative frequency');
            grid on;
            % Parameter posteriors
            for m = 1:numModels
                if ~isempty(samples{m})
                    figure;
                    subplot(1,2,1);
                    histogram(samples{m}, 20);
                    title(sprintf('Model %d: parameter posterior', m));
                    xlabel('Parameter value');
                    ylabel('Count');
                    subplot(1,2,2);
                    plot(samples{m});
                    title(sprintf('Model %d: parameter traces', m));
                    xlabel('Sample index');
                    ylabel('Parameter value');
                    grid on;
                end
            end
            % Model evidence estimates
            logEvidences = obj.estimateModelEvidence(samples, modelCounts);
            figure;
            bar(logEvidences);
            title('Log model evidence');
            xlabel('Model index');
            ylabel('Log evidence');
            grid on;
        end

        function logEvidences = estimateModelEvidence(obj, samples, modelCounts)
            % Estimate the model evidence with the harmonic mean estimator.
            % (The harmonic mean estimator is known to be unstable in general;
            % treat it as a rough indicator only.)
            numModels = length(obj.models);
            logEvidences = zeros(1, numModels);
            for m = 1:numModels
                if modelCounts(m) > 0
                    logLikes = zeros(modelCounts(m), 1);
                    for i = 1:modelCounts(m)
                        logLikes(i) = obj.models(m).logLikelihood(obj.data, samples{m}(i, :));
                    end
                    % Harmonic mean in the log domain (max-shifted for stability)
                    shift = max(-logLikes);
                    logEvidences(m) = -(shift + log(mean(exp(-logLikes - shift))));
                end
            end
        end
    end
end
%% Model base class
classdef Model < handle
    properties
        name      % model name
        numParams % number of parameters
    end
    methods (Abstract)
        logLikelihood(obj, data, params)
        logPrior(obj, params)
        priorSampler(obj)
    end
end
%% Gaussian model
classdef GaussianModel < Model
    properties
        mu0    % prior mean of mu
        sigma0 % prior std. dev. of mu
        sigma  % data std. dev. (known)
    end
    methods
        function obj = GaussianModel(mu0, sigma0, sigma)
            obj.name = 'Gaussian model';
            obj.numParams = 1;
            obj.mu0 = mu0;
            obj.sigma0 = sigma0;
            obj.sigma = sigma;
        end

        function logL = logLikelihood(obj, data, params)
            mu = params(1);
            n = length(data);
            logL = -0.5*n*log(2*pi*obj.sigma^2) ...
                   - 0.5*sum((data - mu).^2)/obj.sigma^2;
        end

        function logP = logPrior(obj, params)
            mu = params(1);
            logP = -0.5*((mu - obj.mu0)/obj.sigma0)^2 ...
                   - log(obj.sigma0*sqrt(2*pi));
        end

        function params = priorSampler(obj)
            params = obj.mu0 + obj.sigma0*randn();
        end
    end
end
%% Gaussian mixture model (two symmetric components at +/- mu)
classdef GaussianMixtureModel < Model
    properties
        mu0    % prior mean of mu
        sigma0 % prior std. dev. of mu
        sigma  % data std. dev. (known)
    end
    methods
        function obj = GaussianMixtureModel(mu0, sigma0, sigma)
            obj.name = 'Gaussian mixture model';
            obj.numParams = 2; % component mean and mixing weight (logit scale)
            obj.mu0 = mu0;
            obj.sigma0 = sigma0;
            obj.sigma = sigma;
        end

        function logL = logLikelihood(obj, data, params)
            mu = params(1);
            pi_k = 1/(1+exp(-params(2))); % sigmoid keeps the weight in [0,1]
            n = length(data);
            % Mixture log-likelihood, accumulated with a max-shifted
            % log-sum-exp so that neither component underflows.
            % (The mixture density is already normalized, so no extra
            % normalization constant is needed.)
            logNorm = -0.5*log(2*pi*obj.sigma^2);
            logL = 0;
            for i = 1:n
                ll1 = log(pi_k)   + logNorm - 0.5*((data(i) - mu)/obj.sigma)^2;
                ll2 = log(1-pi_k) + logNorm - 0.5*((data(i) + mu)/obj.sigma)^2;
                m = max(ll1, ll2);
                logL = logL + m + log(exp(ll1 - m) + exp(ll2 - m));
            end
        end

        function logP = logPrior(obj, params)
            mu = params(1);
            pi_logit = params(2);
            % Prior on mu
            logP_mu = -0.5*((mu - obj.mu0)/obj.sigma0)^2 ...
                      - log(obj.sigma0*sqrt(2*pi));
            % Standard Cauchy prior on the logit of the mixing weight
            logP_pi = -log(1 + pi_logit^2) - log(pi);
            logP = logP_mu + logP_pi;
        end

        function params = priorSampler(obj)
            mu = obj.mu0 + obj.sigma0*randn();
            pi_logit = tan(pi*(rand() - 0.5)); % standard Cauchy draw, matching logPrior
            params = [mu, pi_logit];
        end
    end
end
%% Exponential model
classdef ExponentialModel < Model
    properties
        lambda0 % rate parameter of the Gamma prior on lambda
        beta    % shape parameter of the Gamma prior on lambda
    end
    methods
        function obj = ExponentialModel(lambda0, beta)
            obj.name = 'Exponential model';
            obj.numParams = 1;
            obj.lambda0 = lambda0;
            obj.beta = beta;
        end

        function logL = logLikelihood(obj, data, params)
            lambda = params(1);
            if lambda <= 0
                logL = -Inf;
                return;
            end
            n = length(data);
            logL = n*log(lambda) - lambda*sum(data);
        end

        function logP = logPrior(obj, params)
            lambda = params(1);
            if lambda <= 0
                logP = -Inf;
                return;
            end
            % Gamma(beta, rate lambda0) log density
            logP = (obj.beta-1)*log(lambda) - obj.lambda0*lambda ...
                   - gammaln(obj.beta) + obj.beta*log(obj.lambda0);
        end

        function params = priorSampler(obj)
            % Gamma draw; gamrnd takes shape and scale = 1/rate
            % (requires the Statistics and Machine Learning Toolbox)
            params = gamrnd(obj.beta, 1/obj.lambda0);
        end
    end
end
%% Demo script
function demoRJMCMC()
    % Generate synthetic data
    rng(42); % fix the random seed for reproducibility
    trueModel = 2; % index of the true data-generating model
    nSamples = 200;
    % Candidate models and their prior probabilities (shared by all cases)
    models = [GaussianModel(0, 5, 1), ...
              GaussianMixtureModel(0, 5, 1), ...
              ExponentialModel(1, 1)];
    priorProbs = [0.3, 0.4, 0.3];
    switch trueModel
        case 1 % Gaussian model
            mu_true = 2.5;
            sigma_true = 1.0;
            data = mu_true + sigma_true*randn(1, nSamples);
        case 2 % Gaussian mixture model
            mu_true = 1.8;
            sigma_true = 0.8;
            weights = [0.7, 0.3];
            n1 = round(weights(1)*nSamples);
            data = [ mu_true + sigma_true*randn(1, n1), ...
                    -mu_true + sigma_true*randn(1, nSamples - n1)];
        case 3 % exponential model
            lambda_true = 0.7;
            data = exprnd(1/lambda_true, 1, nSamples);
    end
    % Create the RJMCMC sampler
    rjmcmc = RJMCMC(data, models, priorProbs, ...
        'maxGenerations', 20000, ...
        'burnIn', 5000, ...
        'thinning', 5, ...
        'jumpProb', 0.3, ...
        'proposalStd', 0.2);
    % Run the sampler
    [samples, modelCounts, logPosteriors] = rjmcmc.run();
    % Visualize the results
    rjmcmc.plotResults(samples, modelCounts);
    % Report the true model
    fprintf('\n===== True model =====\n');
    fprintf('True model: %d (%s)\n', trueModel, models(trueModel).name);
    switch trueModel
        case 1
            fprintf('True parameters: mu = %.2f\n', mu_true);
        case 2
            fprintf('True parameters: mu = %.2f, weights = [%.2f, %.2f]\n', ...
                mu_true, weights(1), weights(2));
        case 3
            fprintf('True parameters: lambda = %.2f\n', lambda_true);
    end
    % Report parameter estimates
    fprintf('\n===== Parameter estimates =====\n');
    for m = 1:length(models)
        if modelCounts(m) > 0
            fprintf('Model %d (%s):\n', m, models(m).name);
            paramMeans = mean(samples{m}, 1);
            paramStds = std(samples{m}, 0, 1);
            for p = 1:models(m).numParams
                fprintf('  Parameter %d: %.2f ± %.2f\n', p, paramMeans(p), paramStds(p));
            end
        end
    end
end
Algorithm Principles and Implementation Details
1. Core RJMCMC concepts
Reversible jump Markov chain Monte Carlo (RJMCMC) extends standard MCMC to allow moves between parameter spaces of different dimensions. Its core ideas are:
- Markov chain transitions in model space
- random-walk moves in parameter space
- a dimension-matching transformation when jumping between models (recalled below)
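For reference, Green's formulation makes the between-model move dimension-matched: both sides of the jump are augmented with auxiliary variables \(u \sim q(u)\) and \(u' \sim q'(u')\) linked by a deterministic bijection,

\[
(\theta', u') = g(\theta, u), \qquad \dim(\theta) + \dim(u) = \dim(\theta') + \dim(u').
\]

The implementation above sidesteps this bookkeeping by using identity-like mappings (see calculateJumpProbability), which is why both its proposal ratio and its Jacobian term are set to zero in the log domain.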
2. Key algorithm components
2.1 Model representation
classdef Model < handle
    properties
        name      % model name
        numParams % number of parameters
    end
    methods (Abstract)
        logLikelihood(obj, data, params) % log-likelihood
        logPrior(obj, params)            % log prior density
        priorSampler(obj)                % draw parameters from the prior
    end
end
2.2 State transition mechanism
- Within-model move: a random walk inside the parameter space of the current model
- Between-model jump: a transition to another model, generally involving a change of parameter dimension
2.3 Acceptance probability
For a proposed jump from state \((M, \theta)\) to \((M', \theta')\), the acceptance probability is

\[
\alpha = \min\!\left\{ 1,\;
\frac{p(y \mid \theta', M')\, p(\theta' \mid M')\, p(M')}
     {p(y \mid \theta, M)\, p(\theta \mid M)\, p(M)}
\cdot \frac{q'(u')}{q(u)}
\cdot \left| \det \frac{\partial (\theta', u')}{\partial (\theta, u)} \right|
\right\}
\]

where:
- the first ratio combines likelihood and priors, i.e. the model posterior ratio \(p(M' \mid y)/p(M \mid y)\) up to a constant
- \(q\), \(q'\) are the proposal densities of the auxiliary variables
- the last factor \(J\) is the Jacobian determinant of the dimension-matching transform \(g\)
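In code, everything is evaluated in the log domain. A minimal standalone sketch of the acceptance test follows; the numeric values are arbitrary illustrations, not outputs of the class above:

% Log-domain Metropolis-Hastings test for a between-model jump.
% All quantities are illustrative placeholders.
logPostFrom = -104.2;                      % log p(y|theta,M) + log p(theta|M)
logPostTo   = -101.7;                      % log p(y|theta',M') + log p(theta'|M')
logModelPriorRatio = log(0.4) - log(0.3);  % log p(M') - log p(M)
logProposalRatio   = 0;                    % symmetric auxiliary proposal
logJacobian        = 0;                    % identity mapping => |J| = 1

logAlpha = (logPostTo - logPostFrom) + logModelPriorRatio ...
         + logProposalRatio + logJacobian;
accepted = log(rand) < logAlpha;           % accept with probability min(1, exp(logAlpha))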
3. Key functions of the implementation
3.1 Main sampling loop
function [samples, modelCounts, logPosteriors] = run(obj)
    % Initialize the state and the counters
    % Main MCMC loop
    for gen = 1:obj.maxGenerations
        if rand < obj.jumpProb
            % Between-model jump
            [newModel, newParams, accepted] = obj.modelJump(...);
        else
            % Within-model move
            [newParams, accepted] = obj.withinModelMove(...);
        end
        % Store the state
    end
end
3.2 Model jump
function [newModel, newParams, accepted] = modelJump(obj, currentModel, currentParams)
    % Draw a candidate model at random
    % Handle the change in parameter dimension
    if numCurrentParams == numNewParams
        % Same-dimension transition
    else
        % Dimension-changing transition (birth/death)
    end
    % Log acceptance ratio
    logAlpha = obj.calculateJumpProbability(...);
    % Metropolis-Hastings decision
end
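For the concrete Gaussian → mixture jump in this example (1 parameter → 2), a fully dimension-matched "birth" move could keep \(\mu\) fixed and draw only the new logit weight; the reverse "death" move drops it deterministically, so the proposal ratio reduces to \(1/q(u)\) and the Jacobian is 1. A sketch under these assumptions (the variable names are hypothetical and not part of the class above):

% Hypothetical birth move: Gaussian (theta = mu) -> mixture (theta' = [mu, u]).
currentMu = 0.5;                  % current Gaussian mean (illustrative)
s = 2.0;                          % proposal scale for the new logit weight
u = s * randn();                  % auxiliary variable, u ~ N(0, s^2)
proposedParams = [currentMu, u];  % identity mapping (mu, u) -> (mu, u)

% The death move is deterministic, so the proposal ratio is 1 / q(u):
logQ_u = -0.5*log(2*pi*s^2) - 0.5*(u/s)^2;  % log N(u; 0, s^2)
logProposalRatio = -logQ_u;
logJacobian = 0;                             % |J| = 1 for the identity map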
4. Example model implementations
4.1 Gaussian model
classdef GaussianModel < Model
    methods
        function logL = logLikelihood(obj, data, params)
            mu = params(1);
            n = length(data);
            logL = -0.5*n*log(2*pi*obj.sigma^2) ...
                   - 0.5*sum((data - mu).^2)/obj.sigma^2;
        end
    end
end
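A quick consistency check of the closed-form log-likelihood against MATLAB's normpdf (Statistics and Machine Learning Toolbox), assuming the class files above are saved on the path:

% Sanity check: closed-form Gaussian log-likelihood vs. normpdf
gm = GaussianModel(0, 5, 1);
x = randn(1, 50);
assert(abs(gm.logLikelihood(x, 0.3) - sum(log(normpdf(x, 0.3, 1)))) < 1e-9);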
4.2 Gaussian mixture model
classdef GaussianMixtureModel < Model
    methods
        function logL = logLikelihood(obj, data, params)
            mu = params(1);
            pi_k = 1/(1+exp(-params(2))); % sigmoid transform of the logit weight
            % accumulate the mixture log-likelihood (see the full class above)
        end
    end
end
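The naive log(sum(exp([ll1, ll2]))) pattern underflows when both component log-densities are very negative, which is why the full class uses a max-shifted log-sum-exp. The same trick as a standalone helper:

function s = logsumexp2(a, b)
% Numerically stable log(exp(a) + exp(b)).
m = max(a, b);
if isinf(m) && m < 0
    s = -Inf;            % both terms carry zero probability
else
    s = m + log(exp(a - m) + exp(b - m));
end
end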
4.3 Exponential model
classdef ExponentialModel < Model
    methods
        function logL = logLikelihood(obj, data, params)
            lambda = params(1);
            n = length(data);
            logL = n*log(lambda) - lambda*sum(data);
        end
    end
end
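Because the Gamma prior is conjugate to the exponential likelihood, the within-model posterior is available in closed form, which makes this model a convenient unit test: with lambda ~ Gamma(beta, rate lambda0), the posterior is Gamma(beta + n, rate lambda0 + sum(data)). A short check (exprnd requires the Statistics and Machine Learning Toolbox):

% Analytic posterior for the exponential model's rate parameter
rng(1);
data = exprnd(1/0.7, 1, 200);   % synthetic data with true lambda = 0.7
beta0 = 1; lambda0 = 1;         % Gamma prior hyperparameters
postMean = (beta0 + numel(data)) / (lambda0 + sum(data));
fprintf('Analytic posterior mean of lambda: %.3f\n', postMean);
% The RJMCMC estimate for this model should agree up to Monte Carlo error.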
Usage Guide
1. Basic usage steps
Note that MATLAB requires each classdef above (RJMCMC, Model, GaussianModel, GaussianMixtureModel, ExponentialModel) to be saved in its own .m file named after the class.
% 1. Prepare the data
data = ...; % observed data
% 2. Define the candidate models
models = [GaussianModel(0, 5, 1), ...
          GaussianMixtureModel(0, 5, 1), ...
          ExponentialModel(1, 1)];
% 3. Set the model prior probabilities
priorProbs = [0.3, 0.4, 0.3];
% 4. Create the RJMCMC sampler
rjmcmc = RJMCMC(data, models, priorProbs, ...
    'maxGenerations', 20000, ...
    'burnIn', 5000, ...
    'thinning', 5);
% 5. Run the sampler
[samples, modelCounts, logPosteriors] = rjmcmc.run();
% 6. Visualize the results
rjmcmc.plotResults(samples, modelCounts);
2. Parameter tuning advice
- Proposal standard deviation:
  - too large: low acceptance rate
  - too small: slow exploration
  - suggestion: start at 0.1 and adjust based on the acceptance rate (see the Robbins-Monro sketch after this list)
- Model jump probability:
  - typical values: 0.1-0.3
  - increase it when the models are very different
  - decrease it when the models are similar
- Burn-in and thinning:
  - burn-in: typically 10-50% of the total iterations
  - thinning factor: set it from the autocorrelation, typically 5-20
- Temperature parameter:
  - used for simulated annealing
  - high temperature: more exploration
  - low temperature: more exploitation
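One common tuning recipe (not part of the class above) is a Robbins-Monro update of the log proposal scale toward a target acceptance rate, e.g. the classical ~23% optimum for high-dimensional random-walk Metropolis. A minimal sketch; adaptation should be frozen after burn-in so the chain remains a valid Markov chain:

% Hypothetical Robbins-Monro step, run once per burn-in iteration
target = 0.234;                 % classical random-walk Metropolis target
gen = 100;                      % current iteration (illustrative)
accepted = 1;                   % 1 if the last proposal was accepted, else 0
logStd = log(0.1);              % log of the proposal std being adapted
logStd = logStd + gen^(-0.6) * (accepted - target);  % diminishing step size
proposalStd = exp(logStd);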
3. Interpreting the results
- Model selection:
  - compare how often each model is visited
  - compute the model evidence (via estimateModelEvidence)
- Parameter estimation:
  - inspect the posterior distribution of each parameter
  - compute posterior means and credible intervals
- Convergence diagnostics:
  - check that the trace plots look stationary
  - compute the Gelman-Rubin statistic (a minimal sketch follows this list)
  - inspect the autocorrelation function
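A minimal sketch of the (non-split) Gelman-Rubin potential scale reduction factor for a single scalar parameter, assuming several independent chains have been run:

function Rhat = gelmanRubin(chains)
% chains: nSamples-by-nChains matrix of draws of one scalar parameter.
[n, ~] = size(chains);
chainMeans = mean(chains, 1);
W = mean(var(chains, 0, 1));          % mean within-chain variance
B = n * var(chainMeans, 0);           % between-chain variance
varPlus = ((n - 1)/n) * W + B/n;      % pooled variance estimate
Rhat = sqrt(varPlus / W);             % values near 1 suggest convergence
end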
Extensions
1. Parallel computing support
function [samples, modelCounts] = runParallel(obj, numChains)
% Run several independent MCMC chains in parallel
% (parfor requires the Parallel Computing Toolbox to actually parallelize)
samples = cell(1, numChains);
modelCounts = zeros(numChains, length(obj.models));
parfor chain = 1:numChains
    rng(chain, 'twister'); % give each chain its own reproducible RNG stream
    [samples{chain}, modelCounts(chain, :)] = obj.run();
end
% Merge or compare the chains afterwards (e.g. Gelman-Rubin diagnostics)
end
2. Adaptive proposal distribution
function adaptProposal(obj, windowSize)
% Adjust the proposal scale from the recent acceptance rate.
% Note: adapt during burn-in only; continual adaptation breaks
% the stationarity guarantees of the chain.
recentAccept = ...; % acceptance rate over the last windowSize iterations
if recentAccept < 0.2
    obj.proposalStd = obj.proposalStd * 0.9;
elseif recentAccept > 0.4
    obj.proposalStd = obj.proposalStd * 1.1;
end
end
3. Automatic autocorrelation-based thinning
function thinFactor = autoCorrelationThin(samples)
% Compute the autocorrelation function
% (autocorr requires the Econometrics Toolbox)
acf = autocorr(samples, 'NumLags', 100);
% Find the first lag where the autocorrelation is close to zero
zeroCross = find(acf < 0.05, 1);
if isempty(zeroCross)
    thinFactor = 100; % fall back to the maximum lag examined
else
    thinFactor = max(1, zeroCross);
end
end
4. Hierarchical model support
classdef HierarchicalModel < Model
    methods
        function logL = logLikelihood(obj, data, params)
            % Hierarchical likelihood sketch
            % (numGroups and subModel are assumed to be properties)
            groupParams = ...; % derive group-level parameters from the hyperparameters
            logL = 0;
            for g = 1:numGroups
                logL = logL + subModel.logLikelihood(data{g}, groupParams(g));
            end
            % add the hyperparameter prior contribution here
        end
    end
end
Practical Application Advice
- Model design:
  - make sure the candidate models cover the plausible hypothesis space
  - choose a sensible parameterization
  - consider model identifiability
- Computational efficiency:
  - optimize the likelihood functions
  - use analytic gradients where available
  - parallelize the computationally intensive parts
- Validating results:
  - test the algorithm on synthetic data
  - compare convergence from different initial conditions
  - perform sensitivity analyses
- Software integration:
  - interface with probabilistic programming tools such as Stan and PyMC3
  - embed the sampler in existing analysis pipelines
  - build a graphical user interface
