可逆跳跃马尔可夫链蒙特卡罗采样(RJMCMC)算法实现

可逆跳跃马尔可夫链蒙特卡罗(RJMCMC)算法的MATLAB实现。该实现展示了如何在模型空间和参数空间中进行高效采样,特别适用于模型选择问题。

classdef RJMCMC < handle
    % Reversible Jump Markov Chain Monte Carlo (RJMCMC) sampler.
    %
    % Samples jointly over a discrete model space and each model's
    % (possibly differently-dimensioned) parameter space, which makes it
    % suitable for Bayesian model selection.
    %
    % NOTE: derives from handle so that state assigned inside run()
    % (acceptanceRates) is visible to the caller afterwards; with a value
    % class those assignments would be silently discarded.
    % NOTE(review): in MATLAB each classdef must live in its own .m file;
    % this listing groups several classes for presentation only.

    properties
        data;               % observed data vector
        models;             % array of candidate model objects (Model subclasses)
        priorProbs;         % prior probability of each model
        temperature;        % annealing temperature (1.0 = no tempering)
        maxGenerations;     % total number of MCMC iterations
        thinning;           % keep one sample every `thinning` iterations
        burnIn;             % number of initial iterations to discard
        jumpProb;           % probability of attempting a between-model jump
        proposalStd;        % std. dev. of the Gaussian random-walk proposal
        results;            % reserved for storing sampling results
        acceptanceRates;    % struct: within-/between-model acceptance rates
    end

    methods
        function obj = RJMCMC(data, models, priorProbs, varargin)
            % Construct a sampler.
            % Optional name/value pairs (case-insensitive) override the
            % defaults: 'temperature', 'maxGenerations', 'thinning',
            % 'burnIn', 'jumpProb', 'proposalStd'.
            obj.data = data;
            obj.models = models;
            obj.priorProbs = priorProbs;

            % default settings
            obj.temperature = 1.0;
            obj.maxGenerations = 10000;
            obj.thinning = 10;
            obj.burnIn = 1000;
            obj.jumpProb = 0.2;
            obj.proposalStd = 0.1;
            obj.results = struct();
            obj.acceptanceRates = struct('withinModel', 0, 'betweenModels', 0);

            % parse optional name/value arguments
            for i = 1:2:length(varargin)
                switch lower(varargin{i})
                    case 'temperature'
                        obj.temperature = varargin{i+1};
                    case 'maxgenerations'
                        obj.maxGenerations = varargin{i+1};
                    case 'thinning'
                        obj.thinning = varargin{i+1};
                    case 'burnin'
                        obj.burnIn = varargin{i+1};
                    case 'jumpprob'
                        obj.jumpProb = varargin{i+1};
                    case 'proposalstd'
                        obj.proposalStd = varargin{i+1};
                    otherwise
                        warning('未知参数: %s', varargin{i});
                end
            end
        end

        function [samples, modelCounts, logPosteriors] = run(obj)
            % Run the RJMCMC sampler.
            %
            % Returns:
            %   samples       - cell array; samples{m} is an (n_m x p_m)
            %                   matrix of retained draws for model m
            %   modelCounts   - number of retained draws per model
            %   logPosteriors - log posterior of every retained draw,
            %                   in the order they were stored
            numModels = length(obj.models);
            numParams = arrayfun(@(m) m.numParams, obj.models);

            % initial state: random model, parameters drawn from its prior
            currentModel = randi(numModels);
            currentParams = obj.initializeParameters(currentModel);
            currentLogPosterior = obj.logPosterior(currentModel, currentParams);

            % pre-allocate: at most maxStored draws survive burn-in + thinning
            maxStored = ceil(max(0, obj.maxGenerations - obj.burnIn) / obj.thinning);
            samples = cell(1, numModels);
            for i = 1:numModels
                samples{i} = nan(maxStored, numParams(i));
            end
            modelCounts = zeros(1, numModels);
            logPosteriors = nan(maxStored, 1);
            storedTotal = 0;   % total retained draws so far

            % acceptance bookkeeping
            acceptWithin = 0;
            acceptBetween = 0;
            totalWithin = 0;
            totalBetween = 0;

            % main MCMC loop
            for gen = 1:obj.maxGenerations
                % choose between a within-model move and a model jump
                if rand < obj.jumpProb
                    % between-model jump
                    totalBetween = totalBetween + 1;
                    [newModel, newParams, accepted] = obj.modelJump(currentModel, currentParams);

                    if accepted
                        acceptBetween = acceptBetween + 1;
                        currentModel = newModel;
                        currentParams = newParams;
                    end
                else
                    % within-model random-walk move
                    totalWithin = totalWithin + 1;
                    [newParams, accepted] = obj.withinModelMove(currentModel, currentParams);

                    if accepted
                        acceptWithin = acceptWithin + 1;
                        currentParams = newParams;
                    end
                end

                % log posterior of the (possibly updated) current state
                currentLogPosterior = obj.logPosterior(currentModel, currentParams);

                % Store after burn-in, every `thinning` iterations.
                % Counting from (gen - burnIn) guarantees an integer,
                % gap-free storage index even when burnIn is not a multiple
                % of thinning; each model's rows are indexed by that model's
                % own running count so no retained draw is lost when the
                % per-model matrices are trimmed below.
                if gen > obj.burnIn && mod(gen - obj.burnIn, obj.thinning) == 0
                    storedTotal = storedTotal + 1;
                    modelCounts(currentModel) = modelCounts(currentModel) + 1;
                    row = modelCounts(currentModel);
                    samples{currentModel}(row, :) = currentParams(1:numParams(currentModel));
                    logPosteriors(storedTotal) = currentLogPosterior;
                end

                % progress report; max(1, .) guards against division by
                % zero before either move type has been attempted
                if mod(gen, 1000) == 0
                    fprintf('迭代 %d/%d, 当前模型: %d, 接受率(内): %.2f%%, 接受率(间): %.2f%%\n', ...
                        gen, obj.maxGenerations, currentModel, ...
                        100*acceptWithin/max(1, totalWithin), 100*acceptBetween/max(1, totalBetween));
                end
            end

            % final acceptance rates (persist because this is a handle class)
            obj.acceptanceRates.withinModel = acceptWithin / max(1, totalWithin);
            obj.acceptanceRates.betweenModels = acceptBetween / max(1, totalBetween);

            % trim unused pre-allocated rows
            for i = 1:numModels
                samples{i} = samples{i}(1:modelCounts(i), :);
            end
            logPosteriors = logPosteriors(1:storedTotal);

            fprintf('\n===== 采样完成 =====\n');
            fprintf('总迭代次数: %d\n', obj.maxGenerations);
            fprintf('预热期: %d\n', obj.burnIn);
            fprintf('有效样本数: %d\n', sum(modelCounts));
            fprintf('模型接受率(内): %.2f%%\n', 100*obj.acceptanceRates.withinModel);
            fprintf('模型接受率(间): %.2f%%\n', 100*obj.acceptanceRates.betweenModels);
            fprintf('模型选择频率:\n');
            for i = 1:numModels
                fprintf('  模型 %d: %d 次 (%.2f%%)\n', i, modelCounts(i), ...
                    100*modelCounts(i)/max(1, sum(modelCounts)));
            end
        end

        function params = initializeParameters(obj, modelIdx)
            % Draw an initial parameter vector for model modelIdx from
            % that model's prior.
            model = obj.models(modelIdx);
            params = model.priorSampler();
        end

        function [newParams, accepted] = withinModelMove(obj, modelIdx, currentParams)
            % Random-walk Metropolis move inside a fixed model.
            proposedParams = currentParams + obj.proposalStd * randn(size(currentParams));

            % log acceptance ratio; the Gaussian proposal is symmetric,
            % so the proposal densities cancel
            logAlpha = obj.logPosterior(modelIdx, proposedParams) - ...
                       obj.logPosterior(modelIdx, currentParams);

            % Metropolis-Hastings accept/reject
            if log(rand) < logAlpha
                newParams = proposedParams;
                accepted = true;
            else
                newParams = currentParams;
                accepted = false;
            end
        end

        function [newModel, newParams, accepted] = modelJump(obj, currentModel, currentParams)
            % Propose a jump to a (possibly different-dimensional) model.
            numModels = length(obj.models);
            newModel = randi(numModels);

            % same model drawn: fall back to a within-model move
            if newModel == currentModel
                [newParams, accepted] = obj.withinModelMove(currentModel, currentParams);
                return;
            end

            numCurrentParams = obj.models(currentModel).numParams;
            numNewParams = obj.models(newModel).numParams;

            % map the parameter vector across dimensions
            if numCurrentParams == numNewParams
                % equal dimension: plain random-walk proposal
                proposedParams = currentParams + obj.proposalStd * randn(1, numNewParams);
            elseif numCurrentParams > numNewParams
                % dimension reduction: keep a random subset of coordinates
                keepIdx = randperm(numCurrentParams, numNewParams);
                proposedParams = currentParams(keepIdx) + obj.proposalStd * randn(1, numNewParams);
            else
                % Dimension increase: perturb existing coordinates and draw
                % the extra ones from the target model's prior. Use a
                % dedicated temporary instead of reusing the output variable
                % newParams (the original shadowed the return value here).
                baseParams = currentParams + obj.proposalStd * randn(1, numCurrentParams);
                priorDraw = obj.initializeParameters(newModel);
                proposedParams = [baseParams, priorDraw(numCurrentParams+1:end)];
            end

            % log acceptance probability for the jump
            logAlpha = obj.calculateJumpProbability(currentModel, currentParams, ...
                                                    newModel, proposedParams, ...
                                                    numCurrentParams, numNewParams);

            % Metropolis-Hastings accept/reject
            if log(rand) < logAlpha
                newParams = proposedParams;
                accepted = true;
            else
                newParams = currentParams;
                accepted = false;
            end
        end

        function logAlpha = calculateJumpProbability(obj, fromModel, fromParams, ...
                                                     toModel, toParams, ...
                                                     fromDim, toDim)
            % Log acceptance ratio for a between-model jump:
            % posterior ratio + model-prior ratio + proposal ratio + log|J|.
            % NOTE(review): the proposal ratio and Jacobian are set to 0 as
            % a simplification; for asymmetric dimension-changing moves this
            % is only approximate and the sampler is not exactly reversible.
            logPostFrom = obj.logPosterior(fromModel, fromParams);
            logPostTo = obj.logPosterior(toModel, toParams);

            % model prior ratio
            priorRatio = log(obj.priorProbs(toModel)) - log(obj.priorProbs(fromModel));

            % proposal-density ratio (simplified)
            if fromDim == toDim
                propRatio = 0; % symmetric proposal
            else
                propRatio = 0; % simplified for dimension-changing moves
            end

            % log Jacobian determinant (simplified)
            jacobian = 0;

            % tempered log acceptance ratio
            logAlpha = (logPostTo - logPostFrom) + priorRatio + propRatio + jacobian;
            logAlpha = logAlpha / obj.temperature;
        end

        function logPost = logPosterior(obj, modelIdx, params)
            % Unnormalized log posterior: log likelihood + log prior.
            model = obj.models(modelIdx);
            logLikelihood = model.logLikelihood(obj.data, params);
            logPrior = model.logPrior(params);
            logPost = logLikelihood + logPrior;
        end

        function plotResults(obj, samples, modelCounts)
            % Visualize model-selection frequencies, per-model parameter
            % posteriors/traces, and estimated log model evidences.
            numModels = length(obj.models);

            % model selection frequency
            figure;
            bar(modelCounts / sum(modelCounts));
            title('模型选择频率');
            xlabel('模型索引');
            ylabel('选择概率');
            grid on;

            % parameter posterior histograms and trace plots
            for m = 1:numModels
                if ~isempty(samples{m})
                    figure;
                    subplot(1,2,1);
                    histogram(samples{m}, 20);
                    title(sprintf('模型 %d 参数后验分布', m));
                    xlabel('参数值');
                    ylabel('频数');

                    subplot(1,2,2);
                    plot(samples{m});
                    title(sprintf('模型 %d 参数轨迹', m));
                    xlabel('样本索引');
                    ylabel('参数值');
                    grid on;
                end
            end

            % model evidence estimates
            logEvidences = obj.estimateModelEvidence(samples, modelCounts);
            figure;
            bar(logEvidences);
            title('模型对数证据');
            xlabel('模型索引');
            ylabel('对数证据');
            grid on;
        end

        function logEvidences = estimateModelEvidence(obj, samples, modelCounts)
            % Estimate log model evidence with the harmonic-mean estimator.
            % NOTE(review): the harmonic-mean estimator is known to have
            % very high variance; treat these values as rough indicators.
            numModels = length(obj.models);
            logEvidences = zeros(1, numModels);

            for m = 1:numModels
                if modelCounts(m) > 0
                    logLikes = zeros(modelCounts(m), 1);
                    for i = 1:modelCounts(m)
                        logLikes(i) = obj.models(m).logLikelihood(obj.data, samples{m}(i, :));
                    end

                    % harmonic mean in log space; subtracting the max before
                    % exponentiating avoids overflow of exp(-logLikes)
                    negLL = -logLikes;
                    mx = max(negLL);
                    logEvidences(m) = -(mx + log(mean(exp(negLL - mx))));
                end
            end
        end
    end
end

%% 模型基类
classdef Model < handle
    % Abstract base class for candidate models used by the RJMCMC sampler.
    % Concrete subclasses must set `name` and `numParams` in their
    % constructor and implement the three abstract methods below.
    properties
        name        % human-readable model name
        numParams   % number of free parameters of this model
    end
    
    methods (Abstract)
        logLikelihood(obj, data, params)  % log p(data | params) under this model
        logPrior(obj, params)             % log prior density of params
        priorSampler(obj)                 % draw a parameter vector from the prior
    end
end

%% 高斯模型实现
classdef GaussianModel < Model
    % Gaussian model with unknown mean and known standard deviation.
    % Single free parameter mu, with a N(mu0, sigma0^2) prior.
    properties
        mu0     % prior mean of mu
        sigma0  % prior std. dev. of mu
        sigma   % known data standard deviation
    end
    
    methods
        function obj = GaussianModel(mu0, sigma0, sigma)
            % Store prior hyper-parameters and the known noise level.
            obj.name = '高斯模型';
            obj.numParams = 1;
            obj.mu0 = mu0;
            obj.sigma0 = sigma0;
            obj.sigma = sigma;
        end
        
        function logL = logLikelihood(obj, data, params)
            % Gaussian log likelihood of the data given mean params(1).
            mu = params(1);
            n = numel(data);
            residualSS = sum((data - mu).^2);
            logL = -0.5 * n * log(2*pi*obj.sigma^2) - 0.5 * residualSS / obj.sigma^2;
        end
        
        function logP = logPrior(obj, params)
            % Log density of the N(mu0, sigma0^2) prior at params(1).
            z = (params(1) - obj.mu0) / obj.sigma0;
            logP = -0.5 * z^2 - log(obj.sigma0 * sqrt(2*pi));
        end
        
        function params = priorSampler(obj)
            % Draw mu from its Gaussian prior.
            params = obj.mu0 + obj.sigma0 * randn();
        end
    end
end

%% 高斯混合模型实现
classdef GaussianMixtureModel < Model
    % Two-component Gaussian mixture with symmetric component means +mu
    % and -mu, known common standard deviation, and unknown mixture weight.
    % Parameters: [mu, logit(pi)], where pi is the weight of the +mu component.
    properties
        mu0     % prior mean of mu
        sigma0  % prior std. dev. of mu
        sigma   % known data standard deviation
    end
    
    methods
        function obj = GaussianMixtureModel(mu0, sigma0, sigma)
            % Store prior hyper-parameters and the known noise level.
            obj.name = '高斯混合模型';
            obj.numParams = 2; % mean and (logit of) the mixture weight
            obj.mu0 = mu0;
            obj.sigma0 = sigma0;
            obj.sigma = sigma;
        end
        
        function logL = logLikelihood(obj, data, params)
            % Log likelihood of pi*N(x; mu, sigma) + (1-pi)*N(x; -mu, sigma).
            mu = params(1);
            pi_k = 1/(1+exp(-params(2))); % sigmoid keeps the weight in (0,1)
            n = length(data);
            
            % Per-point component log densities computed directly in log
            % space: log(normpdf(...)) underflows to -Inf for far-out
            % points and needlessly requires the Statistics Toolbox.
            logNormConst = -0.5*log(2*pi*obj.sigma^2);
            logL = 0;
            for i = 1:n
                ll1 = log(pi_k)   + logNormConst - 0.5*((data(i) - mu)/obj.sigma)^2;
                ll2 = log(1-pi_k) + logNormConst - 0.5*((data(i) + mu)/obj.sigma)^2;
                % log-sum-exp trick for numerical stability
                mx = max(ll1, ll2);
                logL = logL + mx + log(exp(ll1 - mx) + exp(ll2 - mx));
            end
            % BUG FIX: the mixture density above is already normalized.
            % The original code subtracted n*log(2) here, which penalized
            % this model by a constant and distorted between-model
            % posterior comparison in the RJMCMC sampler.
        end
        
        function logP = logPrior(obj, params)
            % Independent priors: Gaussian on mu, standard Cauchy on the
            % logit of the mixture weight.
            mu = params(1);
            pi_logit = params(2);
            
            % N(mu0, sigma0^2) prior on mu
            logP_mu = -0.5*((mu - obj.mu0)/obj.sigma0)^2 ...
                      - log(obj.sigma0*sqrt(2*pi));
            
            % standard Cauchy log density on the logit: -log(pi*(1+x^2))
            logP_pi = -log(1 + pi_logit^2) - log(pi);
            
            logP = logP_mu + logP_pi;
        end
        
        function params = priorSampler(obj)
            % Draw [mu, logit(pi)] from the priors; the wide Gaussian on
            % the logit is an approximate stand-in for the Cauchy prior.
            mu = obj.mu0 + obj.sigma0*randn();
            pi_logit = 4*randn();
            params = [mu, pi_logit];
        end
    end
end

%% 指数模型实现
classdef ExponentialModel < Model
    % Exponential model with rate parameter lambda and a Gamma prior.
    properties
        lambda0  % rate parameter of the Gamma prior on lambda
        beta    % shape parameter of the Gamma prior on lambda
    end
    
    methods
        function obj = ExponentialModel(lambda0, beta)
            % Store the Gamma prior hyper-parameters.
            obj.name = '指数模型';
            obj.numParams = 1;
            obj.lambda0 = lambda0;
            obj.beta = beta;
        end
        
        function logL = logLikelihood(obj, data, params)
            % Exponential log likelihood; -Inf outside the support lambda > 0.
            lambda = params(1);
            if lambda > 0
                logL = numel(data) * log(lambda) - lambda * sum(data);
            else
                logL = -Inf;
            end
        end
        
        function logP = logPrior(obj, params)
            % Gamma(shape = beta, rate = lambda0) log prior density.
            lambda = params(1);
            if lambda <= 0
                logP = -Inf; % outside the support
                return;
            end
            logP = obj.beta*log(obj.lambda0) - gammaln(obj.beta) ...
                   + (obj.beta-1)*log(lambda) - obj.lambda0*lambda;
        end
        
        function params = priorSampler(obj)
            % Draw lambda from the Gamma prior; gamrnd takes a SCALE
            % parameter, hence 1/lambda0 for rate lambda0.
            params = gamrnd(obj.beta, 1/obj.lambda0);
        end
    end
end

%% 演示脚本
function demoRJMCMC()
    % Demonstration of the RJMCMC sampler on synthetic data drawn from
    % one of three candidate models (Gaussian, Gaussian mixture,
    % Exponential). Prints sampler diagnostics and the true parameters.
    rng(42); % reproducible random stream
    trueModel = 2; % index of the data-generating model
    nSamples = 200;
    
    switch trueModel
        case 1 % Gaussian model
            mu_true = 2.5;
            sigma_true = 1.0;
            data = mu_true + sigma_true*randn(1, nSamples);
            models = [GaussianModel(0, 5, 1), ...
                      GaussianMixtureModel(0, 5, 1), ...
                      ExponentialModel(1, 1)];
            priorProbs = [0.3, 0.4, 0.3];
            
        case 2 % Gaussian mixture model
            mu_true = 1.8;
            sigma_true = 0.8;
            weights = [0.7, 0.3];
            % BUG FIX: the original used ';' (vertical concatenation) on
            % two row vectors of different lengths (1x140 and 1x60),
            % which raises a dimension-mismatch error. Concatenate
            % horizontally instead.
            data = [ mu_true + sigma_true*randn(1, round(weights(1)*nSamples)), ...
                    -mu_true + sigma_true*randn(1, round(weights(2)*nSamples))];
            data = data(:)'; % guarantee a row vector
            models = [GaussianModel(0, 5, 1), ...
                      GaussianMixtureModel(0, 5, 1), ...
                      ExponentialModel(1, 1)];
            priorProbs = [0.3, 0.4, 0.3];
            
        case 3 % Exponential model
            lambda_true = 0.7;
            data = exprnd(1/lambda_true, 1, nSamples); % exprnd takes the mean
            models = [GaussianModel(0, 5, 1), ...
                      GaussianMixtureModel(0, 5, 1), ...
                      ExponentialModel(1, 1)];
            priorProbs = [0.3, 0.4, 0.3];
    end
    
    % build the sampler
    rjmcmc = RJMCMC(data, models, priorProbs, ...
        'maxGenerations', 20000, ...
        'burnIn', 5000, ...
        'thinning', 5, ...
        'jumpProb', 0.3, ...
        'proposalStd', 0.2);
    
    % run the sampler
    [samples, modelCounts, logPosteriors] = rjmcmc.run(); %#ok<ASGLU>
    
    % visualize the results
    rjmcmc.plotResults(samples, modelCounts);
    
    % report the data-generating model
    fprintf('\n===== 真实模型信息 =====\n');
    fprintf('真实模型: %d (%s)\n', trueModel, models(trueModel).name);
    switch trueModel
        case 1
            fprintf('真实参数: μ = %.2f\n', mu_true);
        case 2
            fprintf('真实参数: μ = %.2f, 权重 = [%.2f, %.2f]\n', mu_true, weights(1), weights(2));
        case 3
            fprintf('真实参数: λ = %.2f\n', lambda_true);
    end
    
    % posterior parameter summaries per visited model
    fprintf('\n===== 参数估计结果 =====\n');
    for m = 1:length(models)
        if modelCounts(m) > 0
            fprintf('模型 %d (%s):\n', m, models(m).name);
            paramMeans = mean(samples{m}, 1);
            paramStds = std(samples{m}, 0, 1);
            for p = 1:models(m).numParams
                fprintf('  参数 %d: %.2f ± %.2f\n', p, paramMeans(p), paramStds(p));
            end
        end
    end
end

算法原理与实现细节

1. RJMCMC核心概念

可逆跳跃马尔可夫链蒙特卡罗(RJMCMC)是MCMC方法的扩展,允许在参数空间的不同维度之间跳跃。其核心思想是:

  • 在模型空间中进行马尔可夫链转移
  • 在参数空间中进行随机游走
  • 在模型间跳跃时进行维度变换

2. 算法关键组件

2.1 模型表示

classdef Model < handle
    properties
        name        % 模型名称
        numParams   % 参数数量
    end
    
    methods (Abstract)
        logLikelihood(obj, data, params)  % 对数似然函数
        logPrior(obj, params)             % 对数先验函数
        priorSampler(obj)                 % 先验参数采样
    end
end

2.2 状态转移机制

  • 模型内移动:在固定模型参数空间内进行随机游走
  • 模型间跳跃:在不同模型间转换,涉及参数维度变化

2.3 接受概率计算

接受概率由以下公式决定:

\[ \alpha = \min\left(1,\; \frac{p(y\mid\theta',M')\,p(\theta'\mid M')\,p(M')}{p(y\mid\theta,M)\,p(\theta\mid M)\,p(M)} \cdot \frac{q(\theta\mid\theta')}{q(\theta'\mid\theta)} \cdot \left|J\right| \right) \]

其中:

  • \(p(M\mid y)\) 是模型后验概率
  • \(q\) 是提议分布
  • \(J\) 是维度变换的雅可比行列式

3. 实现的关键函数

3.1 主运行函数

function [samples, modelCounts, logPosteriors] = run(obj)
    % 初始化状态和计数器
    % MCMC主循环
    for gen = 1:obj.maxGenerations
        if rand < obj.jumpProb
            % 模型间跳跃
            [newModel, newParams, accepted] = obj.modelJump(...);
        else
            % 模型内移动
            [newParams, accepted] = obj.withinModelMove(...);
        end
        % 存储结果
    end
end

3.2 模型跳跃实现

function [newModel, newParams, accepted] = modelJump(obj, currentModel, currentParams)
    % 随机选择新模型
    % 处理参数维度变化
    if numCurrentParams == numNewParams
        % 同维度变换
    else
        % 不同维度变换(升维/降维)
    end
    % 计算接受概率
    logAlpha = obj.calculateJumpProbability(...);
    % Metropolis-Hastings决策
end

4. 模型实现示例

4.1 高斯模型

classdef GaussianModel < Model
    methods
        function logL = logLikelihood(obj, data, params)
            mu = params(1);
            n = length(data);
            logL = -0.5*n*log(2*pi*obj.sigma^2) ...
                  -0.5*sum((data - mu).^2)/obj.sigma^2;
        end
    end
end

4.2 高斯混合模型

classdef GaussianMixtureModel < Model
    methods
        function logL = logLikelihood(obj, data, params)
            mu = params(1);
            pi_k = 1/(1+exp(-params(2))); % sigmoid转换
            % 计算混合似然
        end
    end
end

4.3 指数模型

classdef ExponentialModel < Model
    methods
        function logL = logLikelihood(obj, data, params)
            lambda = params(1);
            n = length(data);
            logL = n*log(lambda) - lambda*sum(data);
        end
    end
end

参考代码 可逆跳转蒙特卡罗采样(RJMCMC)算法详细程序 www.youwenfan.com/contentcns/23229.html

使用指南

1. 基本使用步骤

% 1. 准备数据
data = ...; % 观测数据

% 2. 定义候选模型
models = [GaussianModel(0, 5, 1), ...
          GaussianMixtureModel(0, 5, 1), ...
          ExponentialModel(1, 1)];

% 3. 设置模型先验概率
priorProbs = [0.3, 0.4, 0.3];

% 4. 创建RJMCMC实例
rjmcmc = RJMCMC(data, models, priorProbs, ...
    'maxGenerations', 20000, ...
    'burnIn', 5000, ...
    'thinning', 5);

% 5. 运行采样
[samples, modelCounts, logPosteriors] = rjmcmc.run();

% 6. 可视化结果
rjmcmc.plotResults(samples, modelCounts);

2. 参数调优建议

  1. 提议分布标准差
    • 太大:接受率低
    • 太小:探索效率低
    • 建议:从0.1开始,根据接受率调整
  2. 模型跳跃概率
    • 典型值:0.1-0.3
    • 模型差异大时增加
    • 模型相似时减少
  3. 预热期和稀释因子
    • 预热期:通常占总迭代次数的10-50%
    • 稀释因子:根据自相关性调整,通常5-20
  4. 温度参数
    • 用于模拟退火
    • 高温:探索性强
    • 低温:开发性强

3. 结果解释

  1. 模型选择
    • 比较各模型被选择的频率
    • 计算模型证据(使用 `estimateModelEvidence` 方法)
  2. 参数估计
    • 检查各参数的后验分布
    • 计算后验均值和置信区间
  3. 收敛诊断
    • 检查迹线图是否平稳
    • 计算Gelman-Rubin统计量
    • 观察自相关函数

扩展功能

1. 并行计算支持

function runParallel(obj, numChains)
    % 并行运行多个MCMC链
    parfor chain = 1:numChains
        [samples{chain}, modelCounts(chain,:), ...] = obj.run();
    end
    % 合并结果
end

2. 自适应提议分布

function adaptProposal(obj, windowSize)
    % 根据近期接受率调整提议分布
    recentAccept = ...; % 最近windowSize次的接受率
    if recentAccept < 0.2
        obj.proposalStd = obj.proposalStd * 0.9;
    elseif recentAccept > 0.4
        obj.proposalStd = obj.proposalStd * 1.1;
    end
end

3. 自动相关性检测

function thinFactor = autoCorrelationThin(samples)
    % 计算自相关函数
    acf = autocorr(samples, 'NumLags', 100);
    % 找到自相关接近0的点
    zeroCross = find(acf(:,1) < 0.05, 1);
    thinFactor = max(1, zeroCross);
end

4. 层次模型支持

classdef HierarchicalModel < Model
    methods
        function logL = logLikelihood(obj, data, params)
            % 实现层次似然函数
            groupParams = ...; % 从超参数计算组参数
            logL = 0;
            for g = 1:numGroups
                logL = logL + subModel.logLikelihood(data{g}, groupParams(g));
            end
            % 添加超参数先验
        end
    end
end

实际应用建议

  1. 模型设计
    • 确保候选模型覆盖可能的假设空间
    • 设计合理的参数化方式
    • 考虑模型的可识别性
  2. 计算效率
    • 对似然函数进行优化
    • 使用解析梯度(如果可能)
    • 并行化计算密集型部分
  3. 结果验证
    • 使用合成数据测试算法
    • 比较不同初始条件的收敛性
    • 进行敏感性分析
  4. 软件集成
    • 与Stan、PyMC3等概率编程工具接口
    • 实现到现有分析流程中
    • 开发图形用户界面
posted @ 2026-03-14 16:08  风一直那个吹  阅读(0)  评论(0)    收藏  举报