MATLAB Implementation of Entropy-Based Feature Selection
Information entropy is a powerful tool for measuring feature importance, and it is particularly well suited to dimensionality reduction on high-dimensional data.
1. Theoretical Foundations of Information Entropy
1.1 Basic Concepts
Information entropy:
H(X) = -Σ_i P(x_i) log₂ P(x_i)
Conditional entropy:
H(Y|X) = -Σ_i P(x_i) Σ_j P(y_j|x_i) log₂ P(y_j|x_i)
Information gain:
IG(Y,X) = H(Y) - H(Y|X)
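To make the definitions concrete, here is a small hand-checkable sketch (the toy vectors are purely illustrative) that evaluates H(Y), H(Y|X), and IG(Y,X) for a binary feature and a binary label:

% Toy binary dataset (8 samples)
X = [0 0 0 1 1 1 1 1]';
Y = [0 0 1 1 1 1 0 1]';

% H(Y) = -sum_y p(y) log2 p(y)
[~, ~, yid] = unique(Y);
py = accumarray(yid, 1) / numel(Y);
H_Y = -sum(py .* log2(py));                 % 0.9544 bits

% H(Y|X) = sum_x p(x) H(Y | X = x)
H_YgX = 0;
for xv = unique(X)'
    Yx = Y(X == xv);
    [~, ~, id] = unique(Yx);
    p = accumarray(id, 1) / numel(Yx);
    H_YgX = H_YgX + (numel(Yx) / numel(Y)) * (-sum(p .* log2(p)));
end

% IG(Y,X) = H(Y) - H(Y|X), about 0.16 bits for this toy data
IG = H_Y - H_YgX;
fprintf('H(Y)=%.4f, H(Y|X)=%.4f, IG=%.4f\n', H_Y, H_YgX, IG);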
2. Entropy-Based Feature Selection Algorithm
2.1 Main Framework Class
classdef EntropyFeatureSelector < handle
    % Entropy-based feature selector
    
    properties
        FeatureScores        % Per-feature scores
        SelectedFeatures     % Indices of the selected features
        FeatureNames         % Feature names
        Method               % Selection criterion
        NumFeatures          % Number of features to select
        DiscretizeMethod     % Discretization method
        NumBins              % Number of discretization bins
    end
    
    methods
        function obj = EntropyFeatureSelector(method, num_features)
            % Constructor
            if nargin < 1
                obj.Method = 'information_gain';
            else
                obj.Method = method;
            end
            
            if nargin < 2
                obj.NumFeatures = 10;
            else
                obj.NumFeatures = num_features;
            end
            
            obj.DiscretizeMethod = 'equal_width';
            obj.NumBins = 10;
        end
        
        function fit(obj, X, y, feature_names)
            % Fit the feature selector
            % X: feature matrix (n_samples × n_features)
            % y: target variable
            % feature_names: feature names (optional)
            
            [~, n_features] = size(X);
            
            if nargin < 4
                obj.FeatureNames = arrayfun(@(x) sprintf('Feature_%d', x), ...
                    1:n_features, 'UniformOutput', false);
            else
                obj.FeatureNames = feature_names;
            end
            
            % Discretize continuous features and the target
            X_disc = obj.discretize_features(X);
            y_disc = obj.discretize_target(y);
            
            % Score each feature
            obj.FeatureScores = zeros(1, n_features);
            
            for i = 1:n_features
                switch obj.Method
                    case 'information_gain'
                        obj.FeatureScores(i) = obj.information_gain(...
                            X_disc(:, i), y_disc);
                    case 'gain_ratio'
                        obj.FeatureScores(i) = obj.gain_ratio(...
                            X_disc(:, i), y_disc);
                    case 'symmetrical_uncertainty'
                        obj.FeatureScores(i) = obj.symmetrical_uncertainty(...
                            X_disc(:, i), y_disc);
                    case 'mutual_information'
                        % Used by the MutualInformationFeatureSelector subclass
                        obj.FeatureScores(i) = mutual_information(...
                            X_disc(:, i), y_disc);
                    case 'joint_entropy'
                        obj.FeatureScores(i) = obj.joint_entropy_feature(...
                            X_disc(:, i), y_disc);
                    otherwise
                        error('Unknown feature selection method: %s', obj.Method);
                end
            end
            
            % Select the top-scoring features
            obj.select_features();
        end
        
        function X_selected = transform(obj, X)
            % Keep only the selected feature columns
            X_selected = X(:, obj.SelectedFeatures);
        end
        
        function X_selected = fit_transform(obj, X, y, feature_names)
            % Fit and transform in one call
            if nargin < 4
                obj.fit(X, y);
            else
                obj.fit(X, y, feature_names);
            end
            X_selected = obj.transform(X);
        end
    end
end
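A minimal usage sketch (assuming the class above and the entropy helpers defined in Section 2.2 below are on the MATLAB path; the data here is synthetic):

% Keep the 5 features with the highest information gain
rng(0);
X = randn(200, 20);
y = double(X(:, 3) + 0.5 * X(:, 7) + 0.1 * randn(200, 1) > 0);

selector = EntropyFeatureSelector('information_gain', 5);
X_reduced = selector.fit_transform(X, y);   % 200 × 5 matrix
disp(selector.SelectedFeatures)             % indices of the kept columns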
2.2 Core Entropy Functions
function entropy_val = entropy(data)
    % Compute the information entropy of a discrete data vector
    % data: input data vector
    
    % Estimate the probability distribution
    unique_vals = unique(data);
    probabilities = zeros(1, length(unique_vals));
    
    for i = 1:length(unique_vals)
        probabilities(i) = sum(data == unique_vals(i)) / length(data);
    end
    
    % Compute the entropy (base-2 logarithm, so the result is in bits,
    % consistent with the definitions in Section 1.1)
    entropy_val = -sum(probabilities .* log2(probabilities + eps));
end
function cond_entropy = conditional_entropy(X, y)
    % Compute the conditional entropy H(Y|X)
    
    unique_x = unique(X);
    cond_entropy = 0;
    
    for i = 1:length(unique_x)
        x_val = unique_x(i);
        y_given_x = y(X == x_val);
        
        if ~isempty(y_given_x)
            prob_x = sum(X == x_val) / length(X);
            entropy_y_given_x = entropy(y_given_x);
            cond_entropy = cond_entropy + prob_x * entropy_y_given_x;
        end
    end
end
function je = joint_entropy(data)
    % Compute the joint entropy H(X1,...,Xk)
    % data: matrix whose columns are the variables
    
    % Encode each row (joint outcome) as a single discrete state,
    % then reuse the scalar entropy function
    [~, ~, joint_states] = unique(data, 'rows');
    je = entropy(joint_states);
end
function mi = mutual_information(X, y)
    % Compute the mutual information I(X;Y) = H(X) + H(Y) - H(X,Y)
    
    entropy_x = entropy(X);
    entropy_y = entropy(y);
    joint_entropy_xy = joint_entropy([X, y]);
    
    mi = entropy_x + entropy_y - joint_entropy_xy;
end
2.3 Feature Selection Criteria
The following methods belong inside the EntropyFeatureSelector classdef body. They are declared with protected access (rather than private) so that the subclasses in Section 3 can reuse the discretization helpers.
methods (Access = protected)
    function ig = information_gain(obj, X, y)
        % Information gain: IG(Y,X) = H(Y) - H(Y|X)
        entropy_y = entropy(y);
        cond_entropy_y_given_x = conditional_entropy(X, y);
        ig = entropy_y - cond_entropy_y_given_x;
    end
    
    function gr = gain_ratio(obj, X, y)
        % Gain ratio: GR(Y,X) = IG(Y,X) / H(X)
        ig = obj.information_gain(X, y);
        entropy_x = entropy(X);
        
        if entropy_x == 0
            gr = 0;
        else
            gr = ig / entropy_x;
        end
    end
    
    function su = symmetrical_uncertainty(obj, X, y)
        % Symmetrical uncertainty: SU(Y,X) = 2 * IG(Y,X) / (H(Y) + H(X))
        ig = obj.information_gain(X, y);
        entropy_y = entropy(y);
        entropy_x = entropy(X);
        
        if (entropy_y + entropy_x) == 0
            su = 0;
        else
            su = 2 * ig / (entropy_y + entropy_x);
        end
    end
    
    function je_score = joint_entropy_feature(obj, X, y)
        % Joint-entropy-based feature score
        % A smaller joint entropy indicates a stronger association
        % between the feature and the target
        
        joint_entropy_val = joint_entropy([X, y]);
        % Convert to a score (smaller joint entropy -> higher score)
        je_score = 1 / (joint_entropy_val + eps);
    end
    
    function discretized_data = discretize_features(obj, data)
        % Discretize continuous features
        
        [n_samples, n_features] = size(data);
        discretized_data = zeros(size(data));
        
        for i = 1:n_features
            feature_data = data(:, i);
            
            switch obj.DiscretizeMethod
                case 'equal_width'
                    % Equal-width binning
                    min_val = min(feature_data);
                    max_val = max(feature_data);
                    bin_edges = linspace(min_val, max_val, obj.NumBins + 1);
                    
                case 'equal_frequency'
                    % Equal-frequency binning
                    sorted_data = sort(feature_data);
                    bin_edges = zeros(1, obj.NumBins + 1);
                    bin_edges(1) = min(feature_data);
                    bin_edges(end) = max(feature_data);
                    
                    for bin = 2:obj.NumBins
                        idx = round(bin * n_samples / obj.NumBins);
                        bin_edges(bin) = sorted_data(max(1, min(idx, n_samples)));
                    end
                    
                    % Tied values can produce duplicate edges; histcounts
                    % requires strictly increasing edges
                    bin_edges = unique(bin_edges);
                    
                case 'kmeans'
                    % K-means binning: use midpoints between the sorted
                    % cluster centers as bin edges
                    [~, bin_centers] = kmeans(feature_data, obj.NumBins);
                    centers = sort(bin_centers);
                    bin_edges = [-inf; (centers(1:end-1) + centers(2:end)) / 2; inf];
                    
                otherwise
                    error('Unknown discretization method: %s', obj.DiscretizeMethod);
            end
            
            % Assign discrete bin labels
            [~, ~, discretized_data(:, i)] = histcounts(feature_data, bin_edges);
        end
    end
    
    function discretized_target = discretize_target(obj, y)
        % Discretize the target variable (for classification problems)
        
        if iscategorical(y) || iscellstr(y) || isstring(y)
            % Class labels: map each label to an integer group index
            discretized_target = grp2idx(y);
        elseif isinteger(y) || numel(unique(y)) <= obj.NumBins
            % Already discrete (integer labels or few distinct values)
            discretized_target = double(y);
        else
            % Continuous target: discretize it like a feature
            discretized_target = obj.discretize_features(y(:));
        end
    end
    
    function select_features(obj)
        % Select features according to their scores
        
        [sorted_scores, sorted_indices] = sort(obj.FeatureScores, 'descend');
        
        % Keep the top k features
        k = min(obj.NumFeatures, length(sorted_scores));
        obj.SelectedFeatures = sorted_indices(1:k);
        
        fprintf('Selected %d features:\n', k);
        for i = 1:k
            idx = sorted_indices(i);
            fprintf('  %d. %s: %.4f\n', i, obj.FeatureNames{idx}, sorted_scores(i));
        end
    end
end
3. Advanced Feature Selection Algorithms
3.1 Mutual-Information-Based Feature Selection
classdef MutualInformationFeatureSelector < EntropyFeatureSelector
    % Mutual-information-based feature selector
    
    methods
        function obj = MutualInformationFeatureSelector(num_features)
            % Constructor
            if nargin < 1
                num_features = 10;
            end
            obj@EntropyFeatureSelector('mutual_information', num_features);
        end
        
        function mi = mutual_information_feature(obj, X, y)
            % Compute the mutual information I(X;Y)
            mi = mutual_information(X, y);
        end
    end
end
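A usage sketch (this relies on the 'mutual_information' case added to the parent's fit switch in Section 2.1; X and y are as in the earlier sketches):

% Rank the features by mutual information with the target, keep the top 8
mi_selector = MutualInformationFeatureSelector(8);
mi_selector.fit(X, y);
X_top = mi_selector.transform(X);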
3.2 The mRMR (Maximum Relevance Minimum Redundancy) Algorithm
classdef MRMRFeatureSelector < EntropyFeatureSelector
    % mRMR (Maximum Relevance Minimum Redundancy) feature selector
    
    properties
        SelectedFeatureSet   % Selected feature set
        RelevanceScores      % Relevance scores
        RedundancyScores     % Redundancy scores
    end
    
    methods
        function obj = MRMRFeatureSelector(num_features)
            % Constructor
            if nargin < 1
                num_features = 10;
            end
            obj@EntropyFeatureSelector('mrmr', num_features);
        end
        
        function fit(obj, X, y, feature_names)
            % mRMR feature selection
            
            [~, n_features] = size(X);
            
            if nargin < 4
                obj.FeatureNames = arrayfun(@(x) sprintf('Feature_%d', x), ...
                    1:n_features, 'UniformOutput', false);
            else
                obj.FeatureNames = feature_names;
            end
            
            % Discretize the features and the target
            X_disc = obj.discretize_features(X);
            y_disc = obj.discretize_target(y);
            
            % Relevance of each feature to the target
            relevance = zeros(1, n_features);
            for i = 1:n_features
                relevance(i) = mutual_information(X_disc(:, i), y_disc);
            end
            obj.FeatureScores = relevance;  % expose the per-feature relevance
            
            % mRMR forward selection
            selected = [];
            candidate_features = 1:n_features;
            
            % Pick the first feature (highest relevance)
            [~, first_feature] = max(relevance);
            selected = [selected, first_feature];
            candidate_features(first_feature) = [];
            
            obj.RelevanceScores = zeros(1, obj.NumFeatures);
            obj.RedundancyScores = zeros(1, obj.NumFeatures);
            
            obj.RelevanceScores(1) = relevance(first_feature);
            obj.RedundancyScores(1) = 0;
            
            fprintf('mRMR selection process:\n');
            fprintf('1. Selected feature %d (%s), relevance: %.4f\n', ...
                first_feature, obj.FeatureNames{first_feature}, relevance(first_feature));
            
            % Select the remaining features
            for k = 2:min(obj.NumFeatures, n_features)
                best_score = -inf;
                best_feature = 0;
                avg_redundancy = 0;
                
                for i = 1:length(candidate_features)
                    feature_idx = candidate_features(i);
                    
                    % Average redundancy with the already-selected features
                    redundancy = 0;
                    for j = 1:length(selected)
                        redundancy = redundancy + mutual_information(...
                            X_disc(:, feature_idx), X_disc(:, selected(j)));
                    end
                    redundancy = redundancy / length(selected);
                    
                    % mRMR criterion: relevance - redundancy
                    mrmr_score = relevance(feature_idx) - redundancy;
                    
                    if mrmr_score > best_score
                        best_score = mrmr_score;
                        best_feature = feature_idx;
                        avg_redundancy = redundancy;
                    end
                end
                
                if best_feature > 0
                    selected = [selected, best_feature];
                    candidate_features(candidate_features == best_feature) = [];
                    
                    obj.RelevanceScores(k) = relevance(best_feature);
                    obj.RedundancyScores(k) = avg_redundancy;
                    
                    fprintf('%d. Selected feature %d (%s), mRMR score: %.4f\n', ...
                        k, best_feature, obj.FeatureNames{best_feature}, best_score);
                else
                    break;
                end
            end
            
            obj.SelectedFeatures = selected;
            obj.SelectedFeatureSet = selected;
        end
        
        function plot_mrmr_process(obj)
            % Plot the mRMR selection process
            
            if isempty(obj.RelevanceScores)
                error('Run the fit method first');
            end
            
            figure('Position', [100, 100, 1200, 500]);
            
            subplot(1, 2, 1);
            k = 1:length(obj.RelevanceScores);
            plot(k, obj.RelevanceScores, 'bo-', 'LineWidth', 2, 'MarkerSize', 8);
            hold on;
            plot(k, obj.RedundancyScores, 'rs-', 'LineWidth', 2, 'MarkerSize', 8);
            xlabel('Selection order');
            ylabel('Score');
            title('mRMR selection process');
            legend('Relevance', 'Average redundancy', 'Location', 'best');
            grid on;
            
            subplot(1, 2, 2);
            mrmr_scores = obj.RelevanceScores - obj.RedundancyScores;
            bar(mrmr_scores, 'FaceColor', [0.2, 0.6, 0.8]);
            xlabel('Selection order');
            ylabel('mRMR score');
            title('mRMR scores');
            grid on;
        end
    end
end
3.3 The JMI (Joint Mutual Information) Algorithm
classdef JMIFeatureSelector < EntropyFeatureSelector
    % Joint-mutual-information-based feature selector
    
    methods
        function obj = JMIFeatureSelector(num_features)
            % Constructor
            if nargin < 1
                num_features = 10;
            end
            obj@EntropyFeatureSelector('jmi', num_features);
        end
        
        function fit(obj, X, y, feature_names)
            % JMI feature selection
            
            [~, n_features] = size(X);
            
            if nargin < 4
                obj.FeatureNames = arrayfun(@(x) sprintf('Feature_%d', x), ...
                    1:n_features, 'UniformOutput', false);
            else
                obj.FeatureNames = feature_names;
            end
            
            % Discretize the features and the target
            X_disc = obj.discretize_features(X);
            y_disc = obj.discretize_target(y);
            
            % JMI forward selection
            selected = [];
            candidate_features = 1:n_features;
            
            % Pick the first feature (highest mutual information with the target)
            mi_scores = zeros(1, n_features);
            for i = 1:n_features
                mi_scores(i) = mutual_information(X_disc(:, i), y_disc);
            end
            obj.FeatureScores = mi_scores;  % expose the per-feature MI
            
            [~, first_feature] = max(mi_scores);
            selected = [selected, first_feature];
            candidate_features(first_feature) = [];
            
            fprintf('JMI selection process:\n');
            fprintf('1. Selected feature %d (%s), MI: %.4f\n', ...
                first_feature, obj.FeatureNames{first_feature}, mi_scores(first_feature));
            
            % Select the remaining features
            for k = 2:min(obj.NumFeatures, n_features)
                best_score = -inf;
                best_feature = 0;
                
                for i = 1:length(candidate_features)
                    feature_idx = candidate_features(i);
                    
                    % JMI score: sum of the conditional mutual information
                    % with the target, conditioned on each selected feature
                    jmi_score = 0;
                    for j = 1:length(selected)
                        % I(feature; target | selected_feature)
                        cond_mi = obj.conditional_mutual_information(...
                            X_disc(:, feature_idx), y_disc, X_disc(:, selected(j)));
                        jmi_score = jmi_score + cond_mi;
                    end
                    
                    if jmi_score > best_score
                        best_score = jmi_score;
                        best_feature = feature_idx;
                    end
                end
                
                if best_feature > 0
                    selected = [selected, best_feature];
                    candidate_features(candidate_features == best_feature) = [];
                    
                    fprintf('%d. Selected feature %d (%s), JMI score: %.4f\n', ...
                        k, best_feature, obj.FeatureNames{best_feature}, best_score);
                else
                    break;
                end
            end
            
            obj.SelectedFeatures = selected;
        end
        
        function cmi = conditional_mutual_information(obj, X, Y, Z)
            % Compute the conditional mutual information I(X;Y|Z)
            
            % I(X;Y|Z) = H(X|Z) - H(X|Y,Z)
            % Equivalently, via joint entropies:
            % I(X;Y|Z) = H(X,Z) + H(Y,Z) - H(Z) - H(X,Y,Z)
            
            entropy_xz = joint_entropy([X, Z]);
            entropy_yz = joint_entropy([Y, Z]);
            entropy_z = entropy(Z);
            entropy_xyz = joint_entropy([X, Y, Z]);
            
            cmi = entropy_xz + entropy_yz - entropy_z - entropy_xyz;
        end
    end
end
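The joint-entropy formula used in conditional_mutual_information can be cross-checked against the chain-rule form I(X;Y|Z) = H(X|Z) - H(X|Y,Z), computed here through the Section 2.2 helpers on synthetic discrete data:

rng(2);
n = 1000;
Z = randi(2, n, 1) - 1;
X = double(rand(n, 1) < 0.3 + 0.4 * Z);    % X depends on Z
Y = double(rand(n, 1) < 0.2 + 0.6 * X);    % Y depends on X

% Joint-entropy form (as in the class method)
cmi_entropy = joint_entropy([X, Z]) + joint_entropy([Y, Z]) ...
            - entropy(Z) - joint_entropy([X, Y, Z]);

% Chain-rule form: encode (Y,Z) as a single conditioning variable
[~, ~, yz] = unique([Y, Z], 'rows');
cmi_chain = conditional_entropy(Z, X) - conditional_entropy(yz, X);

fprintf('I(X;Y|Z): %.6f (joint entropies) vs %.6f (chain rule)\n', ...
    cmi_entropy, cmi_chain);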
4. Complete Examples and Applications
4.1 Main Demo Program
function main_entropy_feature_selection()
    % Main demo: entropy-based feature selection
    
    clear; clc; close all;
    
    %% Generate example data
    fprintf('Generating example data...\n');
    [X, y, feature_names] = generate_example_data();
    
    fprintf('Data size: %d samples × %d features\n', size(X, 1), size(X, 2));
    fprintf('Number of target classes: %d\n', length(unique(y)));
    
    %% Compare different feature selection methods
    method_list = {
        'information_gain', 'Information gain';
        'gain_ratio', 'Gain ratio'; 
        'symmetrical_uncertainty', 'Symmetrical uncertainty';
        'mrmr', 'mRMR';
        'jmi', 'Joint mutual information'
    };
    
    num_selected = 15;
    results = cell(size(method_list, 1), 1);
    
    figure('Position', [100, 100, 1400, 800]);
    
    for i = 1:size(method_list, 1)
        method = method_list{i, 1};
        method_name = method_list{i, 2};
        
        fprintf('\n=== %s ===\n', method_name);
        
        % Create the feature selector
        switch method
            case 'mrmr'
                selector = MRMRFeatureSelector(num_selected);
            case 'jmi'
                selector = JMIFeatureSelector(num_selected);
            otherwise
                selector = EntropyFeatureSelector(method, num_selected);
        end
        
        % Run the feature selection
        tic;
        selector.fit(X, y, feature_names);
        time_elapsed = toc;
        
        % Store the results
        results{i} = struct();
        results{i}.method = method_name;
        results{i}.selected_features = selector.SelectedFeatures;
        results{i}.feature_scores = selector.FeatureScores;
        results{i}.time = time_elapsed;
        
        % Plot the feature scores
        subplot(2, 3, i);
        [sorted_scores, sorted_idx] = sort(selector.FeatureScores, 'descend');
        top_features = min(20, length(sorted_scores));
        
        barh(sorted_scores(1:top_features), 'FaceColor', [0.3, 0.6, 0.9]);
        set(gca, 'YTick', 1:top_features, ...
            'YTickLabel', feature_names(sorted_idx(1:top_features)));
        ylabel('Feature');
        xlabel('Score');
        title(sprintf('%s\n(elapsed: %.2fs)', method_name, time_elapsed));
        grid on;
        
        fprintf('Top 5 selected features:\n');
        for j = 1:min(5, length(selector.SelectedFeatures))
            feat_idx = selector.SelectedFeatures(j);
            fprintf('  %d. %s: %.4f\n', j, feature_names{feat_idx}, ...
                selector.FeatureScores(feat_idx));
        end
    end
    end
    
    %% Performance comparison
    compare_feature_selection_performance(X, y, results);
    
    %% Detailed mRMR analysis
    analyze_mrmr_performance(X, y, feature_names);
end
function [X, y, feature_names] = generate_example_data()
    % Generate example data
    
    n_samples = 1000;
    n_features = 50;
    
    % Generate random features
    rng(42); % fix the random seed for reproducibility
    X = randn(n_samples, n_features);
    
    % Create meaningful feature names
    feature_names = cell(1, n_features);
    for i = 1:n_features
        feature_names{i} = sprintf('Feature_%02d', i);
    end
    
    % Create a target variable that depends on a subset of the features
    relevant_features = [1, 5, 10, 15, 20, 25, 30];
    noise_level = 0.3;
    
    % Linear combination + noise
    weights = randn(length(relevant_features), 1);
    linear_combination = X(:, relevant_features) * weights;
    
    % Add a nonlinear relationship
    nonlinear_effect = sin(X(:, 1)) .* exp(X(:, 5)) + X(:, 10).^2;
    
    % Combined effect
    total_effect = linear_combination + 0.5 * nonlinear_effect + noise_level * randn(n_samples, 1);
    
    % Convert to a binary classification problem
    y = double(total_effect > median(total_effect));
    
    fprintf('Data generation complete:\n');
    fprintf('  - Samples: %d\n', n_samples);
    fprintf('  - Features: %d\n', n_features);
    fprintf('  - Relevant features: %s\n', mat2str(relevant_features));
end
function compare_feature_selection_performance(X, y, results)
    % Compare the performance of the feature selection methods
    
    fprintf('\n=== Feature selection performance comparison ===\n');
    
    % Use classification accuracy as the evaluation metric
    cv = cvpartition(y, 'KFold', 5);
    predfun = @(X_train, y_train, X_test) ...
        predict(fitcsvm(X_train, y_train), X_test);
    
    accuracies = zeros(length(results), 1);
    
    for i = 1:length(results)
        method_result = results{i};
        selected_features = method_result.selected_features;
        
        if length(selected_features) < 2
            accuracies(i) = 0;
            continue;
        end
        
        X_selected = X(:, selected_features);
        
        % Cross-validated misclassification rate
        cv_error = crossval('mcr', X_selected, y, 'Predfun', predfun, ...
            'Partition', cv);
        accuracies(i) = 1 - cv_error;
        
        fprintf('%s: accuracy = %.4f, elapsed = %.2fs\n', ...
            method_result.method, accuracies(i), method_result.time);
    end
    
    % Plot the performance comparison
    figure('Position', [100, 100, 1000, 600]);
    
    subplot(1, 2, 1);
    method_labels = cellfun(@(x) x.method, results, 'UniformOutput', false);
    bar(accuracies, 'FaceColor', [0.4, 0.7, 0.4]);
    set(gca, 'XTickLabel', method_labels, 'XTickLabelRotation', 45);
    ylabel('Classification accuracy');
    title('Performance of the feature selection methods');
    grid on;
    
    subplot(1, 2, 2);
    times = cellfun(@(x) x.time, results);
    bar(times, 'FaceColor', [0.8, 0.4, 0.4]);
    set(gca, 'XTickLabel', method_labels, 'XTickLabelRotation', 45);
    ylabel('Runtime (s)');
    title('Computation time comparison');
    grid on;
end
function analyze_mrmr_performance(X, y, feature_names)
    % Detailed analysis of the mRMR algorithm
    
    fprintf('\n=== Detailed mRMR analysis ===\n');
    
    mrmr_selector = MRMRFeatureSelector(20);
    mrmr_selector.fit(X, y, feature_names);
    
    % Plot the mRMR selection process
    mrmr_selector.plot_mrmr_process();
    
    % Analyze feature correlations
    analyze_feature_correlations(X, y, mrmr_selector.SelectedFeatures, feature_names);
end
function analyze_feature_correlations(X, y, selected_features, feature_names)
    % Analyze feature correlations
    
    figure('Position', [100, 100, 1200, 500]);
    
    % Feature-target association
    subplot(1, 2, 1);
    correlations = zeros(1, length(selected_features));
    for i = 1:length(selected_features)
        if iscategorical(y) || isinteger(y)
            % Classification: use the ANOVA F statistic
            [~, tbl] = anova1(X(:, selected_features(i)), y, 'off');
            correlations(i) = tbl{2, 5}; % F statistic
        else
            % Regression: use the absolute correlation coefficient
            correlations(i) = abs(corr(X(:, selected_features(i)), y));
        end
    end
    
    bar(correlations, 'FaceColor', [0.2, 0.5, 0.8]);
    set(gca, 'XTickLabel', feature_names(selected_features), ...
        'XTickLabelRotation', 45);
    ylabel('Feature-target association');
    title('Association between the selected features and the target');
    grid on;
    
    % Correlation matrix of the selected features
    subplot(1, 2, 2);
    selected_X = X(:, selected_features);
    correlation_matrix = corr(selected_X);
    
    imagesc(correlation_matrix);
    colorbar;
    set(gca, 'XTick', 1:length(selected_features), ...
        'XTickLabel', feature_names(selected_features), ...
        'XTickLabelRotation', 45);
    set(gca, 'YTick', 1:length(selected_features), ...
        'YTickLabel', feature_names(selected_features));
    title('Correlation matrix of the selected features');
end
4.2 Application to a Real Dataset
function real_world_application()
    % Example application on a real dataset
    
    fprintf('=== Feature selection on a real dataset ===\n');
    
    % Load the dataset (a built-in MATLAB dataset is used here as an example)
    load fisheriris;
    X = meas;      % feature matrix
    y = species;   % target variable
    feature_names = {'Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width'};
    
    fprintf('Dataset: Fisher Iris\n');
    fprintf('Features: %d\n', size(X, 2));
    fprintf('Samples: %d\n', size(X, 1));
    fprintf('Classes: %s\n', strjoin(unique(y), ', '));
    
    % Feature selection with mRMR
    mrmr_selector = MRMRFeatureSelector(2); % keep the 2 most important features
    mrmr_selector.fit(X, y, feature_names);
    
    % Visualize the results
    visualize_iris_results(X, y, mrmr_selector, feature_names);
end
function visualize_iris_results(X, y, selector, feature_names)
    % Visualize the feature selection results on the Iris dataset
    
    selected_features = selector.SelectedFeatures;
    
    figure('Position', [100, 100, 1500, 600]);
    
    % Original feature space
    subplot(1, 3, 1);
    gscatter(X(:, 1), X(:, 2), y);
    xlabel(feature_names{1});
    ylabel(feature_names{2});
    title('Original feature space (first two features)');
    grid on;
    legend('Location', 'best');
    
    % Selected feature space
    subplot(1, 3, 2);
    if length(selected_features) >= 2
        feat1 = selected_features(1);
        feat2 = selected_features(2);
        gscatter(X(:, feat1), X(:, feat2), y);
        xlabel(feature_names{feat1});
        ylabel(feature_names{feat2});
        title('Feature space selected by mRMR');
        grid on;
        legend('Location', 'best');
    end
    
    % Feature importance
    subplot(1, 3, 3);
    scores = selector.FeatureScores;
    [sorted_scores, sorted_idx] = sort(scores, 'descend');
    
    barh(sorted_scores, 'FaceColor', [0.3, 0.6, 0.3]);
    set(gca, 'YTick', 1:length(feature_names), ...
        'YTickLabel', feature_names(sorted_idx));
    xlabel('Feature importance score');
    title('Feature importance ranking');
    grid on;
    
    fprintf('\nFeature importance ranking:\n');
    for i = 1:length(feature_names)
        fprintf('  %d. %s: %.4f\n', i, feature_names{sorted_idx(i)}, sorted_scores(i));
    end
end
5. Advanced Features and Optimization
5.1 Parallel Computing Optimization
classdef ParallelEntropyFeatureSelector < EntropyFeatureSelector
    % Feature selector optimized with parallel computing
    
    methods
        function fit_parallel(obj, X, y, feature_names)
            % Parallel version of the feature selection
            
            [~, n_features] = size(X);
            
            if nargin < 4
                obj.FeatureNames = arrayfun(@(x) sprintf('Feature_%d', x), ...
                    1:n_features, 'UniformOutput', false);
            else
                obj.FeatureNames = feature_names;
            end
            
            % Discretize the features and the target
            X_disc = obj.discretize_features(X);
            y_disc = obj.discretize_target(y);
            
            % Score the features in parallel; accumulate into a local
            % array because parfor cannot slice an object property
            scores = zeros(1, n_features);
            
            parfor i = 1:n_features
                switch obj.Method
                    case 'information_gain'
                        scores(i) = obj.information_gain(...
                            X_disc(:, i), y_disc);
                    case 'gain_ratio'
                        scores(i) = obj.gain_ratio(...
                            X_disc(:, i), y_disc);
                    case 'symmetrical_uncertainty'
                        scores(i) = obj.symmetrical_uncertainty(...
                            X_disc(:, i), y_disc);
                    otherwise
                        scores(i) = 0;
                end
            end
            
            obj.FeatureScores = scores;
            
            % Select the features
            obj.select_features();
        end
    end
end
5.2 Stability Analysis
function stability_analysis(X, y, feature_names)
    % Stability analysis of the feature selection
    
    fprintf('=== Feature selection stability analysis ===\n');
    
    num_runs = 10;
    num_selected = 10;
    
    % Store the selection result of each run
    selection_results = zeros(num_runs, num_selected);
    
    for run = 1:num_runs
        % Bootstrap sampling
        n_samples = size(X, 1);
        bootstrap_indices = randsample(n_samples, n_samples, true);
        X_bootstrap = X(bootstrap_indices, :);
        y_bootstrap = y(bootstrap_indices);
        
        % Feature selection
        selector = MRMRFeatureSelector(num_selected);
        selector.fit(X_bootstrap, y_bootstrap, feature_names);
        
        selection_results(run, :) = selector.SelectedFeatures(1:num_selected);
    end
    
    % Compute the stability
    stability = compute_selection_stability(selection_results);
    
    fprintf('Feature selection stability: %.4f\n', stability);
    
    % Visualize the stability results
    plot_stability_analysis(selection_results, feature_names, stability);
end
function stability = compute_selection_stability(selection_results)
    % Compute the stability of the feature selection
    
    [num_runs, ~] = size(selection_results);
    
    % Pairwise similarity between runs
    similarities = zeros(num_runs * (num_runs - 1) / 2, 1);
    idx = 1;
    
    for i = 1:num_runs
        for j = i+1:num_runs
            set_i = selection_results(i, :);
            set_j = selection_results(j, :);
            
            % Jaccard similarity
            intersection = length(intersect(set_i, set_j));
            union_set = length(union(set_i, set_j));
            
            similarities(idx) = intersection / union_set;
            idx = idx + 1;
        end
    end
    
    stability = mean(similarities);
end
function plot_stability_analysis(selection_results, feature_names, stability)
    % Plot the stability analysis results
    
    [num_runs, ~] = size(selection_results);
    
    figure('Position', [100, 100, 1200, 600]);
    
    % Feature selection frequency
    subplot(1, 2, 1);
    all_selected_features = selection_results(:);
    unique_features = unique(all_selected_features);
    
    selection_frequency = zeros(1, length(unique_features));
    for i = 1:length(unique_features)
        selection_frequency(i) = sum(all_selected_features == unique_features(i)) / num_runs;
    end
    
    [sorted_freq, sorted_idx] = sort(selection_frequency, 'descend');
    bar(sorted_freq, 'FaceColor', [0.7, 0.3, 0.3]);
    
    feature_labels = cell(1, length(unique_features));
    for i = 1:length(unique_features)
        feat_idx = unique_features(sorted_idx(i));
        feature_labels{i} = feature_names{feat_idx};
    end
    
    set(gca, 'XTick', 1:length(feature_labels), ...
        'XTickLabel', feature_labels, 'XTickLabelRotation', 45);
    ylabel('Selection frequency');
    title(sprintf('Feature selection frequency (stability: %.4f)', stability));
    grid on;
    
    % Heat map of the selection results
    subplot(1, 2, 2);
    selection_matrix = zeros(num_runs, max(unique_features));
    for i = 1:num_runs
        selection_matrix(i, selection_results(i, :)) = 1;
    end
    
    imagesc(selection_matrix);
    colorbar;
    xlabel('Feature index');
    ylabel('Bootstrap run');
    title('Heat map of the feature selection results');
end
6. Summary
Entropy-based feature selection algorithms have the following strengths:
- Solid theoretical foundation: grounded in information theory, with a clear mathematical interpretation
- No distributional assumptions: no specific requirements on the data distribution
- Can detect nonlinear relationships: being based on probability distributions, they capture nonlinear associations
- Applicable to various data types: discretization allows both continuous and discrete features to be handled

Comparison of the main algorithms:
- Information gain: simple and effective, but biased toward features with many values
- Gain ratio: corrects the bias of information gain
- Symmetrical uncertainty: normalized, suitable for comparing different features
- mRMR: balances relevance and redundancy, with excellent results
- JMI: accounts for interactions between features, more comprehensive

These algorithms are particularly well suited to:
- Dimensionality reduction for high-dimensional data
- Gene selection in bioinformatics
- Feature selection for text classification
- Any scenario that calls for interpretable feature importance

With sensible algorithm parameters and appropriate preprocessing, entropy-based feature selection can significantly improve model performance and make the results more interpretable.