利用SIFT算子提取图像特征描述子的原理和MATLAB实现
利用SIFT算子提取图像特征描述子的原理和MATLAB实现。SIFT(尺度不变特征变换)是图像处理中经典的特征提取算法。
一、SIFT特征提取原理
1. SIFT算法主要步骤
1. 尺度空间极值检测
├── 构建高斯金字塔
├── 构建高斯差分金字塔(DoG)
└── 检测DoG空间局部极值
2. 关键点定位
├── 去除低对比度点
└── 去除边缘响应点
3. 方向分配
├── 计算关键点梯度幅值和方向
└── 构建方向直方图
4. 特征描述子生成
├── 划分关键点邻域为子区域
├── 计算子区域梯度方向直方图
└── 归一化生成128维描述子
2. 特征描述子结构
- 维度: 128维 (4×4个子区域 × 8个方向)
- 特点: 旋转不变性、尺度不变性、光照不变性
二、MATLAB实现代码
1. 使用VLFeat库(推荐)
% ====================
% VLFeat库SIFT特征提取
% ====================
% 下载VLFeat库: http://www.vlfeat.org/download.html
% 设置VLFeat路径
vl_setup;
% 读取图像
img = imread('test.jpg');
if size(img, 3) == 3
img_gray = rgb2gray(img);
else
img_gray = img;
end
img_single = im2single(img_gray); % 转换为单精度
% 提取SIFT特征
[frames, descriptors] = vl_sift(img_single);
% frames: 关键点信息矩阵 [x; y; scale; orientation]
% descriptors: 128维描述子矩阵
% 可选参数设置
[frames, descriptors] = vl_sift(img_single, ...
'PeakThresh', 0, ... % 峰值阈值,默认0
'EdgeThresh', 10, ... % 边缘阈值,默认10
'FirstOctave', 0, ... % 第一组索引,默认0
'Levels', 3, ... % 每组层数,默认3
'Octaves', 4, ... % 组数,默认4
'Magnif', 3, ... % 放大倍数,默认3
'WindowSize', 2); % 窗口大小,默认2
% 显示关键点
figure;
imshow(img);
hold on;
h1 = vl_plotframe(frames);
h2 = vl_plotframe(frames);
set(h1, 'color', 'k', 'linewidth', 3);
set(h2, 'color', 'y', 'linewidth', 2);
title('SIFT关键点检测');
% 显示描述子统计信息
fprintf('检测到 %d 个关键点\n', size(frames, 2));
fprintf('描述子维度: %d\n', size(descriptors, 1));
2. 自定义SIFT实现(完整版)
function [keypoints, descriptors] = my_sift(img)
% 自定义SIFT特征提取实现
% 输入: 灰度图像
% 输出: 关键点和128维描述子
% 转换为双精度
I = double(img);
% ====================
% 1. 构建尺度空间
% ====================
fprintf('构建尺度空间...\n');
% 参数设置
num_octaves = 4; % 组数
num_scales = 5; % 每组尺度数
sigma0 = 1.6; % 基准尺度
k = 2^(1/num_scales); % 尺度倍增系数
% 构建高斯金字塔
gaussian_pyramid = cell(num_octaves, num_scales + 3);
for octave = 1:num_octaves
% 降采样图像
if octave == 1
octave_img = I;
else
octave_img = imresize(gaussian_pyramid{octave-1, 3}, 0.5);
end
for scale = 1:num_scales + 3
% 计算当前尺度
sigma = sigma0 * k^(scale-1);
% 高斯滤波
hsize = ceil(3 * sigma) * 2 + 1;
gaussian_filter = fspecial('gaussian', hsize, sigma);
gaussian_pyramid{octave, scale} = imfilter(octave_img, gaussian_filter, 'same');
end
end
% ====================
% 2. 构建DoG金字塔并检测极值点
% ====================
fprintf('检测DoG极值点...\n');
dog_pyramid = cell(num_octaves, num_scales + 2);
candidate_keypoints = [];
for octave = 1:num_octaves
for scale = 2:num_scales + 2
% 计算DoG
dog_pyramid{octave, scale-1} = gaussian_pyramid{octave, scale+1} - ...
gaussian_pyramid{octave, scale};
% 在当前DoG层检测极值
if scale > 2 && scale < num_scales + 2
layer_current = dog_pyramid{octave, scale-1};
layer_above = dog_pyramid{octave, scale};
layer_below = dog_pyramid{octave, scale-2};
% 3D邻域极值检测
for i = 2:size(layer_current, 1)-1
for j = 2:size(layer_current, 2)-1
% 当前点
current_val = layer_current(i, j);
% 检查是否是极值
neighbor_vals = [layer_current(i-1:i+1, j-1:j+1), ...
layer_above(i-1:i+1, j-1:j+1), ...
layer_below(i-1:i+1, j-1:j+1)];
neighbor_vals(14) = []; % 移除中心点
if (current_val > max(neighbor_vals(:)) || ...
current_val < min(neighbor_vals(:)))
% 记录候选关键点
candidate_keypoints = [candidate_keypoints; ...
octave, scale-1, i, j, current_val];
end
end
end
end
end
end
fprintf('找到 %d 个候选关键点\n', size(candidate_keypoints, 1));
% ====================
% 3. 关键点精确定位
% ====================
fprintf('关键点精确定位...\n');
keypoints = [];
contrast_thresh = 0.03; % 对比度阈值
edge_thresh = 10; % 边缘阈值
for i = 1:size(candidate_keypoints, 1)
octave = candidate_keypoints(i, 1);
scale = candidate_keypoints(i, 2);
x = candidate_keypoints(i, 3);
y = candidate_keypoints(i, 4);
% 获取当前DoG层
D = dog_pyramid{octave, scale};
% 3D二次函数拟合(简化实现)
Dx = (D(x+1, y) - D(x-1, y)) / 2;
Dy = (D(x, y+1) - D(x, y-1)) / 2;
Ds = (dog_pyramid{octave, min(scale+1, size(dog_pyramid, 2))}(x, y) - ...
dog_pyramid{octave, max(scale-1, 1)}(x, y)) / 2;
Dxx = D(x+1, y) + D(x-1, y) - 2*D(x, y);
Dyy = D(x, y+1) + D(x, y-1) - 2*D(x, y);
Dss = dog_pyramid{octave, min(scale+1, size(dog_pyramid, 2))}(x, y) + ...
dog_pyramid{octave, max(scale-1, 1)}(x, y) - 2*D(x, y);
% 计算偏移量
offset = -inv([Dxx, 0, 0; 0, Dyy, 0; 0, 0, Dss]) * [Dx; Dy; Ds];
% 更新位置
x_new = x + offset(1);
y_new = y + offset(2);
scale_new = scale + offset(3);
% 计算插值后的值
D_new = D(x, y) + 0.5 * ([Dx, Dy, Ds] * offset);
% 对比度过滤
if abs(D_new) < contrast_thresh
continue;
end
% 边缘响应过滤(Hessian矩阵)
H = [Dxx, Dx*Dy; Dx*Dy, Dyy];
trace_H = trace(H);
det_H = det(H);
if det_H <= 0 || (trace_H^2 / det_H) > ((edge_thresh + 1)^2 / edge_thresh)
continue;
end
% 保存关键点
keypoints = [keypoints; octave, scale_new, x_new, y_new];
end
fprintf('保留 %d 个关键点\n', size(keypoints, 1));
% ====================
% 4. 方向分配
% ====================
fprintf('方向分配...\n');
keypoints_with_orientation = [];
for i = 1:size(keypoints, 1)
octave = keypoints(i, 1);
scale_idx = round(keypoints(i, 2));
x = round(keypoints(i, 3));
y = round(keypoints(i, 4));
% 获取对应高斯层
L = gaussian_pyramid{octave, scale_idx + 1};
% 计算梯度
radius = round(3 * 1.5 * sigma0 * k^(scale_idx-1));
radius = min(radius, min(size(L)) / 2 - 1);
% 提取邻域
x_min = max(1, x - radius);
x_max = min(size(L, 1), x + radius);
y_min = max(1, y - radius);
y_max = min(size(L, 2), y + radius);
patch = L(x_min:x_max, y_min:y_max);
% 计算梯度幅值和方向
[Gx, Gy] = gradient(patch);
mag = sqrt(Gx.^2 + Gy.^2);
ori = atan2(Gy, Gx) * 180 / pi; % 转换为度
% 构建方向直方图(36 bins)
ori_hist = zeros(36, 1);
bin_size = 10; % 每个bin 10度
for p = 1:numel(mag)
bin = mod(round(ori(p) / bin_size), 36) + 1;
ori_hist(bin) = ori_hist(bin) + mag(p);
end
% 平滑直方图
ori_hist_smooth = conv(ori_hist, [1/3, 1/3, 1/3], 'same');
% 找到主方向
[max_val, main_ori] = max(ori_hist_smooth);
main_angle = (main_ori - 1) * bin_size;
% 添加关键点(带方向)
keypoints_with_orientation = [keypoints_with_orientation; ...
keypoints(i, :), main_angle];
% 检查副方向(大于主方向80%的峰值)
for b = 1:36
if ori_hist_smooth(b) > 0.8 * max_val && b ~= main_ori
secondary_angle = (b - 1) * bin_size;
keypoints_with_orientation = [keypoints_with_orientation; ...
keypoints(i, :), secondary_angle];
end
end
end
% ====================
% 5. 生成描述子
% ====================
fprintf('生成描述子...\n');
num_keypoints = size(keypoints_with_orientation, 1);
descriptors = zeros(128, num_keypoints);
for k = 1:num_keypoints
octave = keypoints_with_orientation(k, 1);
scale_idx = round(keypoints_with_orientation(k, 2));
x = keypoints_with_orientation(k, 3);
y = keypoints_with_orientation(k, 4);
orientation = keypoints_with_orientation(k, 5);
% 获取对应高斯层
L = gaussian_pyramid{octave, scale_idx + 1};
% 旋转坐标轴到关键点方向
cos_theta = cosd(orientation);
sin_theta = sind(orientation);
% 提取16×16的邻域
descriptor = zeros(4, 4, 8); % 4×4个子区域,每个8方向
for i = -8:7
for j = -8:7
% 旋转坐标
x_rot = i * cos_theta - j * sin_theta;
y_rot = i * sin_theta + j * cos_theta;
% 计算在原始图像中的位置
x_img = round(x + x_rot);
y_img = round(y + y_rot);
if x_img >= 1 && x_img <= size(L, 1) && ...
y_img >= 1 && y_img <= size(L, 2)
% 计算梯度
if x_img > 1 && x_img < size(L, 1) && ...
y_img > 1 && y_img < size(L, 2)
dx = L(x_img+1, y_img) - L(x_img-1, y_img);
dy = L(x_img, y_img+1) - L(x_img, y_img-1);
else
continue;
end
% 计算梯度幅值和方向(相对关键点方向)
mag = sqrt(dx^2 + dy^2);
grad_ori = atan2(dy, dx) * 180 / pi - orientation;
grad_ori = mod(grad_ori, 360);
% 确定子区域和方向bin
sub_x = floor((i + 8) / 4);
sub_y = floor((j + 8) / 4);
if sub_x >= 0 && sub_x < 4 && sub_y >= 0 && sub_y < 4
ori_bin = floor(grad_ori / 45);
ori_bin = mod(ori_bin, 8);
% 双线性插值权重
x_frac = (i + 8) / 4 - sub_x;
y_frac = (j + 8) / 4 - sub_y;
% 更新描述子
for dx_bin = 0:1
for dy_bin = 0:1
weight = mag * (1 - abs(x_frac - dx_bin)) * ...
(1 - abs(y_frac - dy_bin));
sub_x_idx = min(3, sub_x + dx_bin);
sub_y_idx = min(3, sub_y + dy_bin);
descriptor(sub_x_idx+1, sub_y_idx+1, ori_bin+1) = ...
descriptor(sub_x_idx+1, sub_y_idx+1, ori_bin+1) + weight;
end
end
end
end
end
end
% 展平为一维向量
desc_vec = descriptor(:);
% 归一化
desc_norm = sqrt(sum(desc_vec.^2));
if desc_norm > 0
desc_vec = desc_vec / desc_norm;
end
% 限制最大值(增强鲁棒性)
desc_vec = min(desc_vec, 0.2);
% 重新归一化
desc_norm = sqrt(sum(desc_vec.^2));
if desc_norm > 0
desc_vec = desc_vec / desc_norm;
end
% 存储描述子
descriptors(:, k) = desc_vec;
end
fprintf('生成 %d 个128维描述子\n', size(descriptors, 2));
end
3. 特征匹配与可视化
function [matches, scores] = match_sift_features(desc1, desc2, method)
% SIFT特征匹配
% 输入: 两个图像的描述子矩阵
% 输出: 匹配对和匹配分数
% 方法选择
if nargin < 3
method = 'ratio'; % 默认比率测试法
end
% 确保描述子维度一致
assert(size(desc1, 1) == size(desc2, 1), '描述子维度不一致');
matches = [];
scores = [];
switch method
case 'ratio'
% 比率测试法(Lowe's ratio test)
ratio_thresh = 0.8;
for i = 1:size(desc1, 2)
% 计算当前描述子与所有描述子的距离
distances = sqrt(sum((desc2 - desc1(:, i)).^2, 1));
% 排序找到最近的两个
[sorted_dist, sorted_idx] = sort(distances);
% 比率测试
if sorted_dist(1) < ratio_thresh * sorted_dist(2)
matches = [matches; i, sorted_idx(1)];
scores = [scores; sorted_dist(1)];
end
end
case 'mutual'
% 相互最近邻匹配
matches1 = [];
matches2 = [];
% 从图像1到图像2的匹配
for i = 1:size(desc1, 2)
distances = sqrt(sum((desc2 - desc1(:, i)).^2, 1));
[min_dist, min_idx] = min(distances);
matches1 = [matches1; i, min_idx, min_dist];
end
% 从图像2到图像1的匹配
for i = 1:size(desc2, 2)
distances = sqrt(sum((desc1 - desc2(:, i)).^2, 1));
[min_dist, min_idx] = min(distances);
matches2 = [matches2; min_idx, i, min_dist];
end
% 找出相互匹配
for i = 1:size(matches1, 1)
idx1 = matches1(i, 1);
idx2 = matches1(i, 2);
% 检查是否是相互最近邻
if matches2(idx2, 1) == idx1
matches = [matches; idx1, idx2];
scores = [scores; matches1(i, 3)];
end
end
end
fprintf('找到 %d 个匹配对\n', size(matches, 1));
end
function visualize_matches(img1, img2, kp1, kp2, matches)
% 可视化特征匹配
% 输入: 两幅图像及其关键点坐标和匹配对
% 创建拼接图像
[h1, w1] = size(img1);
[h2, w2] = size(img2);
h = max(h1, h2);
img_combined = zeros(h, w1 + w2, 'uint8');
img_combined(1:h1, 1:w1) = img1;
img_combined(1:h2, w1+1:end) = img2;
figure;
imshow(img_combined);
hold on;
% 绘制关键点
plot(kp1(1, :), kp1(2, :), 'r+', 'MarkerSize', 10, 'LineWidth', 2);
plot(kp2(1, :) + w1, kp2(2, :), 'b+', 'MarkerSize', 10, 'LineWidth', 2);
% 绘制匹配线
for i = 1:size(matches, 1)
idx1 = matches(i, 1);
idx2 = matches(i, 2);
if idx1 <= size(kp1, 2) && idx2 <= size(kp2, 2)
x1 = kp1(1, idx1);
y1 = kp1(2, idx1);
x2 = kp2(1, idx2) + w1;
y2 = kp2(2, idx2);
line([x1, x2], [y1, y2], 'Color', 'g', 'LineWidth', 1.5);
plot(x1, y1, 'ro', 'MarkerSize', 8, 'LineWidth', 2);
plot(x2, y2, 'bo', 'MarkerSize', 8, 'LineWidth', 2);
end
end
title(sprintf('SIFT特征匹配 (%d 个匹配对)', size(matches, 1)));
hold off;
end
4. 完整应用示例
% ====================
% SIFT特征提取完整示例
% ====================
clear; close all; clc;
% 1. 读取图像
img1 = imread('image1.jpg');
img2 = imread('image2.jpg'); % 可以是同一图像的不同视角
% 转换为灰度图
if size(img1, 3) == 3
img1_gray = rgb2gray(img1);
else
img1_gray = img1;
end
if size(img2, 3) == 3
img2_gray = rgb2gray(img2);
else
img2_gray = img2;
end
% 2. 提取SIFT特征(使用VLFeat库)
fprintf('提取图像1的SIFT特征...\n');
img1_single = im2single(img1_gray);
[frames1, descriptors1] = vl_sift(img1_single, 'PeakThresh', 0.01);
fprintf('提取图像2的SIFT特征...\n');
img2_single = im2single(img2_gray);
[frames2, descriptors2] = vl_sift(img2_single, 'PeakThresh', 0.01);
% 提取关键点坐标
kp1 = frames1(1:2, :);
kp2 = frames2(1:2, :);
% 3. 特征匹配
fprintf('特征匹配...\n');
[matches, scores] = match_sift_features(descriptors1, descriptors2, 'ratio');
% 4. 可视化
figure('Position', [100, 100, 1400, 600]);
% 显示原始图像
subplot(2, 3, [1, 2]);
visualize_matches(img1_gray, img2_gray, kp1, kp2, matches);
% 显示描述子统计
subplot(2, 3, 3);
histogram(scores, 50);
xlabel('匹配距离');
ylabel('频数');
title('匹配距离分布');
grid on;
% 显示关键点分布
subplot(2, 3, 4);
imshow(img1_gray);
hold on;
plot(kp1(1, :), kp1(2, :), 'r.', 'MarkerSize', 10);
title(sprintf('图像1: %d 个关键点', size(kp1, 2)));
subplot(2, 3, 5);
imshow(img2_gray);
hold on;
plot(kp2(1, :), kp2(2, :), 'b.', 'MarkerSize', 10);
title(sprintf('图像2: %d 个关键点', size(kp2, 2)));
% 显示描述子可视化
subplot(2, 3, 6);
if ~isempty(descriptors1)
% 可视化第一个关键点的描述子
desc_reshaped = reshape(descriptors1(:, 1), [4, 4, 8]);
desc_vis = zeros(16, 16);
for i = 1:4
for j = 1:4
% 提取8方向直方图
hist8 = desc_reshaped(i, j, :);
hist8 = hist8(:);
% 创建方向箭头图
for dir = 1:8
angle = (dir-1) * 45;
length = hist8(dir) * 10;
x_center = (i-1)*4 + 2;
y_center = (j-1)*4 + 2;
x_end = x_center + length * cosd(angle);
y_end = y_center + length * sind(angle);
line([y_center, y_end], [x_center, x_end], 'Color', 'r', 'LineWidth', 1);
end
end
end
axis([0, 16, 0, 16]);
axis equal;
axis off;
title('描述子可视化 (4×4×8)');
end
% 5. 性能评估
fprintf('\n====================\n');
fprintf('SIFT特征提取结果\n');
fprintf('====================\n');
fprintf('图像1: %d 个关键点\n', size(kp1, 2));
fprintf('图像2: %d 个关键点\n', size(kp2, 2));
fprintf('成功匹配: %d 对\n', size(matches, 1));
fprintf('匹配率: %.2f%%\n', size(matches, 1) / min(size(kp1, 2), size(kp2, 2)) * 100);
fprintf('平均匹配距离: %.4f\n', mean(scores));
5. 应用场景扩展
% ====================
% SIFT在不同场景的应用
% ====================
% 1. 图像拼接
function panorama = stitch_images_with_sift(images)
% 使用SIFT进行图像拼接
% 提取所有图像的特征
num_images = length(images);
all_features = cell(num_images, 2);
for i = 1:num_images
img_gray = rgb2gray(images{i});
img_single = im2single(img_gray);
[frames, descriptors] = vl_sift(img_single);
all_features{i, 1} = frames;
all_features{i, 2} = descriptors;
end
% 匹配相邻图像
homographies = cell(num_images-1, 1);
for i = 1:num_images-1
% 匹配特征
[matches, ~] = match_sift_features(all_features{i, 2}, ...
all_features{i+1, 2});
% 提取匹配点
kp1 = all_features{i, 1}(1:2, matches(:, 1));
kp2 = all_features{i+1, 1}(1:2, matches(:, 2));
% 计算单应性矩阵(RANSAC)
[H, inliers] = ransac_homography(kp1, kp2);
homographies{i} = H;
end
% 拼接图像
panorama = create_panorama(images, homographies);
end
% 2. 物体识别
function [label, confidence] = object_recognition_sift(test_img, database)
% 基于SIFT的物体识别
% 提取测试图像特征
test_gray = rgb2gray(test_img);
test_single = im2single(test_gray);
[test_frames, test_descriptors] = vl_sift(test_single);
% 与数据库中的图像匹配
num_classes = length(database);
scores = zeros(num_classes, 1);
for i = 1:num_classes
% 提取数据库图像特征
db_img = database{i}.image;
db_gray = rgb2gray(db_img);
db_single = im2single(db_gray);
[db_frames, db_descriptors] = vl_sift(db_single);
% 特征匹配
[matches, match_scores] = match_sift_features(test_descriptors, db_descriptors);
% 计算匹配分数
scores(i) = size(matches, 1) / min(size(test_descriptors, 2), ...
size(db_descriptors, 2));
end
% 找到最佳匹配
[confidence, idx] = max(scores);
label = database{idx}.name;
end
% 3. 3D重建
function point_cloud = sfm_with_sift(images)
% 从多视角图像进行三维重建
% 提取所有图像的特征
num_images = length(images);
features = cell(num_images, 1);
for i = 1:num_images
img_gray = rgb2gray(images{i});
img_single = im2single(img_gray);
[frames, descriptors] = vl_sift(img_single);
features{i} = struct('frames', frames, 'descriptors', descriptors);
end
% 匹配所有图像对
matches = cell(num_images, num_images);
for i = 1:num_images
for j = i+1:num_images
[match_pairs, ~] = match_sift_features(features{i}.descriptors, ...
features{j}.descriptors);
matches{i, j} = match_pairs;
end
end
% 三角测量生成3D点
point_cloud = triangulate_points(features, matches);
% 捆绑调整优化
point_cloud = bundle_adjustment(point_cloud, features, matches);
end
三、性能优化技巧
1. 加速特征提取
% 使用GPU加速
function [frames, descriptors] = sift_gpu(img)
% 将图像传输到GPU
img_gpu = gpuArray(im2single(img));
% GPU版本的SIFT(需要自定义实现或第三方库)
% 这里简化为概念代码
[frames, descriptors] = sift_gpu_kernel(img_gpu);
% 将结果传回CPU
frames = gather(frames);
descriptors = gather(descriptors);
end
% 并行处理多图像
function process_multiple_images_parallel(images)
num_images = length(images);
all_features = cell(num_images, 2);
parfor i = 1:num_images
img_gray = rgb2gray(images{i});
img_single = im2single(img_gray);
[frames, descriptors] = vl_sift(img_single);
all_features{i, 1} = frames;
all_features{i, 2} = descriptors;
end
end
2. 特征选择与过滤
% 基于显著性的特征选择
function [selected_frames, selected_descriptors] = select_salient_features(frames, descriptors)
% 根据尺度选择(大尺度特征通常更稳定)
scales = frames(3, :);
scale_threshold = median(scales) * 1.5;
scale_mask = scales > scale_threshold;
% 根据对比度选择
contrasts = frames(4, :); % 假设第四行存储对比度
contrast_threshold = median(contrasts) * 0.5;
contrast_mask = contrasts > contrast_threshold;
% 结合掩码
combined_mask = scale_mask & contrast_mask;
selected_frames = frames(:, combined_mask);
selected_descriptors = descriptors(:, combined_mask);
end
参考代码 利用SIFT算子提取图像的特征描述子 www.youwenfan.com/contentcno/95750.html
四、总结与建议
1. SIFT特点总结
-
优点:
- 尺度不变性:不同尺度的图像都能检测到相同特征
- 旋转不变性:图像旋转不影响特征提取
- 光照鲁棒性:对光照变化不敏感
- 视角不变性:一定程度的视角变化仍能匹配
-
缺点:
- 计算复杂度高,速度较慢
- 对模糊图像敏感
- 专利问题(已过期)
2. 实际应用建议
-
参数调优:
- 低对比度图像:降低PeakThresh
- 纹理丰富图像:增加Octaves和Levels
- 实时应用:减少特征数量
-
替代方案:
- 更快:SURF、ORB、BRISK
- 更准确:ASIFT、Affine-SIFT
- 深度学习:SIFTNet、SuperPoint
-
应用领域:
- 图像拼接和全景图
- 三维重建和SFM
- 物体识别和跟踪
- 图像检索和分类
浙公网安备 33010602011771号