完整代码@折腾笔记[49]-cuda的SIFT特征匹配
摘要
使用CUDA在GPU上加速SIFT特征提取与匹配算法, 并封装为CSharp库, 支持图像相似度评分和模板缓存。
声明
本文人类为第一作者, 龙虾为通讯作者.本文有AI生成内容.
关键信息
- CUDA Toolkit 12.4+
- Linux amd64 / Windows x64
- C# / .NET Framework 4.7.2+
简介
SIFT算法简介
SIFT(Scale-Invariant Feature Transform,尺度不变特征变换)是由 David Lowe 于 1999 年提出、2004 年完善的经典局部特征描述算法。它能够在不同尺度、旋转、光照条件下提取稳定的图像特征点,广泛应用于图像匹配、目标识别、全景拼接、三维重建等领域。
主要特点:
- 尺度不变性:通过高斯金字塔和 DoG(Difference of Gaussian)空间检测不同尺度的特征点
- 旋转不变性:基于梯度方向直方图确定特征点主方向
- 光照鲁棒性:描述子经归一化处理,对光照变化不敏感
- 高区分度:128维特征描述子,具有极强的特征区分能力
SIFT算法流程
- 尺度空间极值检测:构建高斯金字塔(Gaussian Pyramid)和 DoG 金字塔,检测局部极值点作为候选特征点
- 关键点精确定位:通过三维二次函数拟合剔除低对比度和边缘响应点
- 方向赋值:计算关键点邻域梯度方向直方图,确定主方向以实现旋转不变性
- 描述子生成:在关键点周围 4×4 子区域计算 8 方向梯度直方图,生成 128 维特征向量
Lowe比率测试
特征匹配阶段采用 Lowe 提出的比率测试(Ratio Test):对于每个特征点,找到最近邻和次近邻匹配点,只有当最近邻距离与次近邻距离的比值小于阈值(通常 0.75)时才视为有效匹配。该策略能有效剔除模糊匹配,提高匹配精度。
参考链接: SIFT - Wikipedia
参考论文: Lowe, D. G. (2004). Distinctive image features from scale-invariant keypoints. International Journal of Computer Vision, 60(2), 91-110.
工程目录结构
CudaSharp/
├── CudaSharpNative.cu # CUDA DLL 导出层(二值化 + SIFT)
├── CudaBinarizeLib.cs # C# P/Invoke 封装层
├── CudaBinarizeLib.csproj # .NET 类库工程
├── build.py # CUDA DLL 编译脚本
├── Program.cs # C# 示例程序
├── gray_image.c/h # 灰度图像数据结构
├── image_ops.cu/h # CUDA 图像操作(高斯模糊、下采样、差分)
├── sift_types.c/h # SIFT Feature 类型定义
├── sift_algorithm.cu/h # SIFT 特征提取算法
├── sift_matcher.cu/h # CUDA 加速 SIFT 特征匹配
├── image_similarity.cu/h # 图像相似度评估器(含模板缓存)
├── test_sift.cpp # SIFT 命令行测试程序
└── benchmark_sift.py # SIFT 性能基准测试脚本
完整代码
gray_image.h
#ifndef GRAY_IMAGE_H
#define GRAY_IMAGE_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stddef.h>
typedef struct {
int width;
int height;
float* data; // row-major: data[y * width + x]
} GrayImage;
GrayImage* gray_image_create(int width, int height);
GrayImage* gray_image_create_from_data(int width, int height, const float* data);
void gray_image_free(GrayImage* img);
GrayImage* gray_image_clone(const GrayImage* img);
GrayImage* gray_image_load(const char* filename);
static inline float gray_image_get(const GrayImage* img, int row, int col) {
return img->data[row * img->width + col];
}
static inline void gray_image_set(GrayImage* img, int row, int col, float val) {
img->data[row * img->width + col] = val;
}
#ifdef __cplusplus
}
#endif
#endif
gray_image.c
#include "gray_image.h"
#include "stb_image.h"
#include <stdlib.h>
#include <string.h>
#include <math.h>
GrayImage* gray_image_create(int width, int height) {
GrayImage* img = (GrayImage*)malloc(sizeof(GrayImage));
if (!img) return NULL;
img->width = width;
img->height = height;
img->data = (float*)calloc(width * height, sizeof(float));
if (!img->data) {
free(img);
return NULL;
}
return img;
}
GrayImage* gray_image_create_from_data(int width, int height, const float* data) {
GrayImage* img = gray_image_create(width, height);
if (!img) return NULL;
memcpy(img->data, data, width * height * sizeof(float));
return img;
}
void gray_image_free(GrayImage* img) {
if (img) {
free(img->data);
free(img);
}
}
GrayImage* gray_image_clone(const GrayImage* img) {
if (!img) return NULL;
return gray_image_create_from_data(img->width, img->height, img->data);
}
GrayImage* gray_image_load(const char* filename) {
int w, h, channels;
unsigned char* pixels = stbi_load(filename, &w, &h, &channels, 0);
if (!pixels) return NULL;
GrayImage* img = gray_image_create(w, h);
if (!img) {
stbi_image_free(pixels);
return NULL;
}
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
int idx = (y * w + x) * channels;
float r, g, b;
if (channels >= 3) {
r = pixels[idx + 0] / 255.0f;
g = pixels[idx + 1] / 255.0f;
b = pixels[idx + 2] / 255.0f;
} else if (channels == 1) {
r = g = b = pixels[idx] / 255.0f;
} else {
r = g = b = 0;
}
float gray = r * 0.299f + g * 0.587f + b * 0.114f;
gray_image_set(img, y, x, gray);
}
}
stbi_image_free(pixels);
return img;
}
sift_types.h
#ifndef SIFT_TYPES_H
#define SIFT_TYPES_H
#ifdef __cplusplus
extern "C" {
#endif
#include <stddef.h>
typedef enum {
FEATURE_OXFD,
FEATURE_LOWE
} FeatureType;
typedef struct {
int r;
int c;
int octv;
int intvl;
double subintvl;
double scl_octv;
} DetectionData;
#define FEATURE_MAX_D 128
typedef struct Feature {
double x;
double y;
double a;
double b;
double c;
double scl;
double ori;
int d;
double descr[FEATURE_MAX_D];
FeatureType type;
int category;
struct Feature* fwd_match;
struct Feature* bck_match;
struct Feature* mdl_match;
float img_pt_x;
float img_pt_y;
DetectionData feature_data;
} Feature;
Feature* feature_new(void);
void feature_free(Feature* feat);
Feature* feature_clone(const Feature* feat);
#ifdef __cplusplus
}
#endif
#endif
sift_types.c
#include "sift_types.h"
#include <stdlib.h>
#include <string.h>
Feature* feature_new(void) {
Feature* feat = (Feature*)calloc(1, sizeof(Feature));
if (feat) {
memset(feat->descr, 0, sizeof(feat->descr));
feat->type = FEATURE_LOWE;
}
return feat;
}
void feature_free(Feature* feat) {
free(feat);
}
Feature* feature_clone(const Feature* feat) {
if (!feat) return NULL;
Feature* new_feat = feature_new();
if (!new_feat) return NULL;
new_feat->x = feat->x;
new_feat->y = feat->y;
new_feat->a = feat->a;
new_feat->b = feat->b;
new_feat->c = feat->c;
new_feat->scl = feat->scl;
new_feat->ori = feat->ori;
new_feat->d = feat->d;
new_feat->type = feat->type;
new_feat->category = feat->category;
new_feat->img_pt_x = feat->img_pt_x;
new_feat->img_pt_y = feat->img_pt_y;
new_feat->feature_data = feat->feature_data;
memcpy(new_feat->descr, feat->descr, sizeof(feat->descr));
return new_feat;
}
image_ops.h
#ifndef IMAGE_OPS_H
#define IMAGE_OPS_H
#ifdef __cplusplus
extern "C" {
#endif
#include "gray_image.h"
GrayImage* image_ops_gaussian_blur(const GrayImage* src, double sigma);
GrayImage* image_ops_downsample(const GrayImage* src);
GrayImage* image_ops_resize_cubic(const GrayImage* src, int newW, int newH);
GrayImage* image_ops_subtract(const GrayImage* a, const GrayImage* b);
#ifdef __cplusplus
}
#endif
#endif
image_ops.cu
#include "image_ops.h"
#include <cuda_runtime.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
__global__ void gaussian_blur_h_kernel(const float* src, float* dst, int width, int height,
const float* kernel, int kernel_size, int radius) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= width || y >= height) return;
float val = 0.0f;
for (int k = 0; k < kernel_size; k++) {
int px = x + k - radius;
if (px < 0) px = 0;
if (px >= width) px = width - 1;
val += src[y * width + px] * kernel[k];
}
dst[y * width + x] = val;
}
__global__ void gaussian_blur_v_kernel(const float* src, float* dst, int width, int height,
const float* kernel, int kernel_size, int radius) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= width || y >= height) return;
float val = 0.0f;
for (int k = 0; k < kernel_size; k++) {
int py = y + k - radius;
if (py < 0) py = 0;
if (py >= height) py = height - 1;
val += src[py * width + x] * kernel[k];
}
dst[y * width + x] = val;
}
__global__ void downsample_kernel(const float* src, float* dst, int srcW, int srcH, int dstW, int dstH) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= dstW || y >= dstH) return;
dst[y * dstW + x] = src[(y * 2) * srcW + (x * 2)];
}
__global__ void subtract_kernel(const float* a, const float* b, float* dst, int width, int height) {
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= width || y >= height) return;
int idx = y * width + x;
dst[idx] = a[idx] - b[idx];
}
static void check_cuda(cudaError_t err, const char* msg) {
if (err != cudaSuccess) {
fprintf(stderr, "CUDA error (%s): %s\n", msg, cudaGetErrorString(err));
exit(1);
}
}
GrayImage* image_ops_gaussian_blur(const GrayImage* src, double sigma) {
if (sigma <= 0.01) {
return gray_image_clone(src);
}
int size = (int)ceil(sigma * 6.0);
if (size % 2 == 0) size++;
int radius = size / 2;
float* h_kernel = (float*)malloc(size * sizeof(float));
double sum = 0.0;
for (int i = 0; i < size; i++) {
double x = i - radius;
h_kernel[i] = (float)exp(-(x * x) / (2.0 * sigma * sigma));
sum += h_kernel[i];
}
for (int i = 0; i < size; i++) {
h_kernel[i] /= (float)sum;
}
int w = src->width;
int h = src->height;
int num_pixels = w * h;
float *d_src, *d_tmp, *d_dst, *d_kernel;
check_cuda(cudaMalloc(&d_src, num_pixels * sizeof(float)), "cudaMalloc d_src");
check_cuda(cudaMalloc(&d_tmp, num_pixels * sizeof(float)), "cudaMalloc d_tmp");
check_cuda(cudaMalloc(&d_dst, num_pixels * sizeof(float)), "cudaMalloc d_dst");
check_cuda(cudaMalloc(&d_kernel, size * sizeof(float)), "cudaMalloc d_kernel");
check_cuda(cudaMemcpy(d_src, src->data, num_pixels * sizeof(float), cudaMemcpyHostToDevice), "memcpy src");
check_cuda(cudaMemcpy(d_kernel, h_kernel, size * sizeof(float), cudaMemcpyHostToDevice), "memcpy kernel");
dim3 block(16, 16);
dim3 grid((w + block.x - 1) / block.x, (h + block.y - 1) / block.y);
gaussian_blur_h_kernel<<<grid, block>>>(d_src, d_tmp, w, h, d_kernel, size, radius);
check_cuda(cudaGetLastError(), "gaussian_blur_h_kernel");
gaussian_blur_v_kernel<<<grid, block>>>(d_tmp, d_dst, w, h, d_kernel, size, radius);
check_cuda(cudaGetLastError(), "gaussian_blur_v_kernel");
GrayImage* dst = gray_image_create(w, h);
check_cuda(cudaMemcpy(dst->data, d_dst, num_pixels * sizeof(float), cudaMemcpyDeviceToHost), "memcpy dst");
cudaFree(d_src);
cudaFree(d_tmp);
cudaFree(d_dst);
cudaFree(d_kernel);
free(h_kernel);
return dst;
}
GrayImage* image_ops_downsample(const GrayImage* src) {
int newW = src->width / 2;
int newH = src->height / 2;
GrayImage* dst = gray_image_create(newW, newH);
float *d_src, *d_dst;
check_cuda(cudaMalloc(&d_src, src->width * src->height * sizeof(float)), "cudaMalloc d_src");
check_cuda(cudaMalloc(&d_dst, newW * newH * sizeof(float)), "cudaMalloc d_dst");
check_cuda(cudaMemcpy(d_src, src->data, src->width * src->height * sizeof(float), cudaMemcpyHostToDevice), "memcpy src");
dim3 block(16, 16);
dim3 grid((newW + block.x - 1) / block.x, (newH + block.y - 1) / block.y);
downsample_kernel<<<grid, block>>>(d_src, d_dst, src->width, src->height, newW, newH);
check_cuda(cudaGetLastError(), "downsample_kernel");
check_cuda(cudaMemcpy(dst->data, d_dst, newW * newH * sizeof(float), cudaMemcpyDeviceToHost), "memcpy dst");
cudaFree(d_src);
cudaFree(d_dst);
return dst;
}
GrayImage* image_ops_subtract(const GrayImage* a, const GrayImage* b) {
int w = a->width;
int h = a->height;
GrayImage* dst = gray_image_create(w, h);
float *d_a, *d_b, *d_dst;
check_cuda(cudaMalloc(&d_a, w * h * sizeof(float)), "cudaMalloc d_a");
check_cuda(cudaMalloc(&d_b, w * h * sizeof(float)), "cudaMalloc d_b");
check_cuda(cudaMalloc(&d_dst, w * h * sizeof(float)), "cudaMalloc d_dst");
check_cuda(cudaMemcpy(d_a, a->data, w * h * sizeof(float), cudaMemcpyHostToDevice), "memcpy a");
check_cuda(cudaMemcpy(d_b, b->data, w * h * sizeof(float), cudaMemcpyHostToDevice), "memcpy b");
dim3 block(16, 16);
dim3 grid((w + block.x - 1) / block.x, (h + block.y - 1) / block.y);
subtract_kernel<<<grid, block>>>(d_a, d_b, d_dst, w, h);
check_cuda(cudaGetLastError(), "subtract_kernel");
check_cuda(cudaMemcpy(dst->data, d_dst, w * h * sizeof(float), cudaMemcpyDeviceToHost), "memcpy dst");
cudaFree(d_a);
cudaFree(d_b);
cudaFree(d_dst);
return dst;
}
static double cubic_kernel(double x) {
x = fabs(x);
if (x <= 1.0)
return 1.5 * x * x * x - 2.5 * x * x + 1.0;
if (x <= 2.0)
return -0.5 * x * x * x + 2.5 * x * x - 4.0 * x + 2.0;
return 0.0;
}
static int clamp(int v, int min, int max) {
return v < min ? min : (v > max ? max : v);
}
static double bicubic_interpolate(const GrayImage* img, double x, double y) {
int ix = (int)floor(x);
int iy = (int)floor(y);
double dx = x - ix;
double dy = y - iy;
double sum = 0.0;
double wsum = 0.0;
for (int j = -1; j <= 2; j++) {
for (int i = -1; i <= 2; i++) {
int px = clamp(ix + i, 0, img->width - 1);
int py = clamp(iy + j, 0, img->height - 1);
double wx = cubic_kernel(dx - i);
double wy = cubic_kernel(dy - j);
double w = wx * wy;
sum += gray_image_get(img, py, px) * w;
wsum += w;
}
}
return wsum > 0.0 ? sum / wsum : 0.0;
}
GrayImage* image_ops_resize_cubic(const GrayImage* src, int newW, int newH) {
GrayImage* dst = gray_image_create(newW, newH);
double scaleX = (double)src->width / newW;
double scaleY = (double)src->height / newH;
for (int y = 0; y < newH; y++) {
for (int x = 0; x < newW; x++) {
double sx = (x + 0.5) * scaleX - 0.5;
double sy = (y + 0.5) * scaleY - 0.5;
gray_image_set(dst, y, x, (float)bicubic_interpolate(src, sx, sy));
}
}
return dst;
}
sift_algorithm.h
#ifndef SIFT_ALGORITHM_H
#define SIFT_ALGORITHM_H
#ifdef __cplusplus
extern "C" {
#endif
#include "gray_image.h"
#include "sift_types.h"
typedef struct {
Feature** data;
int size;
int capacity;
} FeatureArray;
FeatureArray* feature_array_new(void);
void feature_array_free(FeatureArray* arr);
void feature_array_push(FeatureArray* arr, Feature* feat);
void feature_array_sort_by_scale(FeatureArray* arr);
FeatureArray* sift_extract_features(const GrayImage* img);
#ifdef __cplusplus
}
#endif
#endif
sift_algorithm.cu
#include "sift_algorithm.h"
#include "image_ops.h"
#include <cuda_runtime.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif
#define SIFT_INIT_SIGMA 0.5
#define SIFT_IMG_BORDER 5
#define SIFT_MAX_INTERP_STEPS 5
#define SIFT_ORI_HIST_BINS 36
#define SIFT_ORI_SIG_FCTR 1.5
#define SIFT_ORI_RADIUS (3.0 * SIFT_ORI_SIG_FCTR)
#define SIFT_ORI_SMOOTH_PASSES 2
#define SIFT_ORI_PEAK_RATIO 0.8
#define SIFT_DESCR_SCL_FCTR 3.0
#define SIFT_DESCR_MAG_THR 0.2
#define SIFT_INT_DESCR_FCTR 512.0
#define SIFT_INTVLS 3
#define SIFT_SIGMA 1.6
#define SIFT_CONTR_THR 0.04
#define SIFT_CURV_THR 10
#define SIFT_IMG_DBL 1
#define SIFT_DESCR_WIDTH 4
#define SIFT_DESCR_HIST_BINS 8
FeatureArray* feature_array_new(void) {
FeatureArray* arr = (FeatureArray*)malloc(sizeof(FeatureArray));
arr->data = NULL;
arr->size = 0;
arr->capacity = 0;
return arr;
}
void feature_array_free(FeatureArray* arr) {
if (!arr) return;
for (int i = 0; i < arr->size; i++) {
feature_free(arr->data[i]);
}
free(arr->data);
free(arr);
}
void feature_array_push(FeatureArray* arr, Feature* feat) {
if (arr->size >= arr->capacity) {
arr->capacity = arr->capacity == 0 ? 16 : arr->capacity * 2;
arr->data = (Feature**)realloc(arr->data, arr->capacity * sizeof(Feature*));
}
arr->data[arr->size++] = feat;
}
static int compare_feature_scale(const void* a, const void* b) {
Feature* fa = *(Feature**)a;
Feature* fb = *(Feature**)b;
if (fa->scl < fb->scl) return 1;
if (fa->scl > fb->scl) return -1;
return 0;
}
void feature_array_sort_by_scale(FeatureArray* arr) {
if (arr->size > 1) {
qsort(arr->data, arr->size, sizeof(Feature*), compare_feature_scale);
}
}
static void invert3x3(double H[3][3], double H_inv[3][3]) {
double aug[3][6];
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++) {
aug[i][j] = H[i][j];
aug[i][j + 3] = (i == j) ? 1.0 : 0.0;
}
}
for (int i = 0; i < 3; i++) {
double pivot = aug[i][i];
if (fabs(pivot) < 1e-12) {
for (int k = i + 1; k < 3; k++) {
if (fabs(aug[k][i]) > fabs(pivot)) {
for (int j = 0; j < 6; j++) {
double tmp = aug[i][j];
aug[i][j] = aug[k][j];
aug[k][j] = tmp;
}
pivot = aug[i][i];
break;
}
}
}
for (int j = 0; j < 6; j++) aug[i][j] /= pivot;
for (int k = 0; k < 3; k++) {
if (k != i) {
double factor = aug[k][i];
for (int j = 0; j < 6; j++)
aug[k][j] -= factor * aug[i][j];
}
}
}
for (int i = 0; i < 3; i++)
for (int j = 0; j < 3; j++)
H_inv[i][j] = aug[i][j + 3];
}
static GrayImage* create_init_img(const GrayImage* img, int img_dbl, double sigma) {
GrayImage* gray = gray_image_clone(img);
if (img_dbl != 0) {
float sig_diff = (float)sqrt(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4.0);
GrayImage* dbl = image_ops_resize_cubic(gray, img->width * 2, img->height * 2);
gray_image_free(gray);
GrayImage* blurred = image_ops_gaussian_blur(dbl, sig_diff);
gray_image_free(dbl);
return blurred;
} else {
float sig_diff = (float)sqrt(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA);
GrayImage* blurred = image_ops_gaussian_blur(gray, sig_diff);
gray_image_free(gray);
return blurred;
}
}
static GrayImage*** build_gauss_pyr(GrayImage* basepic, int octvs, int intvls, double sigma) {
GrayImage*** gauss_pyr = (GrayImage***)malloc(octvs * sizeof(GrayImage**));
for (int o = 0; o < octvs; o++) {
gauss_pyr[o] = (GrayImage**)malloc((intvls + 3) * sizeof(GrayImage*));
}
double* sig = (double*)malloc((intvls + 3) * sizeof(double));
double k = pow(2.0, 1.0 / intvls);
sig[0] = sigma;
for (int i = 1; i < intvls + 3; i++) {
double sig_prev = pow(k, i - 1) * sigma;
double sig_total = sig_prev * k;
sig[i] = sqrt(sig_total * sig_total - sig_prev * sig_prev);
}
for (int o = 0; o < octvs; o++) {
for (int i = 0; i < intvls + 3; i++) {
if (o == 0 && i == 0) {
gauss_pyr[o][i] = gray_image_clone(basepic);
} else if (i == 0) {
gauss_pyr[o][i] = image_ops_downsample(gauss_pyr[o - 1][intvls]);
} else {
gauss_pyr[o][i] = image_ops_gaussian_blur(gauss_pyr[o][i - 1], sig[i]);
}
}
}
free(sig);
return gauss_pyr;
}
static GrayImage*** build_dog_pyr(GrayImage*** gauss_pyr, int octvs, int intvls) {
GrayImage*** dog_pyr = (GrayImage***)malloc(octvs * sizeof(GrayImage**));
for (int o = 0; o < octvs; o++) {
dog_pyr[o] = (GrayImage**)malloc((intvls + 2) * sizeof(GrayImage*));
for (int i = 0; i < intvls + 2; i++) {
dog_pyr[o][i] = image_ops_subtract(gauss_pyr[o][i + 1], gauss_pyr[o][i]);
}
}
return dog_pyr;
}
static void free_pyr(GrayImage*** pyr, int octvs, int layers) {
for (int o = 0; o < octvs; o++) {
for (int i = 0; i < layers; i++) {
gray_image_free(pyr[o][i]);
}
free(pyr[o]);
}
free(pyr);
}
static int calc_grad_mag_ori(const GrayImage* img, int r, int c, double* mag, double* ori) {
if (r > 0 && r < img->height - 1 && c > 0 && c < img->width - 1) {
double dx = gray_image_get(img, r, c + 1) - gray_image_get(img, r, c - 1);
double dy = gray_image_get(img, r - 1, c) - gray_image_get(img, r + 1, c);
*mag = sqrt(dx * dx + dy * dy);
*ori = atan2(dy, dx);
return 1;
}
*mag = 0;
*ori = 0;
return 0;
}
static double* ori_hist(const GrayImage* img, int r, int c, int n, int rad, double sigma) {
double* hist = (double*)calloc(n, sizeof(double));
double exp_denom = 2.0 * sigma * sigma;
double PI2 = M_PI * 2.0;
for (int i = -rad; i <= rad; i++) {
for (int j = -rad; j <= rad; j++) {
double mag, ori;
if (calc_grad_mag_ori(img, r + i, c + j, &mag, &ori) == 1) {
double w = exp(-(i * i + j * j) / exp_denom);
int bin = (int)round(n * (ori + M_PI) / PI2);
bin = (bin < n) ? bin : 0;
hist[bin] += w * mag;
}
}
}
return hist;
}
static void smooth_ori_hist(double* hist, int n) {
double h0 = hist[0];
double prev = hist[n - 1];
for (int i = 0; i < n; i++) {
double tmp = hist[i];
hist[i] = 0.25 * prev + 0.5 * hist[i] + 0.25 * ((i + 1 == n) ? h0 : hist[i + 1]);
prev = tmp;
}
}
static double dominant_ori(double* hist, int n) {
double omax = hist[0];
for (int i = 1; i < n; i++)
if (hist[i] > omax) omax = hist[i];
return omax;
}
static double interp_hist_peak(double l, double c, double r) {
return 0.5 * (l - r) / (l - 2.0 * c + r);
}
static void add_good_ori_features(FeatureArray* features, double* hist, int n,
double mag_thr, Feature* feat) {
double PI2 = M_PI * 2.0;
for (int i = 0; i < n; i++) {
int l = (i == 0) ? n - 1 : i - 1;
int r_idx = (i + 1) % n;
if (hist[i] > hist[l] && hist[i] > hist[r_idx] && hist[i] >= mag_thr) {
double bin = i + interp_hist_peak(hist[l], hist[i], hist[r_idx]);
bin = (bin < 0) ? n + bin : (bin >= n) ? bin - n : bin;
Feature* new_feat = feature_clone(feat);
new_feat->ori = ((PI2 * bin) / n) - M_PI;
feature_array_push(features, new_feat);
}
}
}
static void calc_feature_oris(FeatureArray* features, GrayImage*** gauss_pyr) {
int n = features->size;
for (int i = 0; i < n; i++) {
Feature* feat = features->data[0];
for (int j = 0; j < features->size - 1; j++) {
features->data[j] = features->data[j + 1];
}
features->size--;
DetectionData ddata = feat->feature_data;
double* hist = ori_hist(gauss_pyr[ddata.octv][ddata.intvl],
ddata.r, ddata.c, SIFT_ORI_HIST_BINS,
(int)round(SIFT_ORI_RADIUS * ddata.scl_octv),
SIFT_ORI_SIG_FCTR * ddata.scl_octv);
for (int j = 0; j < SIFT_ORI_SMOOTH_PASSES; j++)
smooth_ori_hist(hist, SIFT_ORI_HIST_BINS);
double omax = dominant_ori(hist, SIFT_ORI_HIST_BINS);
add_good_ori_features(features, hist, SIFT_ORI_HIST_BINS,
omax * SIFT_ORI_PEAK_RATIO, feat);
free(hist);
feature_free(feat);
}
}
static void calc_feature_scales(FeatureArray* features, double sigma, int intvls) {
for (int i = 0; i < features->size; i++) {
Feature* feat = features->data[i];
double intvl = feat->feature_data.intvl + feat->feature_data.subintvl;
feat->scl = sigma * pow(2.0, feat->feature_data.octv + intvl / intvls);
feat->feature_data.scl_octv = sigma * pow(2.0, intvl / intvls);
}
}
static void adjust_for_img_dbl(FeatureArray* features) {
for (int i = 0; i < features->size; i++) {
Feature* feat = features->data[i];
feat->x /= 2.0;
feat->y /= 2.0;
feat->scl /= 2.0;
feat->img_pt_x = (float)(feat->img_pt_x / 2.0f);
feat->img_pt_y = (float)(feat->img_pt_y / 2.0f);
}
}
static void interp_hist_entry(float* hist, int d, int n,
double rbin, double cbin, double obin, double mag) {
int r0 = (int)floor(rbin);
int c0 = (int)floor(cbin);
int o0 = (int)floor(obin);
float d_r = (float)(rbin - r0);
float d_c = (float)(cbin - c0);
float d_o = (float)(obin - o0);
for (int r = 0; r <= 1; r++) {
int rb = r0 + r;
if (rb >= 0 && rb < d) {
float v_r = (float)mag * ((r == 0) ? 1.0f - d_r : d_r);
for (int c = 0; c <= 1; c++) {
int cb = c0 + c;
if (cb >= 0 && cb < d) {
float v_c = v_r * ((c == 0) ? 1.0f - d_c : d_c);
for (int o = 0; o <= 1; o++) {
int ob = (o0 + o) % n;
float v_o = v_c * ((o == 0) ? 1.0f - d_o : d_o);
hist[((rb * d) + cb) * n + ob] += v_o;
}
}
}
}
}
}
static float* descr_hist(const GrayImage* img, int r, int c, double ori,
double scl, int d, int n) {
float* hist = (float*)calloc(d * d * n, sizeof(float));
double cos_t = cos(ori);
double sin_t = sin(ori);
double bins_per_rad = n / (2.0 * M_PI);
double exp_denom = d * d * 0.5;
double hist_width = SIFT_DESCR_SCL_FCTR * scl;
int radius = (int)(hist_width * sqrt(2.0) * (d + 1.0) * 0.5 + 0.5);
for (int i = -radius; i <= radius; i++) {
for (int j = -radius; j <= radius; j++) {
double c_rot = (j * cos_t - i * sin_t) / hist_width;
double r_rot = (j * sin_t + i * cos_t) / hist_width;
double rbin = r_rot + d / 2.0 - 0.5;
double cbin = c_rot + d / 2.0 - 0.5;
if (rbin > -1.0 && rbin < d && cbin > -1.0 && cbin < d) {
double grad_mag, grad_ori;
if (calc_grad_mag_ori(img, r + i, c + j, &grad_mag, &grad_ori) != 0) {
grad_ori -= ori;
while (grad_ori < 0.0) grad_ori += 2.0 * M_PI;
while (grad_ori >= 2.0 * M_PI) grad_ori -= 2.0 * M_PI;
double obin = grad_ori * bins_per_rad;
double w = exp(-(c_rot * c_rot + r_rot * r_rot) / exp_denom);
interp_hist_entry(hist, d, n, rbin, cbin, obin, grad_mag * w);
}
}
}
}
return hist;
}
static void normalize_descr(Feature* feat) {
double len_sq = 0.0;
for (int i = 0; i < feat->d; i++) {
double cur = feat->descr[i];
len_sq += cur * cur;
}
double len_inv = 1.0 / sqrt(len_sq);
for (int i = 0; i < feat->d; i++)
feat->descr[i] *= len_inv;
}
static void hist_to_descr(float* hist, int d, int n, Feature* feat) {
int k = 0;
for (int r = 0; r < d; r++)
for (int c = 0; c < d; c++)
for (int o = 0; o < n; o++)
feat->descr[k++] = hist[((r * d) + c) * n + o];
feat->d = k;
normalize_descr(feat);
for (int i = 0; i < k; i++)
if (feat->descr[i] > SIFT_DESCR_MAG_THR)
feat->descr[i] = SIFT_DESCR_MAG_THR;
normalize_descr(feat);
for (int i = 0; i < k; i++) {
int int_val = (int)(SIFT_INT_DESCR_FCTR * feat->descr[i]);
feat->descr[i] = int_val < 255 ? int_val : 255;
}
}
static void compute_descriptors(FeatureArray* features, GrayImage*** gauss_pyr, int d, int n) {
for (int i = 0; i < features->size; i++) {
Feature* feat = features->data[i];
DetectionData ddata = feat->feature_data;
float* hist = descr_hist(gauss_pyr[ddata.octv][ddata.intvl], ddata.r,
ddata.c, feat->ori, ddata.scl_octv, d, n);
hist_to_descr(hist, d, n, feat);
free(hist);
}
}
static int is_extremum(GrayImage*** dog_pyr, int octv, int intvl, int r, int c) {
float val = gray_image_get(dog_pyr[octv][intvl], r, c);
if (val > 0) {
for (int i = -1; i <= 1; i++)
for (int j = -1; j <= 1; j++)
for (int k = -1; k <= 1; k++)
if (val < gray_image_get(dog_pyr[octv][intvl + i], r + j, c + k))
return 0;
} else {
for (int i = -1; i <= 1; i++)
for (int j = -1; j <= 1; j++)
for (int k = -1; k <= 1; k++)
if (val > gray_image_get(dog_pyr[octv][intvl + i], r + j, c + k))
return 0;
}
return 1;
}
static void deriv_3D(GrayImage*** dog_pyr, int octv, int intvl, int r, int c, double dI[3]) {
dI[0] = (gray_image_get(dog_pyr[octv][intvl], r, c + 1) -
gray_image_get(dog_pyr[octv][intvl], r, c - 1)) / 2.0;
dI[1] = (gray_image_get(dog_pyr[octv][intvl], r + 1, c) -
gray_image_get(dog_pyr[octv][intvl], r - 1, c)) / 2.0;
dI[2] = (gray_image_get(dog_pyr[octv][intvl + 1], r, c) -
gray_image_get(dog_pyr[octv][intvl - 1], r, c)) / 2.0;
}
static void hessian_3D(GrayImage*** dog_pyr, int octv, int intvl, int r, int c, double H[3][3]) {
double v = gray_image_get(dog_pyr[octv][intvl], r, c);
double dxx = gray_image_get(dog_pyr[octv][intvl], r, c + 1) +
gray_image_get(dog_pyr[octv][intvl], r, c - 1) - 2 * v;
double dyy = gray_image_get(dog_pyr[octv][intvl], r + 1, c) +
gray_image_get(dog_pyr[octv][intvl], r - 1, c) - 2 * v;
double dss = gray_image_get(dog_pyr[octv][intvl + 1], r, c) +
gray_image_get(dog_pyr[octv][intvl - 1], r, c) - 2 * v;
double dxy = (gray_image_get(dog_pyr[octv][intvl], r + 1, c + 1) -
gray_image_get(dog_pyr[octv][intvl], r + 1, c - 1) -
gray_image_get(dog_pyr[octv][intvl], r - 1, c + 1) +
gray_image_get(dog_pyr[octv][intvl], r - 1, c - 1)) / 4.0;
double dxs = (gray_image_get(dog_pyr[octv][intvl + 1], r, c + 1) -
gray_image_get(dog_pyr[octv][intvl + 1], r, c - 1) -
gray_image_get(dog_pyr[octv][intvl - 1], r, c + 1) +
gray_image_get(dog_pyr[octv][intvl - 1], r, c - 1)) / 4.0;
double dys = (gray_image_get(dog_pyr[octv][intvl + 1], r + 1, c) -
gray_image_get(dog_pyr[octv][intvl + 1], r - 1, c) -
gray_image_get(dog_pyr[octv][intvl - 1], r + 1, c) +
gray_image_get(dog_pyr[octv][intvl - 1], r - 1, c)) / 4.0;
H[0][0] = dxx; H[0][1] = dxy; H[0][2] = dxs;
H[1][0] = dxy; H[1][1] = dyy; H[1][2] = dys;
H[2][0] = dxs; H[2][1] = dys; H[2][2] = dss;
}
static void interp_step(GrayImage*** dog_pyr, int octv, int intvl, int r, int c,
double* xi, double* xr, double* xc) {
double dD[3];
deriv_3D(dog_pyr, octv, intvl, r, c, dD);
double H[3][3];
hessian_3D(dog_pyr, octv, intvl, r, c, H);
double H_inv[3][3];
invert3x3(H, H_inv);
double x[3] = {0, 0, 0};
for (int i = 0; i < 3; i++) {
for (int j = 0; j < 3; j++)
x[i] -= H_inv[i][j] * dD[j];
}
*xc = x[0];
*xr = x[1];
*xi = x[2];
}
static double interp_contr(GrayImage*** dog_pyr, int octv, int intvl, int r,
int c, double xi, double xr, double xc) {
double dD[3];
deriv_3D(dog_pyr, octv, intvl, r, c, dD);
double t = dD[0] * xc + dD[1] * xr + dD[2] * xi;
return gray_image_get(dog_pyr[octv][intvl], r, c) + t * 0.5;
}
static Feature* new_feature(void) {
Feature* feat = feature_new();
feat->type = FEATURE_LOWE;
return feat;
}
static Feature* interp_extremum(GrayImage*** dog_pyr, int octv, int intvl,
int r, int c, int intvls, double contr_thr) {
double xi = 0, xr = 0, xc = 0, contr;
int i = 0;
while (i < SIFT_MAX_INTERP_STEPS) {
interp_step(dog_pyr, octv, intvl, r, c, &xi, &xr, &xc);
if (fabs(xi) < 0.5 && fabs(xr) < 0.5 && fabs(xc) < 0.5)
break;
c += (int)round(xc);
r += (int)round(xr);
intvl += (int)round(xi);
if (intvl < 1 || intvl > intvls ||
c < SIFT_IMG_BORDER || r < SIFT_IMG_BORDER ||
c >= dog_pyr[octv][0]->width - SIFT_IMG_BORDER ||
r >= dog_pyr[octv][0]->height - SIFT_IMG_BORDER) {
return NULL;
}
i++;
}
if (i >= SIFT_MAX_INTERP_STEPS)
return NULL;
contr = interp_contr(dog_pyr, octv, intvl, r, c, xi, xr, xc);
if (fabs(contr) < contr_thr / intvls)
return NULL;
Feature* feat = new_feature();
DetectionData ddata = feat->feature_data;
feat->img_pt_x = (float)((feat->x = (c + xc) * pow(2.0, octv)));
feat->img_pt_y = (float)((feat->y = (r + xr) * pow(2.0, octv)));
ddata.r = r;
ddata.c = c;
ddata.octv = octv;
ddata.intvl = intvl;
ddata.subintvl = xi;
feat->feature_data = ddata;
return feat;
}
static int is_too_edge_like(const GrayImage* dog_img, int r, int c, int curv_thr) {
if (c == 0 || r == 0) return 1;
double d = gray_image_get(dog_img, r, c);
double dxx = gray_image_get(dog_img, r, c + 1) + gray_image_get(dog_img, r, c - 1) - 2 * d;
double dyy = gray_image_get(dog_img, r + 1, c) + gray_image_get(dog_img, r - 1, c) - 2 * d;
double dxy = (gray_image_get(dog_img, r + 1, c + 1) - gray_image_get(dog_img, r + 1, c - 1) -
gray_image_get(dog_img, r - 1, c + 1) + gray_image_get(dog_img, r - 1, c - 1)) / 4.0;
double tr = dxx + dyy;
double det = dxx * dyy - dxy * dxy;
if (det <= 0) return 1;
if (tr * tr / det < (curv_thr + 1.0) * (curv_thr + 1.0) / curv_thr)
return 0;
return 1;
}
static FeatureArray* scale_space_extrema(GrayImage*** dog_pyr, int octvs, int intvls,
double contr_thr, int curv_thr) {
FeatureArray* features = feature_array_new();
double prelim_contr_thr = 0.5 * contr_thr / intvls;
for (int o = 0; o < octvs; o++) {
for (int i = 1; i <= intvls; i++) {
for (int r = SIFT_IMG_BORDER; r < dog_pyr[o][0]->height - SIFT_IMG_BORDER; r++) {
for (int c = SIFT_IMG_BORDER; c < dog_pyr[o][0]->width - SIFT_IMG_BORDER; c++) {
if (fabs(gray_image_get(dog_pyr[o][i], r, c)) > prelim_contr_thr) {
if (is_extremum(dog_pyr, o, i, r, c) == 1) {
Feature* feat = interp_extremum(dog_pyr, o, i, r, c, intvls, contr_thr);
if (feat != NULL) {
DetectionData ddata = feat->feature_data;
if (is_too_edge_like(dog_pyr[ddata.octv][ddata.intvl],
ddata.r, ddata.c, curv_thr) == 0) {
feature_array_push(features, feat);
} else {
feature_free(feat);
}
}
}
}
}
}
}
}
return features;
}
FeatureArray* sift_extract_features(const GrayImage* img) {
GrayImage* init_img = create_init_img(img, SIFT_IMG_DBL, SIFT_SIGMA);
int octvs = (int)(log(fmin(init_img->width, init_img->height)) / log(2.0) - 2);
if (octvs < 1) octvs = 1;
GrayImage*** gauss_pyr = build_gauss_pyr(init_img, octvs, SIFT_INTVLS, SIFT_SIGMA);
GrayImage*** dog_pyr = build_dog_pyr(gauss_pyr, octvs, SIFT_INTVLS);
FeatureArray* features = scale_space_extrema(dog_pyr, octvs, SIFT_INTVLS, SIFT_CONTR_THR, SIFT_CURV_THR);
calc_feature_scales(features, SIFT_SIGMA, SIFT_INTVLS);
if (SIFT_IMG_DBL != 0)
adjust_for_img_dbl(features);
calc_feature_oris(features, gauss_pyr);
compute_descriptors(features, gauss_pyr, SIFT_DESCR_WIDTH, SIFT_DESCR_HIST_BINS);
feature_array_sort_by_scale(features);
gray_image_free(init_img);
free_pyr(gauss_pyr, octvs, SIFT_INTVLS + 3);
free_pyr(dog_pyr, octvs, SIFT_INTVLS + 2);
return features;
}
sift_matcher.h
#ifndef SIFT_MATCHER_H
#define SIFT_MATCHER_H
#ifdef __cplusplus
extern "C" {
#endif
#include "sift_types.h"
#include "sift_algorithm.h"
double sift_matcher_compute_similarity(const FeatureArray* features1, const FeatureArray* features2);
int sift_matcher_count_matches(const FeatureArray* features1, const FeatureArray* features2);
double sift_matcher_descriptor_distance(const Feature* a, const Feature* b);
#ifdef __cplusplus
}
#endif
#endif
sift_matcher.cu
#include "sift_matcher.h"
#include <cuda_runtime.h>
#include <math.h>
#include <float.h>
#include <stdlib.h>
#include <stdio.h>
__global__ void descriptor_distances_kernel(const double* descr1, const double* descr2,
double* distances, int n1, int n2, int d) {
int idx1 = blockIdx.x * blockDim.x + threadIdx.x;
if (idx1 >= n1) return;
for (int idx2 = 0; idx2 < n2; idx2++) {
double sum = 0.0;
for (int i = 0; i < d; i++) {
double diff = descr1[idx1 * d + i] - descr2[idx2 * d + i];
sum += diff * diff;
}
distances[idx1 * n2 + idx2] = sqrt(sum);
}
}
double sift_matcher_descriptor_distance(const Feature* a, const Feature* b) {
int len = (a->d < b->d) ? a->d : b->d;
if (len == 0) {
int la = FEATURE_MAX_D;
int lb = FEATURE_MAX_D;
len = (la < lb) ? la : lb;
}
double sum = 0.0;
for (int i = 0; i < len; i++) {
double diff = a->descr[i] - b->descr[i];
sum += diff * diff;
}
return sqrt(sum);
}
static void check_cuda(cudaError_t err, const char* msg) {
if (err != cudaSuccess) {
fprintf(stderr, "CUDA error (%s): %s\n", msg, cudaGetErrorString(err));
exit(1);
}
}
int sift_matcher_count_matches(const FeatureArray* features1, const FeatureArray* features2) {
if (!features1 || !features2 || features1->size == 0 || features2->size == 0)
return 0;
int n1 = features1->size;
int n2 = features2->size;
int d = FEATURE_MAX_D;
double* h_descr1 = (double*)malloc(n1 * d * sizeof(double));
double* h_descr2 = (double*)malloc(n2 * d * sizeof(double));
for (int i = 0; i < n1; i++) {
memcpy(h_descr1 + i * d, features1->data[i]->descr, d * sizeof(double));
}
for (int i = 0; i < n2; i++) {
memcpy(h_descr2 + i * d, features2->data[i]->descr, d * sizeof(double));
}
double *d_descr1, *d_descr2, *d_distances;
check_cuda(cudaMalloc(&d_descr1, n1 * d * sizeof(double)), "cudaMalloc d_descr1");
check_cuda(cudaMalloc(&d_descr2, n2 * d * sizeof(double)), "cudaMalloc d_descr2");
check_cuda(cudaMalloc(&d_distances, n1 * n2 * sizeof(double)), "cudaMalloc d_distances");
check_cuda(cudaMemcpy(d_descr1, h_descr1, n1 * d * sizeof(double), cudaMemcpyHostToDevice), "memcpy descr1");
check_cuda(cudaMemcpy(d_descr2, h_descr2, n2 * d * sizeof(double), cudaMemcpyHostToDevice), "memcpy descr2");
int threadsPerBlock = 256;
int blocksPerGrid = (n1 + threadsPerBlock - 1) / threadsPerBlock;
descriptor_distances_kernel<<<blocksPerGrid, threadsPerBlock>>>(d_descr1, d_descr2, d_distances, n1, n2, d);
check_cuda(cudaGetLastError(), "descriptor_distances_kernel");
double* h_distances = (double*)malloc(n1 * n2 * sizeof(double));
check_cuda(cudaMemcpy(h_distances, d_distances, n1 * n2 * sizeof(double), cudaMemcpyDeviceToHost), "memcpy distances");
int matchCount = 0;
double ratioThresh = 0.75;
for (int i = 0; i < n1; i++) {
double bestDist = DBL_MAX;
double secondBestDist = DBL_MAX;
for (int j = 0; j < n2; j++) {
double dist = h_distances[i * n2 + j];
if (dist < bestDist) {
secondBestDist = bestDist;
bestDist = dist;
} else if (dist < secondBestDist) {
secondBestDist = dist;
}
}
if (secondBestDist > 0 && bestDist / secondBestDist < ratioThresh) {
matchCount++;
}
}
free(h_descr1);
free(h_descr2);
free(h_distances);
cudaFree(d_descr1);
cudaFree(d_descr2);
cudaFree(d_distances);
return matchCount;
}
double sift_matcher_compute_similarity(const FeatureArray* features1, const FeatureArray* features2) {
if (!features1 || !features2 || features1->size == 0 || features2->size == 0)
return 0.0;
int matches = sift_matcher_count_matches(features1, features2);
int minFeatures = (features1->size < features2->size) ? features1->size : features2->size;
if (minFeatures == 0)
return 0.0;
double ratio = (double)matches / minFeatures;
double score = 1.0 - exp(-3.0 * ratio);
if (score < 0.0) score = 0.0;
if (score > 1.0) score = 1.0;
return score;
}
image_similarity.h
#ifndef IMAGE_SIMILARITY_H
#define IMAGE_SIMILARITY_H
#ifdef __cplusplus
extern "C" {
#endif
#include "gray_image.h"
#include "sift_algorithm.h"
typedef struct TemplateCacheEntry {
char* label;
FeatureArray* features;
struct TemplateCacheEntry* next;
} TemplateCacheEntry;
typedef struct {
TemplateCacheEntry** buckets;
int bucket_count;
} TemplateCache;
typedef struct {
TemplateCache* cache;
} ImageSimilarityEvaluator;
ImageSimilarityEvaluator* evaluator_new(void);
void evaluator_free(ImageSimilarityEvaluator* eval);
double evaluator_evaluate(ImageSimilarityEvaluator* eval, const GrayImage* currentImage,
const char* label, const GrayImage* correctImage);
void evaluator_preload_template(ImageSimilarityEvaluator* eval, const char* label,
const GrayImage* correctImage);
void evaluator_clear_cache(ImageSimilarityEvaluator* eval, const char* label);
void evaluator_clear_all_cache(ImageSimilarityEvaluator* eval);
#ifdef __cplusplus
}
#endif
#endif
image_similarity.cu
#include "image_similarity.h"
#include "sift_matcher.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#ifdef _WIN32
#include <string.h>
#define strcasecmp _stricmp
#define strdup _strdup
#else
#include <strings.h>
#endif
static unsigned int hash_string(const char* str) {
unsigned int hash = 5381;
int c;
while ((c = *str++))
hash = ((hash << 5) + hash) + c;
return hash;
}
static TemplateCache* template_cache_new(void) {
TemplateCache* cache = (TemplateCache*)malloc(sizeof(TemplateCache));
cache->bucket_count = 64;
cache->buckets = (TemplateCacheEntry**)calloc(cache->bucket_count, sizeof(TemplateCacheEntry*));
return cache;
}
static void template_cache_free(TemplateCache* cache) {
if (!cache) return;
for (int i = 0; i < cache->bucket_count; i++) {
TemplateCacheEntry* entry = cache->buckets[i];
while (entry) {
TemplateCacheEntry* next = entry->next;
free(entry->label);
feature_array_free(entry->features);
free(entry);
entry = next;
}
}
free(cache->buckets);
free(cache);
}
static FeatureArray* template_cache_get(TemplateCache* cache, const char* label) {
unsigned int h = hash_string(label) % cache->bucket_count;
TemplateCacheEntry* entry = cache->buckets[h];
while (entry) {
if (strcasecmp(entry->label, label) == 0)
return entry->features;
entry = entry->next;
}
return NULL;
}
static void template_cache_set(TemplateCache* cache, const char* label, FeatureArray* features) {
unsigned int h = hash_string(label) % cache->bucket_count;
TemplateCacheEntry* entry = cache->buckets[h];
while (entry) {
if (strcasecmp(entry->label, label) == 0) {
feature_array_free(entry->features);
entry->features = features;
return;
}
entry = entry->next;
}
entry = (TemplateCacheEntry*)malloc(sizeof(TemplateCacheEntry));
entry->label = strdup(label);
entry->features = features;
entry->next = cache->buckets[h];
cache->buckets[h] = entry;
}
static void template_cache_remove(TemplateCache* cache, const char* label) {
unsigned int h = hash_string(label) % cache->bucket_count;
TemplateCacheEntry** p = &cache->buckets[h];
while (*p) {
if (strcasecmp((*p)->label, label) == 0) {
TemplateCacheEntry* to_remove = *p;
*p = (*p)->next;
free(to_remove->label);
feature_array_free(to_remove->features);
free(to_remove);
return;
}
p = &(*p)->next;
}
}
static FeatureArray* extract_features(const GrayImage* img) {
return sift_extract_features(img);
}
ImageSimilarityEvaluator* evaluator_new(void) {
ImageSimilarityEvaluator* eval = (ImageSimilarityEvaluator*)malloc(sizeof(ImageSimilarityEvaluator));
eval->cache = template_cache_new();
return eval;
}
void evaluator_free(ImageSimilarityEvaluator* eval) {
if (!eval) return;
template_cache_free(eval->cache);
free(eval);
}
double evaluator_evaluate(ImageSimilarityEvaluator* eval, const GrayImage* currentImage,
const char* label, const GrayImage* correctImage) {
if (!currentImage || !label || strlen(label) == 0)
return 0.0;
FeatureArray* templateFeatures = NULL;
if (correctImage != NULL) {
templateFeatures = extract_features(correctImage);
template_cache_set(eval->cache, label, templateFeatures);
} else {
templateFeatures = template_cache_get(eval->cache, label);
}
if (!templateFeatures)
return 0.0;
FeatureArray* currentFeatures = extract_features(currentImage);
if (currentFeatures->size == 0 || templateFeatures->size == 0) {
feature_array_free(currentFeatures);
return 0.0;
}
double similarity = sift_matcher_compute_similarity(currentFeatures, templateFeatures);
feature_array_free(currentFeatures);
return similarity;
}
void evaluator_preload_template(ImageSimilarityEvaluator* eval, const char* label,
const GrayImage* correctImage) {
if (!correctImage || !label || strlen(label) == 0)
return;
FeatureArray* features = extract_features(correctImage);
template_cache_set(eval->cache, label, features);
}
void evaluator_clear_cache(ImageSimilarityEvaluator* eval, const char* label) {
if (!label) return;
template_cache_remove(eval->cache, label);
}
void evaluator_clear_all_cache(ImageSimilarityEvaluator* eval) {
template_cache_free(eval->cache);
eval->cache = template_cache_new();
}
test_sift.cpp
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image/stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image/stb_image_write.h"
#include "gray_image.h"
#include "sift_algorithm.h"
#include "sift_matcher.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
#include <float.h>
#define FEATURE_MAX_D 128
typedef struct {
int idx1;
int idx2;
double dist;
} MatchPair;
static GrayImage* load_gray_image(const char* path) {
int w, h, channels;
unsigned char* pixels = stbi_load(path, &w, &h, &channels, 0);
if (!pixels) {
fprintf(stderr, "Failed to load image: %s\n", path);
return NULL;
}
GrayImage* img = gray_image_create(w, h);
if (!img) {
stbi_image_free(pixels);
return NULL;
}
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
int idx = (y * w + x) * channels;
float gray = 0.0f;
if (channels >= 3) {
float r = pixels[idx + 0] / 255.0f;
float g = pixels[idx + 1] / 255.0f;
float b = pixels[idx + 2] / 255.0f;
gray = r * 0.299f + g * 0.587f + b * 0.114f;
} else if (channels == 1) {
gray = pixels[idx] / 255.0f;
}
gray_image_set(img, y, x, gray);
}
}
stbi_image_free(pixels);
return img;
}
static double descriptor_distance(const Feature* a, const Feature* b) {
int len = FEATURE_MAX_D;
double sum = 0.0;
for (int i = 0; i < len; i++) {
double diff = a->descr[i] - b->descr[i];
sum += diff * diff;
}
return sqrt(sum);
}
static int match_features(const FeatureArray* f1, const FeatureArray* f2, MatchPair** out_matches) {
if (!f1 || !f2 || f1->size == 0 || f2->size == 0) return 0;
int n1 = f1->size;
int n2 = f2->size;
double ratioThresh = 0.75;
MatchPair* matches = (MatchPair*)malloc(n1 * sizeof(MatchPair));
int match_count = 0;
for (int i = 0; i < n1; i++) {
double bestDist = DBL_MAX;
double secondBestDist = DBL_MAX;
int bestIdx = -1;
for (int j = 0; j < n2; j++) {
double dist = descriptor_distance(f1->data[i], f2->data[j]);
if (dist < bestDist) {
secondBestDist = bestDist;
bestDist = dist;
bestIdx = j;
} else if (dist < secondBestDist) {
secondBestDist = dist;
}
}
if (secondBestDist > 0 && bestDist / secondBestDist < ratioThresh && bestIdx >= 0) {
matches[match_count].idx1 = i;
matches[match_count].idx2 = bestIdx;
matches[match_count].dist = bestDist;
match_count++;
}
}
*out_matches = matches;
return match_count;
}
static void set_pixel(unsigned char* img, int w, int h, int x, int y, unsigned char r, unsigned char g, unsigned char b) {
if (x < 0 || x >= w || y < 0 || y >= h) return;
int idx = (y * w + x) * 3;
img[idx + 0] = r;
img[idx + 1] = g;
img[idx + 2] = b;
}
static void draw_line(unsigned char* img, int w, int h, int x0, int y0, int x1, int y1, unsigned char r, unsigned char g, unsigned char b) {
int dx = abs(x1 - x0);
int dy = abs(y1 - y0);
int sx = (x0 < x1) ? 1 : -1;
int sy = (y0 < y1) ? 1 : -1;
int err = dx - dy;
while (1) {
set_pixel(img, w, h, x0, y0, r, g, b);
if (x0 == x1 && y0 == y1) break;
int e2 = 2 * err;
if (e2 > -dy) {
err -= dy;
x0 += sx;
}
if (e2 < dx) {
err += dx;
y0 += sy;
}
}
}
static void draw_circle(unsigned char* img, int w, int h, int cx, int cy, int radius, unsigned char r, unsigned char g, unsigned char b) {
int x = radius;
int y = 0;
int err = 0;
while (x >= y) {
set_pixel(img, w, h, cx + x, cy + y, r, g, b);
set_pixel(img, w, h, cx + y, cy + x, r, g, b);
set_pixel(img, w, h, cx - y, cy + x, r, g, b);
set_pixel(img, w, h, cx - x, cy + y, r, g, b);
set_pixel(img, w, h, cx - x, cy - y, r, g, b);
set_pixel(img, w, h, cx - y, cy - x, r, g, b);
set_pixel(img, w, h, cx + y, cy - x, r, g, b);
set_pixel(img, w, h, cx + x, cy - y, r, g, b);
if (err <= 0) {
y += 1;
err += 2 * y + 1;
}
if (err > 0) {
x -= 1;
err -= 2 * x + 1;
}
}
}
static unsigned char* create_side_by_side(const unsigned char* img1, int w1, int h1,
const unsigned char* img2, int w2, int h2,
int* out_w, int* out_h) {
int max_h = (h1 > h2) ? h1 : h2;
int out_w_val = w1 + w2;
int out_h_val = max_h;
*out_w = out_w_val;
*out_h = out_h_val;
unsigned char* out = (unsigned char*)calloc(out_w_val * out_h_val * 3, sizeof(unsigned char));
if (!out) return NULL;
for (int i = 0; i < out_w_val * out_h_val * 3; i++) {
out[i] = 30;
}
for (int y = 0; y < h1; y++) {
for (int x = 0; x < w1; x++) {
int src_idx = (y * w1 + x) * 3;
int dst_idx = (y * out_w_val + x) * 3;
out[dst_idx + 0] = img1[src_idx + 0];
out[dst_idx + 1] = img1[src_idx + 1];
out[dst_idx + 2] = img1[src_idx + 2];
}
}
for (int y = 0; y < h2; y++) {
for (int x = 0; x < w2; x++) {
int src_idx = (y * w2 + x) * 3;
int dst_idx = (y * out_w_val + (x + w1)) * 3;
out[dst_idx + 0] = img2[src_idx + 0];
out[dst_idx + 1] = img2[src_idx + 1];
out[dst_idx + 2] = img2[src_idx + 2];
}
}
return out;
}
static unsigned char* rgb_from_gray(const GrayImage* gray) {
int w = gray->width;
int h = gray->height;
unsigned char* rgb = (unsigned char*)malloc(w * h * 3);
if (!rgb) return NULL;
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
unsigned char val = (unsigned char)(gray_image_get(gray, y, x) * 255.0f);
int idx = (y * w + x) * 3;
rgb[idx + 0] = val;
rgb[idx + 1] = val;
rgb[idx + 2] = val;
}
}
return rgb;
}
int main(int argc, char** argv) {
if (argc < 3) {
printf("Usage: %s <image1> <image2> [output.jpg]\n", argv[0]);
return 1;
}
const char* img1_path = argv[1];
const char* img2_path = argv[2];
const char* output_path = (argc >= 4) ? argv[3] : "sift_match_result.jpg";
printf("Loading images...\n");
GrayImage* gray1 = load_gray_image(img1_path);
GrayImage* gray2 = load_gray_image(img2_path);
if (!gray1 || !gray2) {
fprintf(stderr, "Failed to load images\n");
return 1;
}
printf("Image1: %dx%d\n", gray1->width, gray1->height);
printf("Image2: %dx%d\n", gray2->width, gray2->height);
printf("Extracting SIFT features from image1...\n");
FeatureArray* f1 = sift_extract_features(gray1);
printf("Features1: %d\n", f1 ? f1->size : 0);
printf("Extracting SIFT features from image2...\n");
FeatureArray* f2 = sift_extract_features(gray2);
printf("Features2: %d\n", f2 ? f2->size : 0);
if (!f1 || !f2 || f1->size == 0 || f2->size == 0) {
fprintf(stderr, "No features found\n");
return 1;
}
printf("Matching features...\n");
MatchPair* matches = NULL;
int match_count = match_features(f1, f2, &matches);
printf("Matches: %d\n", match_count);
unsigned char* rgb1 = rgb_from_gray(gray1);
unsigned char* rgb2 = rgb_from_gray(gray2);
if (!rgb1 || !rgb2) {
fprintf(stderr, "Memory allocation failed\n");
return 1;
}
int out_w, out_h;
unsigned char* out = create_side_by_side(rgb1, gray1->width, gray1->height,
rgb2, gray2->width, gray2->height,
&out_w, &out_h);
if (!out) {
fprintf(stderr, "Memory allocation failed\n");
return 1;
}
unsigned char colors[][3] = {
{255, 0, 0}, {0, 255, 0}, {0, 0, 255},
{255, 255, 0}, {255, 0, 255}, {0, 255, 255},
{255, 128, 0}, {128, 255, 0}, {0, 128, 255},
{255, 0, 128}, {128, 0, 255}, {0, 255, 128}
};
int color_count = sizeof(colors) / sizeof(colors[0]);
int draw_count = (match_count > 50) ? 50 : match_count;
for (int i = 0; i < draw_count; i++) {
Feature* feat1 = f1->data[matches[i].idx1];
Feature* feat2 = f2->data[matches[i].idx2];
int x1 = (int)feat1->x;
int y1 = (int)feat1->y;
int x2 = (int)feat2->x + gray1->width;
int y2 = (int)feat2->y;
unsigned char* c = colors[i % color_count];
draw_line(out, out_w, out_h, x1, y1, x2, y2, c[0], c[1], c[2]);
draw_circle(out, out_w, out_h, x1, y1, 3, c[0], c[1], c[2]);
draw_circle(out, out_w, out_h, x2, y2, 3, c[0], c[1], c[2]);
}
printf("Saving result to %s...\n", output_path);
int success = stbi_write_jpg(output_path, out_w, out_h, 3, out, 95);
if (!success) {
fprintf(stderr, "Failed to write output image\n");
return 1;
}
printf("Done. Result saved to %s\n", output_path);
free(matches);
free(rgb1);
free(rgb2);
free(out);
feature_array_free(f1);
feature_array_free(f2);
gray_image_free(gray1);
gray_image_free(gray2);
return 0;
}
CudaSharpNative.cu (SIFT导出层节选)
// ==================== SIFT 模块导出接口 ====================
#include "gray_image.h"
#include "sift_algorithm.h"
#include "sift_matcher.h"
#include "image_similarity.h"
static GrayImage* load_gray_from_memory(const unsigned char* data, int size) {
int w, h, channels;
unsigned char* pixels = stbi_load_from_memory(data, size, &w, &h, &channels, 0);
if (!pixels) return nullptr;
GrayImage* img = gray_image_create(w, h);
if (!img) { stbi_image_free(pixels); return nullptr; }
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
int idx = (y * w + x) * channels;
float gray = 0.0f;
if (channels >= 3) {
float r = pixels[idx + 0] / 255.0f;
float g = pixels[idx + 1] / 255.0f;
float b = pixels[idx + 2] / 255.0f;
gray = r * 0.299f + g * 0.587f + b * 0.114f;
} else if (channels == 1) {
gray = pixels[idx] / 255.0f;
}
gray_image_set(img, y, x, gray);
}
}
stbi_image_free(pixels);
return img;
}
DLLEXPORT double CompareImagesSift(const unsigned char* data1, int size1,
const unsigned char* data2, int size2) {
if (!data1 || size1 <= 0 || !data2 || size2 <= 0) {
lastError = "Invalid arguments";
return -1.0;
}
GrayImage* img1 = load_gray_from_memory(data1, size1);
if (!img1) { lastError = std::string("Failed to decode image 1: ") + stbi_failure_reason(); return -1.0; }
GrayImage* img2 = load_gray_from_memory(data2, size2);
if (!img2) { lastError = std::string("Failed to decode image 2: ") + stbi_failure_reason(); gray_image_free(img1); return -1.0; }
FeatureArray* f1 = sift_extract_features(img1);
FeatureArray* f2 = sift_extract_features(img2);
double similarity = sift_matcher_compute_similarity(f1, f2);
feature_array_free(f1);
feature_array_free(f2);
gray_image_free(img1);
gray_image_free(img2);
return similarity;
}
DLLEXPORT int CountMatchesSift(const unsigned char* data1, int size1,
const unsigned char* data2, int size2) {
if (!data1 || size1 <= 0 || !data2 || size2 <= 0) {
lastError = "Invalid arguments";
return -1;
}
GrayImage* img1 = load_gray_from_memory(data1, size1);
if (!img1) { lastError = std::string("Failed to decode image 1: ") + stbi_failure_reason(); return -1; }
GrayImage* img2 = load_gray_from_memory(data2, size2);
if (!img2) { lastError = std::string("Failed to decode image 2: ") + stbi_failure_reason(); gray_image_free(img1); return -1; }
FeatureArray* f1 = sift_extract_features(img1);
FeatureArray* f2 = sift_extract_features(img2);
int matches = sift_matcher_count_matches(f1, f2);
feature_array_free(f1);
feature_array_free(f2);
gray_image_free(img1);
gray_image_free(img2);
return matches;
}
DLLEXPORT void* EvaluatorNew() {
return evaluator_new();
}
DLLEXPORT void EvaluatorFree(void* handle) {
if (handle) evaluator_free(static_cast<ImageSimilarityEvaluator*>(handle));
}
DLLEXPORT double EvaluatorEvaluate(void* handle,
const unsigned char* currentData, int currentSize,
const char* label,
const unsigned char* correctData, int correctSize) {
if (!handle || !currentData || currentSize <= 0 || !label) {
lastError = "Invalid arguments";
return -1.0;
}
GrayImage* currentImg = load_gray_from_memory(currentData, currentSize);
if (!currentImg) { lastError = std::string("Failed to decode current image: ") + stbi_failure_reason(); return -1.0; }
GrayImage* correctImg = nullptr;
if (correctData && correctSize > 0) {
correctImg = load_gray_from_memory(correctData, correctSize);
if (!correctImg) { lastError = std::string("Failed to decode correct image: ") + stbi_failure_reason(); gray_image_free(currentImg); return -1.0; }
}
double result = evaluator_evaluate(static_cast<ImageSimilarityEvaluator*>(handle),
currentImg, label, correctImg);
gray_image_free(currentImg);
gray_image_free(correctImg);
return result;
}
DLLEXPORT void EvaluatorPreloadTemplate(void* handle, const char* label,
const unsigned char* correctData, int correctSize) {
if (!handle || !label || !correctData || correctSize <= 0) return;
GrayImage* correctImg = load_gray_from_memory(correctData, correctSize);
if (!correctImg) return;
evaluator_preload_template(static_cast<ImageSimilarityEvaluator*>(handle), label, correctImg);
gray_image_free(correctImg);
}
CudaBinarizeLib.cs (SIFT部分)
using System;
using System.Runtime.InteropServices;
namespace CudaSharp
{
public class CudaImageSimilarityEvaluator : IDisposable
{
private const string DllName = "libCudaSharpNative.so";
private IntPtr _handle;
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
private static extern IntPtr EvaluatorNew();
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
private static extern void EvaluatorFree(IntPtr handle);
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
private static extern double EvaluatorEvaluate(IntPtr handle,
byte[] currentData, int currentSize, string label,
byte[] correctData, int correctSize);
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
private static extern void EvaluatorPreloadTemplate(IntPtr handle,
string label, byte[] correctData, int correctSize);
public CudaImageSimilarityEvaluator()
{
_handle = EvaluatorNew();
}
public double Evaluate(byte[] currentImage, string label)
{
return EvaluatorEvaluate(_handle, currentImage, currentImage.Length,
label, null, 0);
}
public double Evaluate(byte[] currentImage, string label, byte[] templateImage)
{
return EvaluatorEvaluate(_handle, currentImage, currentImage.Length,
label, templateImage, templateImage.Length);
}
public void PreloadTemplate(string label, byte[] templateImage)
{
EvaluatorPreloadTemplate(_handle, label, templateImage, templateImage.Length);
}
public void Dispose()
{
if (_handle != IntPtr.Zero)
{
EvaluatorFree(_handle);
_handle = IntPtr.Zero;
}
}
}
public static class CudaSift
{
private const string DllName = "libCudaSharpNative.so";
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
private static extern double CompareImagesSift(byte[] data1, int size1,
byte[] data2, int size2);
[DllImport(DllName, CallingConvention = CallingConvention.Cdecl)]
private static extern int CountMatchesSift(byte[] data1, int size1,
byte[] data2, int size2);
public static double CompareImages(byte[] image1, byte[] image2)
=> CompareImagesSift(image1, image1.Length, image2, image2.Length);
public static int CountMatches(byte[] image1, byte[] image2)
=> CountMatchesSift(image1, image1.Length, image2, image2.Length);
}
}
build.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
CudaSharp - CUDA 动态阈值二值化 + SIFT DLL 构建脚本
"""
import os
import sys
import subprocess
from pathlib import Path
os.environ['PYTHONIOENCODING'] = 'utf-8'
CUDA_ARCHS = [
("61", "sm_61"),
("75", "sm_75"),
("86", "sm_86"),
("89", "sm_89"),
]
SOURCE_FILES = [
"CudaSharpNative.cu",
"gray_image.c",
"image_ops.cu",
"sift_types.c",
"sift_algorithm.cu",
"sift_matcher.cu",
"image_similarity.cu",
]
OUTPUT_NAME = "CudaSharpNative.dll"
BUILD_DIR = "build"
def print_header():
print("=" * 50)
print("CudaSharp - CUDA DLL 构建脚本")
print("=" * 50)
print()
def find_nvcc_linux():
nvcc = "nvcc"
try:
result = subprocess.run(["which", nvcc], capture_output=True, text=True)
if result.returncode == 0:
return result.stdout.strip()
except Exception:
pass
cuda_path = os.environ.get("CUDA_PATH", "")
if cuda_path:
nvcc_path = os.path.join(cuda_path, "bin", "nvcc")
if os.path.exists(nvcc_path):
return nvcc_path
common_paths = [
"/usr/local/cuda/bin/nvcc",
"/usr/local/cuda-12.6/bin/nvcc",
"/usr/local/cuda-12.4/bin/nvcc",
"/usr/local/cuda-12.0/bin/nvcc",
"/usr/local/cuda-11.8/bin/nvcc",
]
for p in common_paths:
if os.path.exists(p):
return p
print("[错误] 找不到 nvcc,请确保 CUDA Toolkit 已安装并加入 PATH")
sys.exit(1)
def build_linux():
build_path = Path(BUILD_DIR)
build_path.mkdir(exist_ok=True)
nvcc = find_nvcc_linux()
print(f"[信息] 使用 NVCC: {nvcc}")
try:
result = subprocess.run([nvcc, "--version"], capture_output=True, text=True)
print("[信息] NVCC 版本:")
print(result.stdout)
except Exception as e:
print(f"[错误] 无法运行 nvcc: {e}")
sys.exit(1)
output_name = "libCudaSharpNative.so"
old_so = build_path / output_name
if old_so.exists():
old_so.unlink()
gencode_args = ["-arch=native"]
cmd = [
nvcc,
*SOURCE_FILES,
"--shared",
"-o", str(build_path / output_name),
"-O3",
*gencode_args,
"--use_fast_math",
"-Xcompiler", "-fPIC",
"-I.",
"-Istb_image",
"-DNDEBUG"
]
print("[信息] 开始编译 CUDA 共享库...")
print(f"[信息] 命令: {' '.join(cmd)}")
print()
log_path = build_path / "build.log"
try:
with open(log_path, "w", encoding="utf-8") as log_file:
result = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
encoding="utf-8",
errors="ignore"
)
log_file.write(result.stdout)
print(result.stdout, end="")
if result.returncode != 0:
print()
print(f"[错误] 编译失败!详细日志见: {log_path}")
sys.exit(1)
else:
print(f"[信息] 编译日志已保存到: {log_path}")
except Exception as e:
print(f"[错误] 编译过程出错: {e}")
sys.exit(1)
print()
print("=" * 50)
print(f"[成功] CUDA 共享库编译完成!")
print(f"输出文件: {BUILD_DIR}/{output_name}")
print("=" * 50)
print()
# 编译 SIFT 测试程序
test_cpp = Path("test_sift.cpp")
if test_cpp.exists():
print("[信息] 编译 SIFT 测试程序...")
test_out = build_path / "test_sift"
test_sources = [src for src in SOURCE_FILES if src != "CudaSharpNative.cu"]
test_cmd = [
nvcc,
str(test_cpp),
*test_sources,
"-o", str(test_out),
"-O3",
*gencode_args,
"--use_fast_math",
"-Xcompiler", "-fPIC",
"-I.",
"-Istb_image",
"-DNDEBUG"
]
print(f"[信息] 命令: {' '.join(test_cmd)}")
try:
result = subprocess.run(
test_cmd,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
text=True,
encoding="utf-8",
errors="ignore"
)
print(result.stdout, end="")
if result.returncode == 0:
print(f"[成功] 测试程序编译完成: {test_out}")
else:
print("[警告] 测试程序编译失败")
except Exception as e:
print(f"[警告] 测试程序编译出错: {e}")
print()
def main():
print_header()
if sys.platform == "win32":
print("[信息] 检测到 Windows 平台")
else:
print("[信息] 检测到 Linux 平台,使用 Linux 编译路径")
print()
build_linux()
if __name__ == "__main__":
main()
benchmark_sift.py
#!/usr/bin/env python3
"""
SIFT 性能基准测试:连续运行 10 次,计算平均耗时
"""
import subprocess
import time
import os
import sys
TEST_PAIRS = [
("1.jpg", "2.jpg"),
("3.jpg", "4.jpg"),
]
RUNS = 10
TEST_SIFT_PATH = "./build/test_sift"
def run_single(pair_idx, img1, img2, run_idx):
"""运行一次 SIFT 测试,返回耗时(秒)"""
output_file = f"bench_{pair_idx}_{run_idx}.jpg"
cmd = [TEST_SIFT_PATH, img1, img2, output_file]
start = time.perf_counter()
result = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True
)
elapsed = time.perf_counter() - start
if result.returncode != 0:
print(f"[错误] 运行失败: {result.stderr}")
return None
matches = 0
for line in result.stdout.splitlines():
if "Matches:" in line:
try:
matches = int(line.split(":")[1].strip())
except:
pass
return elapsed, matches, output_file
def benchmark_pair(pair_idx, img1, img2):
"""对一对图像运行 10 次测试"""
print(f"\n{'='*50}")
print(f"测试组合 {pair_idx + 1}: {img1} & {img2}")
print(f"{'='*50}")
times = []
matches_list = []
last_output = None
for i in range(RUNS):
print(f" 第 {i+1}/{RUNS} 次运行...", end=" ", flush=True)
result = run_single(pair_idx, img1, img2, i)
if result is None:
continue
elapsed, matches, output_file = result
times.append(elapsed)
matches_list.append(matches)
last_output = output_file
print(f"耗时: {elapsed*1000:.2f} ms, 匹配: {matches}")
if not times:
return None
avg_ms = sum(times) / len(times) * 1000
min_ms = min(times) * 1000
max_ms = max(times) * 1000
avg_matches = sum(matches_list) / len(matches_list)
print(f"\n 统计结果:")
print(f" 平均耗时: {avg_ms:.2f} ms")
print(f" 最小耗时: {min_ms:.2f} ms")
print(f" 最大耗时: {max_ms:.2f} ms")
print(f" 平均匹配: {avg_matches:.1f}")
return {
"pair": f"{img1} & {img2}",
"avg_ms": avg_ms,
"min_ms": min_ms,
"max_ms": max_ms,
"avg_matches": avg_matches,
"runs": len(times),
"last_output": last_output
}
def main():
print("="*50)
print("SIFT 性能基准测试")
print(f"运行次数: {RUNS} 次/组合")
print(f"测试程序: {TEST_SIFT_PATH}")
print("="*50)
if not os.path.exists(TEST_SIFT_PATH):
print(f"[错误] 找不到测试程序: {TEST_SIFT_PATH}")
sys.exit(1)
results = []
for idx, (img1, img2) in enumerate(TEST_PAIRS):
result = benchmark_pair(idx, img1, img2)
if result:
results.append(result)
# 清理临时文件
for idx, (img1, img2) in enumerate(TEST_PAIRS):
for i in range(RUNS):
f = f"bench_{idx}_{i}.jpg"
if os.path.exists(f) and f != results[idx]["last_output"]:
os.remove(f)
# 生成报告
print("\n" + "="*50)
print("测试完成!生成报告...")
print("="*50)
report_lines = [
"【SIFT 性能基准测试报告】",
"",
f"测试配置:连续运行 {RUNS} 次/组合",
"",
]
for r in results:
report_lines.append(f"组合: {r['pair']}")
report_lines.append(f" 平均耗时: {r['avg_ms']:.2f} ms")
report_lines.append(f" 最小耗时: {r['min_ms']:.2f} ms")
report_lines.append(f" 最大耗时: {r['max_ms']:.2f} ms")
report_lines.append(f" 平均匹配: {r['avg_matches']:.1f}")
report_lines.append("")
report = "\n".join(report_lines)
print(report)
with open("benchmark_report.txt", "w", encoding="utf-8") as f:
f.write(report)
print("\n报告已保存到 benchmark_report.txt")
print("结果图:")
for r in results:
print(f" {r['last_output']}")
if __name__ == "__main__":
main()
编译及测试
# 完整构建流程
python3 build.py
# SIFT 特征匹配测试
./build/test_sift <image1> <image2> [output.jpg]
# 示例
./build/test_sift 1.jpg 2.jpg sift_result.jpg
# 性能基准测试
python3 benchmark_sift.py
运行输出:
Loading images...
Image1: 800x600
Image2: 1080x900
Extracting SIFT features from image1...
Features1: 156
Extracting SIFT features from image2...
Features2: 203
Matching features...
Matches: 89
Saving result to sift_result.jpg...
Done. Result saved to sift_result.jpg
许可证
本项目使用 stb_image 库,遵循其公共领域许可证条款。

使用CUDA在GPU上加速SIFT特征提取与匹配算法, 并封装为CSharp库, 支持图像相似度评分和模板缓存.
本文为工程完整代码.
浙公网安备 33010602011771号