Hog行人检测
HOG特征向量归一化
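对block块内的HOG特征向量进行归一化。对block块内特征向量的归一化主要是为了使特征向量空间对光照、阴影和边缘变化具有鲁棒性。另外,归一化是针对每一个block分别进行的。设 $v$ 为未归一化的block特征向量,$\varepsilon$ 为一个很小的常数,一般采用的归一化函数有以下四种:(1)L2-norm:$v \leftarrow v/\sqrt{\|v\|_2^2+\varepsilon^2}$;(2)L2-Hys:先做L2-norm,再对各分量做截断(上限一般取0.2),然后重新归一化;(3)L1-norm:$v \leftarrow v/(\|v\|_1+\varepsilon)$;(4)L1-sqrt:$v \leftarrow \sqrt{v/(\|v\|_1+\varepsilon)}$。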
在人体检测系统中进行HOG计算时一般使用L2-norm,Dalal的文章也验证了对于人体检测系统使用L2-norm的时候效果最好。
关于计算直方图时用到的三线性插值
1.第一步的作用在于将图像规范化,通过两个方面GAMMA和COLOUR, GAMMA方面的话,其规范化后图像中的参量可以被直接提取出来,方便后面的操作,颜色的规范化则是去除图像中光强值同时保留颜色值,例如去除阴影或者光强变化的像素。
在低FPPW(每窗口误检率)的情况下,对各颜色通道做平方根GAMMA压缩能够提高检测表现;而LOG压缩过强,反而起到了反作用。
2.梯度的计算直接影响识别的表现,不同的梯度计算方法在FPPW上的表现不同。总体而言,较为简单的梯度算子能够获得更好的效果。此外,对于彩色图像,则是对每一个颜色通道独立计算梯度,并取其中范数最大的一个作为该像素的梯度向量。求导不仅能够捕捉人物轮廓信息,也能进一步削弱光强差异。
3.这个模块的主要目的在于:每个像素根据其梯度方向进行加权投票,并在局部空间区域(CELL)内把投票累加到方向直方图中;投票的权重反映该像素梯度幅值的大小。
4.针对图像中局部光照以及前景和背景之间对比度的不同,运用归一化使得信息得以统一:把若干局部空间单元(cell)组合成更大的块(block),以block为单位归一化后组成向量(最关键部分)。空间中的block是相互重叠的,这样每个cell的信息会出现在多个block中,使得最终的向量能够反映更多的图像信息。这可以大幅度提高图像识别能力。
5.HOG的检测窗口(64×128)在人体四周留有约16个像素的边距,这有助于降低识别的错误率;而这一步就是将检测窗口内所有block的特征收集起来,并整理(串接)成最终的特征向量。
6.将之前整理的向量送入SVM进行分类,来判断其是否是人物。
Hog.h
#pragma once
#include<vector>
#include<map>
// Four-decimal approximation of pi used for the radian/degree conversions in
// this project. NOTE(review): the value is slightly larger than the true pi;
// check the angle-binning code before replacing it with a more precise value.
#define PI 3.1416
// Unsigned-char storage type used for raw pixel data.
typedef unsigned char BYTE;
// Element type of the gradient-magnitude matrix (Hog::grad).
#define GradType BYTE
// Element type of the gradient-orientation matrix (Hog::theta).
#define MagType BYTE
// Element type of the per-block HOG feature buffers (Hog::windowHOGFeature).
#define FeaType double
// Cache of per-block HOG feature buffers, keyed by the block's (y, x) offset
// inside the current image.
//
// Ownership: every buffer stored through AddBlock() is released with delete[]
// by deleteBlock(), deleteAllBlocks() or the destructor, so buffers must have
// been allocated with new[].
//
// NOTE(review): the class holds raw owning pointers and keeps the implicit
// copy operations; copying a non-empty BlockManager would lead to a double
// delete. Do not copy instances that hold blocks.
class BlockManager
{
private:
    typedef std::pair<int, int> BlockKey;
    typedef std::map<BlockKey, double*> BlockCache;

    BlockCache cache; // (y, x) -> feature buffer owned by this object
    int level;        // pyramid level recorded through SetLevel()

public:
    BlockManager() : level(0) {}

    // Returns true when a buffer is cached for block (y, x).
    bool find(int y, int x)
    {
        return cache.find(BlockKey(y, x)) != cache.end();
    }

    // Returns the cached buffer for block (y, x), or NULL when none exists.
    // A miss does not create an entry, so find() keeps reporting false.
    double*GetBlockData(int y, int x)
    {
        BlockCache::iterator it = cache.find(BlockKey(y, x));
        return it == cache.end() ? NULL : it->second;
    }

    // Stores `data` for block (y, x). If that block is already cached the call
    // is a no-op: the existing buffer is kept and `data` is NOT adopted.
    void AddBlock(int y, int x, double*data)
    {
        cache.insert(BlockCache::value_type(BlockKey(y, x), data));
    }

    // Records the pyramid level the cached blocks belong to.
    void SetLevel(const int lev)
    {
        level = lev;
    }

    // Frees and removes the buffer of block (y, x); does nothing on a miss.
    void deleteBlock(int y, int x)
    {
        BlockCache::iterator it = cache.find(BlockKey(y, x));
        if (it == cache.end())
            return;
        delete[] it->second;
        cache.erase(it);
    }

    // Frees every cached buffer and empties the cache.
    void deleteAllBlocks()
    {
        // Release all buffers first and clear afterwards, so no iterator is
        // used after the element it points to has been erased.
        for (BlockCache::iterator it = cache.begin(); it != cache.end(); ++it)
            delete[] it->second;
        cache.clear();
    }

    ~BlockManager()
    {
        deleteAllBlocks();
    }
};
class Hog
{
public:
int img_width;//待检测图片的宽度
int img_height;//待检测图片的高度
int window_width;//检测窗口的宽度,64
int window_height;//检测窗口的高度,128
int CellSize;//cell的大小,设为8
int blkcell;//block尺寸是cell的几倍,2*2
int blocksize;//block的大小
int blockSkipStep;//Block在检测窗口中上下移动尺寸为8,与blocksize=16相比,
//即overlap=1/2,blockSkipStep减小到4使得overlap增加到3/4后,可使精度增加,但计算量增大
int windowSkipStep;//滑动窗口在检测图片中滑动的尺寸为8
int m_histBin;//180度分几个区间,设为9,即1个cell的梯度直方图化成9个bin
int win_fea_dim;
int xblkSkipStepNum;
int yblkSkipStepNum;
private:
int max_pyramid_height;//图像金字塔高度
int current_pyramid_height;//当前图像金字塔高度
double ratio;//缩放比例
bool isGaussianWeight;//是否使用高斯权重
BYTE*RGBdata;
BYTE*greydata;
GradType*grad;//梯度矩阵
MagType*theta;//角度矩阵
std::vector < FeaType* > windowHOGFeature;
BlockManager blockmanager;
bool GetImgData();
void Gamma();
void RGB2Grey();
double* GetBlkFeature(int offsetX, int offsetY);
double GaussianKernel(int x, int y, int cent_x, int cent_y, int Hx, int Hy);
void NextPyramid();
public:
GradType*get_grad(){ return grad; }
MagType*get_mag(){ return theta; }
void ComputeGradient();
Hog(const int winW, const int winH, const int CellSize,
const int blkcell, const int blockSkipStep, const int windowSkipStep,
const int m_histBin, const double rat) :ratio(rat),
window_width(winW), window_height(winH),
CellSize(CellSize), blkcell(blkcell), blockSkipStep(blockSkipStep),
windowSkipStep(windowSkipStep), m_histBin(m_histBin)
{
blocksize = CellSize*blkcell;
//RGBdata = new BYTE[imgw*imgh * 3];
max_pyramid_height = 0;
current_pyramid_height = 1;
int ww = img_width = 64;
int hh = img_height = 128;
while (ww >= window_width&&hh >= window_height)
{
ww = ww / 2;
hh = hh / 2;
max_pyramid_height++;
}
xblkSkipStepNum = floor((window_width - blkcell * CellSize) / blockSkipStep + 1);
yblkSkipStepNum = floor((window_height - blkcell * CellSize) / blockSkipStep + 1);
win_fea_dim = xblkSkipStepNum*yblkSkipStepNum*blkcell*blkcell*m_histBin;
_ASSERTE(max_pyramid_height >= 1);
};
int getwindow_width(){ return window_width; };
int getwindow_height(){ return window_height; };
void GetWindowFeature(const int offsetY_againstImg, const int offsetX_againstImg);
void L2Normalize(double*vec, int length);
void set_img_size(const int h, const int w){ img_height = h; img_width = w; }
void SingleScaleDetect();
void MultiScaleDetect();
void setgreyData(BYTE*src){ this->greydata = src; }
GradType*get_grad_data(){ return grad; }
void writeHogFea2File();
std::vector < FeaType* >getwindowHOGFeature(){ return windowHOGFeature; }
~Hog()
{
if (RGBdata != NULL)
delete[]RGBdata;
if (greydata != NULL)
{
delete[]greydata;
}
for (int i = 0; i < windowHOGFeature.size(); i++)
if (windowHOGFeature[i] != NULL)
delete[]windowHOGFeature[i];
delete[]grad;
delete[]theta;
};
};
Hog.cpp
#include "stdafx.h"
#include "Hog.h"
#include <cfloat>
#include <cmath>
#include <cstring>
#include <fstream>
#include <limits>
void Hog::writeHogFea2File()
{
std::ofstream myfile;
myfile.open("example.txt");
myfile << "Writing HOG Feature to File.\n";
_ASSERTE(windowHOGFeature.size() == 105);
for (int z = 0; z < 105; ++z)
{
for (int i = 0; i < 36; i++)
myfile << windowHOGFeature[z][i] << std::endl;
}
myfile.close();
}
void Hog::ComputeGradient()
{
if (grad != NULL)
delete[]grad;
grad = new GradType[img_height*img_width];
if (theta != NULL)
delete[]theta;
theta = new MagType[img_height*img_width];
for (int i = 1; i < img_height; i++)
for (int j = 1; j < img_width; j++)
{
double dx = greydata[i*img_width + j + 1] - greydata[i*img_width + j - 1];
double dy = greydata[(i + 1)*img_width + j] - greydata[(i - 1)*img_width + j];
if (fabs(dx) <= 1.0e-6 && fabs(dy) <= 1.0e-6) {
grad[i*img_width + j] = 0;
}
else
grad[i*img_width + j] = (sqrt(dx*dx + dy*dy));
double theta = atan2(dy, dx);
if (theta < 0)
theta = (theta + PI); // normalize to [0, PI], CV_PI
if (theta > PI)
theta = theta - PI;
theta = (theta * 180 / PI);
this->theta[i*img_width + j] = theta;
std::cout << theta + 0 << std::endl;
}
// 边界点的梯度取其近邻点的值
int i = 0;
for (int j = 0; j < img_width; j++) {
grad[i*img_width + j] = grad[(i + 1)*img_width + j];
this->theta[i*img_width + j] = this->theta[(i + 1)*img_width + j];
}
i = img_height - 1;
for (int j = 0; j < img_width; j++) {
grad[i*img_width + j] = grad[(i - 1)*img_width + j];
this->theta[i*img_width + j] = this->theta[(i - 1)*img_width + j];
}
int j = 0;
for (i = 0; i < img_height; i++) {
grad[i*img_width + j] = grad[i*img_width + j + 1];
this->theta[i*img_width + j] = this->theta[i*img_width + j + 1];
}
j = img_width - 1;
for (i = 0; i < img_height; i++) {
grad[i*img_width + j] = grad[i*img_width + j - 1];
this->theta[i*img_width + j] = this->theta[i*img_width + j - 1];
}
}
// Scales `vec` (of `length` elements) in place by
// 1 / sqrt(sum of squares + FLT_EPSILON); the epsilon keeps the divisor
// non-zero for an all-zero vector.
void Hog::L2Normalize(double*vec, int length)
{
    double energy = 0;
    int k = 0;
    while (k < length)
    {
        energy += vec[k] * vec[k];
        ++k;
    }

    const double scale = (double)1.0 / sqrt(energy + FLT_EPSILON);
    for (k = 0; k < length; ++k)
        vec[k] = vec[k] * scale;
}
// Builds the L2-normalised HOG descriptor of the block whose top-left pixel is
// (offsetY_againstImg, offsetX_againstImg) in the current image.
//
// Parameters are (y, x) in that order. Requires grad/theta to have been filled
// by ComputeGradient(); presumably the whole block must lie inside the image -
// no bounds check is performed here.
//
// Returns a new[]-allocated buffer of blkcell*blkcell*m_histBin values
// (m_histBin consecutive bins per cell, cells in row-major order). The buffer
// is also registered with `blockmanager` under the same (y, x) key, and the
// cache releases it later; callers must not delete it.
//
// Each pixel votes with its gradient magnitude, split between its orientation
// bin and the next one, and spread over neighbouring cells depending on where
// it lies in the block: near the four corners only the pixel's own cell is
// used, along the edges two cells, everywhere else four.
//
// NOTE(review): the neighbour-cell arithmetic (cell_idx +/- 1, +/- blkcell)
// and the final branch, which addresses cells 0..3 directly, assume a 2x2-cell
// block. The spatial weights fx0/fy0 are always measured from the centre of
// cell 0. Both are kept as in the original implementation.
double* Hog::GetBlkFeature(int offsetY_againstImg, int offsetX_againstImg)
{
    const int blkFeaDim = blkcell*blkcell*m_histBin;
    // Value-initialised, so every bin of every cell starts at 0 regardless of
    // the configured block size.
    double *blkHOG = new double[blkFeaDim]();

    const int center_cell_0_X = CellSize / 2;
    const int center_cell_0_Y = CellSize / 2;
    const int regionsize = CellSize;
    const int nearEdge = CellSize / 2;                    // half a cell from the top/left border
    const int farEdge = CellSize*blkcell - CellSize / 2;  // half a cell from the bottom/right border
    const double binWidth = 180.0 / double(m_histBin);    // degrees per orientation bin

    for (int cell_no_y = 0; cell_no_y < blkcell; cell_no_y++) {
        for (int cell_no_x = 0; cell_no_x < blkcell; cell_no_x++) {
            // index of the cell inside the block
            const int cell_idx = cell_no_y*blkcell + cell_no_x;
            // first pixel of the cell, relative to the block
            const int cell_start_y = cell_no_y*CellSize;
            const int cell_start_x = cell_no_x*CellSize;

            for (int y = cell_start_y; y < cell_start_y + CellSize; y++) {
                for (int x = cell_start_x; x < cell_start_x + CellSize; x++) {
                    const int pixel = (offsetY_againstImg + y)*img_width + offsetX_againstImg + x;
                    const double angle = this->theta[pixel];
                    const double magn = grad[pixel];
                    // A pixel without gradient contributes nothing.
                    if (magn <= 0.0)
                        continue;

                    int theta_idx = (int)(angle / binWidth);
                    // An orientation of exactly 180 degrees would index one bin
                    // past the cell's histogram; keep it in the last bin.
                    if (theta_idx >= m_histBin)
                        theta_idx = m_histBin - 1;
                    const int next_idx = (theta_idx + 1) % m_histBin;

                    // share of the vote that stays in bin theta_idx
                    const double tt = 1.0 - fabs(angle / binWidth - (double(theta_idx) + 0.5));
                    // spatial weights measured from the centre of cell 0
                    const double fx0 = 1.0 - fabs(double(x - center_cell_0_X)) / double(regionsize);
                    const double fy0 = 1.0 - fabs(double(y - center_cell_0_Y)) / double(regionsize);

                    // The regions below are mutually exclusive. They form one
                    // else-if chain so that a pixel handled by a corner or edge
                    // rule is not additionally pushed through the 4-cell
                    // fallback, which would count it twice.
                    if ((y <= nearEdge && x <= nearEdge) || (y >= farEdge && x <= nearEdge)
                        || (y >= farEdge && x >= farEdge)
                        || (x >= farEdge && y <= nearEdge)) // corners: no spatial interpolation
                    {
                        blkHOG[m_histBin*cell_idx + theta_idx] += magn*tt;
                        blkHOG[m_histBin*cell_idx + next_idx] += magn*(1.0 - tt);
                    }
                    else if (x > nearEdge && x < CellSize && (y < nearEdge || y > farEdge))
                    {
                        // top/bottom edge, left half: share with the cell to the right
                        blkHOG[m_histBin*cell_idx + theta_idx] += magn*tt*fx0;
                        blkHOG[m_histBin*(cell_idx + 1) + theta_idx] += magn*tt*(1.0 - fx0);
                        blkHOG[m_histBin*cell_idx + next_idx] += magn*(1.0 - tt)*fx0;
                        blkHOG[m_histBin*(cell_idx + 1) + next_idx] += magn*(1.0 - tt)*(1.0 - fx0);
                    }
                    else if (x > CellSize && x < farEdge && (y < nearEdge || y > farEdge))
                    {
                        // top/bottom edge, right half: share with the cell to the left
                        blkHOG[m_histBin*cell_idx + theta_idx] += magn*tt*fx0;
                        blkHOG[m_histBin*(cell_idx - 1) + theta_idx] += magn*tt*(1.0 - fx0);
                        blkHOG[m_histBin*cell_idx + next_idx] += magn*(1.0 - tt)*fx0;
                        blkHOG[m_histBin*(cell_idx - 1) + next_idx] += magn*(1.0 - tt)*(1.0 - fx0);
                    }
                    else if (y > nearEdge && y < CellSize && (x < nearEdge || x > farEdge))
                    {
                        // left/right edge, upper half: share with the cell below
                        blkHOG[m_histBin*cell_idx + theta_idx] += magn*tt*fy0;
                        blkHOG[m_histBin*(cell_idx + blkcell) + theta_idx] += magn*tt*(1.0 - fy0);
                        blkHOG[m_histBin*cell_idx + next_idx] += magn*(1.0 - tt)*fy0;
                        blkHOG[m_histBin*(cell_idx + blkcell) + next_idx] += magn*(1.0 - tt)*(1.0 - fy0);
                    }
                    else if (y > CellSize && y < farEdge && (x < nearEdge || x > farEdge))
                    {
                        // left/right edge, lower half: share with the cell above
                        blkHOG[m_histBin*cell_idx + theta_idx] += magn*tt*fy0;
                        blkHOG[m_histBin*(cell_idx - blkcell) + theta_idx] += magn*tt*(1.0 - fy0);
                        blkHOG[m_histBin*cell_idx + next_idx] += magn*(1.0 - tt)*fy0;
                        blkHOG[m_histBin*(cell_idx - blkcell) + next_idx] += magn*(1.0 - tt)*(1.0 - fy0);
                    }
                    else // trilinear interpolation across the four cells of the block
                    {
                        blkHOG[m_histBin * 0 + theta_idx] += magn*tt*fx0*fy0;
                        blkHOG[m_histBin * 0 + next_idx] += magn*(1.0 - tt)*fx0*fy0;
                        blkHOG[m_histBin * 1 + theta_idx] += magn*tt*(1.0 - fx0)*fy0;
                        blkHOG[m_histBin * 1 + next_idx] += magn*(1.0 - tt)*(1.0 - fx0)*fy0;
                        blkHOG[m_histBin * 2 + theta_idx] += magn*tt*fx0*(1.0 - fy0);
                        blkHOG[m_histBin * 2 + next_idx] += magn*(1.0 - tt)*fx0*(1.0 - fy0);
                        blkHOG[m_histBin * 3 + theta_idx] += magn*tt*(1.0 - fx0)*(1.0 - fy0);
                        blkHOG[m_histBin * 3 + next_idx] += magn*(1.0 - tt)*(1.0 - fx0)*(1.0 - fy0);
                    }
                } // for(x)
            } // for(y)
        } // for(cell_no_x)
    } // for(cell_no_y)

    L2Normalize(blkHOG, blkFeaDim); // normalisation is done per block
    blockmanager.AddBlock(offsetY_againstImg, offsetX_againstImg, blkHOG); // cached to avoid recomputation
    return blkHOG;
}
// Kernel weight of pixel (x, y) relative to the centre (cent_x, cent_y) with
// bandwidths Hx and Hy.
// Evaluates p = 1 - ((dx/Hx)^2 + (dy/Hy)^2) / 2 and returns 4*p / (2*PI) when
// p >= 0, otherwise 0. Note that, despite the function's name, the profile
// that is computed is a truncated quadratic rather than an exponential.
double Hog::GaussianKernel(int x, int y, int cent_x, int cent_y, int Hx, int Hy)
{
    const int offX = x - cent_x;
    const int offY = y - cent_y;
    const double termX = (double)(offX*offX) / (Hx*Hx);
    const double termY = (double)(offY*offY) / (Hy*Hy);
    const double profile = 1 - (termX + termY) / 2;

    // Written as a negated ">=" so that anything failing the test, including
    // a NaN profile, yields zero.
    if (!(profile >= 0))
        return 0.0f;
    return (double)(4.0 * profile / (2 * PI));
}
// Collects the block features of the detection window whose top-left corner
// is (offsetY_againstImg, offsetX_againstImg) into windowHOGFeature, row by
// row. Blocks already held by the block cache are reused; the others are
// computed through GetBlkFeature().
void Hog::GetWindowFeature(const int offsetY_againstImg, const int offsetX_againstImg)
{
    windowHOGFeature.clear();
    for (int row = 0; row < yblkSkipStepNum; ++row)
    {
        const int blkY = offsetY_againstImg + row*blockSkipStep;
        for (int col = 0; col < xblkSkipStepNum; ++col)
        {
            const int blkX = offsetX_againstImg + col*blockSkipStep;
            double *feature = blockmanager.find(blkY, blkX)
                ? blockmanager.GetBlockData(blkY, blkX)
                : GetBlkFeature(blkY, blkX);
            windowHOGFeature.push_back(feature);
        }
    }
}
void Hog::RGB2Grey()
{
if (greydata == NULL)
greydata = new BYTE[img_width*img_height];
for (int i = 0; i < img_height; i++)
for (int j = 0; j < img_width; j++)
{
greydata[i*img_width + j] = 0.299*RGBdata[i*img_width * 3 + 3 * j] +
0.587*RGBdata[i*img_width * 3 + 3 * j + 1] +
0.114*RGBdata[i*img_width * 3 + 3 * j + 2];
}
delete[]RGBdata;
}
void Hog::NextPyramid()//双线性插值获得下一层图像
{
int new_img_height = img_height / ratio;
int new_img_width = img_width / ratio;
BYTE*new_greydata = new BYTE[new_img_height*new_img_width];
double fw = ratio;//double(nW) / W1;
double fh = ratio;//double(nH) / H1;
int y1, y2, x1, x2, x0, y0;
double fx1, fx2, fy1, fy2;
for (int i = 0; i < new_img_height; i++)
{
y0 = i*fh;
y1 = int(y0);
if (y1 == img_height - 1) y2 = y1;
else y2 = y1 + 1;
fy1 = y1 - y0;
fy2 = 1.0f - fy1;
for (int j = 0; j < new_img_width; j++)
{
x0 = j*fw;
x1 = int(x0);
if (x1 == img_width - 1) x2 = x1;
else x2 = x1 + 1;
fx1 = y1 - y0;
fx2 = 1.0f - fx1;
double s1 = fx1*fy1;
double s2 = fx2*fy1;
double s3 = fx2*fy2;
double s4 = fx1*fy2;
BYTE c1r, c2r, c3r, c4r;
c1r = greydata[y1*img_width + x1];
c2r = greydata[y1*img_width + x2];
c3r = greydata[y2*img_width + x1];
c4r = greydata[y2*img_width + x2];
BYTE r;
r = (BYTE)(c1r*s3) + (BYTE)(c2r*s4) + (BYTE)(c3r*s2) + (BYTE)(c4r*s1);
new_greydata[i*new_img_width + j] = r;
}
}
delete[]greydata;
greydata = new_greydata;
img_height = new_img_height;
img_width = new_img_width;
current_pyramid_height++;
blockmanager.deleteAllBlocks();
blockmanager.SetLevel(current_pyramid_height);
}
/*void Hog::SingleScaleDetect()
{
int xSkipStepNum = floor((img_width - window_width) / windowSkipStep + 1);
int ySkipStepNum = floor((img_height - window_width) / windowSkipStep + 1);
//double*imgHOGFeature = new double[blkcell*blkcell*m_histBin*xSkipStepNum*ySkipStepNum];
for (int i = 0; i < ySkipStepNum; i++)
for (int j = 0; j < xSkipStepNum; j++)
{
GetWindowFeature(i*windowSkipStep, j*windowSkipStep);
}
}
void Hog::MultiScaleDetect()
{
while (current_pyramid_height < max_pyramid_height)
{
SingleScaleDetect();
NextPyramid();
}
}*/
版权声明:
浙公网安备 33010602011771号